In [98]:
import torch
from torch import tensor
from torch import nn as nn
from torch import optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torchmetrics import Accuracy
import pandas as pd

##MANIPULATING DATA

In [99]:
data = pd.read_csv('./data/air_quality_health_impact_data.csv')
data.isnull().sum()
# no null values
features = data.drop('HealthImpactClass', axis=1)
label = data['HealthImpactClass'].astype(int)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

X_train, X_test, Y_train, Y_test = train_test_split(scaled_features, label, test_size=0.2, random_state= 42)
X_train = tensor(X_train, dtype=torch.float32)
X_test = tensor(X_test, dtype=torch.float32)
Y_train = tensor(Y_train.to_numpy(), dtype=torch.long)
Y_test = tensor(Y_test.to_numpy(), dtype=torch.long)

training_set = TensorDataset(X_train, Y_train)
testing_set = TensorDataset(X_test,Y_test)

training_loader = DataLoader(training_set, batch_size=32, shuffle=True)
testing_loader = DataLoader(testing_set, batch_size=32, shuffle=False)


##BUILDING THE MODEL

In [100]:
class MultiModel(nn.Module):
    def __init__(self):
        super(MultiModel, self).__init__()
        self.ly1 = nn.Linear(14, 28)
        self.dp1 = nn.Dropout(0.5)
        self.ly2 = nn.Linear(28, 18)
        self.dp2 = nn.Dropout(0.5)
        self.ly3 = nn.Linear(18, 12)
        self.dp3 = nn.Dropout(0.5)
        self.ly4 = nn.Linear(12,6)
        self.dp4 = nn.Dropout(0.5)
        self.ly5 = nn.Linear(6,5)
        self.act = nn.Softmax(dim = 1)
    def forward(self, x):
        x = self.dp1(F.relu(self.ly1(x)))
        x = self.dp2(F.leaky_relu(self.ly2(x)))
        x = self.dp3(F.leaky_relu(self.ly3(x)))
        x = self.dp4(F.leaky_relu(self.ly4(x)))
        x = self.act(self.ly5(x))
        return x
model = MultiModel()

##TRAINING TOOLS

In [101]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)

##EARLY STOP

In [102]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta = .01):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.stop = False
    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > (self.best_loss - self.min_delta):
            self.counter += 1
            if self.counter > self.patience:
                self.stop = True
        else:
            self.counter = 0
            self.best_loss = val_loss
early_stop = EarlyStopping(15, 0.0001)


##TRAINING LOOP

In [103]:
num_epochs = 500
for epoch in range(num_epochs):
    running_loss = 0
    model.train()
    for features, label in training_loader:
        optimizer.zero_grad()
        output = model(features)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    val_loss = 0
    model.eval()
    with torch.no_grad():
        for features, label in testing_loader:
            output = model(features)
            loss = criterion(output, label)
            val_loss += loss.item()
    early_stop(val_loss/len(testing_loader))
    if early_stop.stop:
        print(f"Early Stop {epoch} | {val_loss/len(testing_loader)}")
        break
    if epoch % 20 == 0:
        print(f"Running Loss: {running_loss/len(training_loader)} | Epoch: {epoch}")

Running Loss: 1.1272964812304875 | Epoch: 0
Running Loss: 1.0210820844728652 | Epoch: 20
Early Stop 27 | 1.0142976915514148


##EVALUATION LOOP

In [104]:
accuracy = Accuracy(task='multiclass', num_classes=5)
model.eval()
with torch.no_grad():
  for features, labels in testing_loader:
    output = model(features)
    _, predicted = torch.max(output.data, 1)
    accuracy.update(predicted, labels)

print(f'Accuracy: {accuracy.compute().item()}')

Accuracy: 0.8899397850036621
