# 1. Importing dataset and feature selection

In [1]:
import pandas as pd

df = pd.read_csv('updated_pollution_dataset.csv')
df.head()

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,Moderate
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,Moderate
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,Moderate
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,Good
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,Good


In [2]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
df['Air Quality'] = encoder.fit_transform(df['Air Quality'])
df.head(10)

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,2
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,2
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,2
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,0
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,0
5,39.4,96.6,14.6,35.5,42.9,17.9,1.82,3.1,674,1
6,41.7,82.5,1.7,15.8,31.1,12.7,1.8,4.6,735,3
7,31.0,59.6,5.0,16.8,24.2,13.6,1.38,6.3,443,2
8,29.4,93.8,10.3,22.7,45.1,11.8,2.03,5.4,486,3
9,33.2,80.5,11.1,24.4,32.0,15.3,1.69,4.9,535,3


In [3]:
X = df.drop(columns=['Air Quality'])
y = df['Air Quality']

In [4]:
import torch

X = torch.Tensor(X.to_numpy(float))
y = torch.Tensor(y.to_numpy(float))

In [5]:
X

tensor([[ 29.8000,  59.1000,   5.2000,  ...,   1.7200,   6.3000, 319.0000],
        [ 28.3000,  75.6000,   2.3000,  ...,   1.6400,   6.0000, 611.0000],
        [ 23.1000,  74.7000,  26.7000,  ...,   1.6300,   5.2000, 619.0000],
        ...,
        [ 25.9000,  78.2000,  14.2000,  ...,   1.6300,   9.6000, 379.0000],
        [ 25.3000,  44.4000,  21.4000,  ...,   0.8900,  11.6000, 241.0000],
        [ 24.1000,  77.9000,  81.7000,  ...,   1.3800,   8.3000, 461.0000]])

In [6]:
y

tensor([2., 2., 2.,  ..., 2., 0., 2.])

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [8]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([3750, 9]),
 torch.Size([1250, 9]),
 torch.Size([3750]),
 torch.Size([1250]))

In [9]:
X_train = (X_train - X_train.mean()) / X_train.std()
X_test = (X_test - X_test.mean()) / X_test.std()

In [10]:
X_train

tensor([[-0.3331, -0.0930, -0.4789,  ..., -0.4790, -0.4173,  3.0778],
        [-0.3180, -0.0760, -0.4688,  ..., -0.4794, -0.4160,  2.7195],
        [-0.3375, -0.1596, -0.4167,  ..., -0.4783, -0.4098,  2.4933],
        ...,
        [-0.3337, -0.0609, -0.3639,  ..., -0.4738, -0.4468,  2.1664],
        [-0.3513, -0.0056, -0.3896,  ..., -0.4798, -0.3834,  2.1539],
        [-0.2696,  0.1622, -0.3023,  ..., -0.4646, -0.4625,  3.3731]])

In [11]:
X_test

tensor([[-0.2872, -0.0206, -0.0244,  ..., -0.4693, -0.4689,  4.2952],
        [-0.3334, -0.0497, -0.4803,  ..., -0.4780, -0.4189,  1.0026],
        [-0.3245,  0.0123, -0.4594,  ..., -0.4765, -0.4214,  2.9529],
        ...,
        [-0.2777,  0.0383, -0.4575,  ..., -0.4754, -0.4524,  1.5535],
        [-0.3701, -0.2163, -0.4847,  ..., -0.4792, -0.4119,  1.4902],
        [-0.3296, -0.0922, -0.2960,  ..., -0.4802, -0.4201,  1.0849]])

# 2. Neural Network

## 2.1 Building a base model

In [12]:
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [13]:
from torch import nn

class Air_Quality(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=8):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_features)
        )
        
    def forward(self, x):
        return self.layers(x)
    
model = Air_Quality(input_features=X_train.shape[1], output_features=len(y.unique()))
model

Air_Quality(
  (layers): Sequential(
    (0): Linear(in_features=9, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=8, bias=True)
    (5): ReLU()
    (6): Linear(in_features=8, out_features=4, bias=True)
  )
)

## 2.2 Loss function, Optimizer and Accuracy function

In [14]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.005)

In [15]:
X_train.type(), X_test.type(), y_train.type(), y_test.type() 

('torch.FloatTensor',
 'torch.FloatTensor',
 'torch.FloatTensor',
 'torch.FloatTensor')

In [16]:
y_train = y_train.long()
y_test = y_test.long()

The predictable variable has to be of type `long` because `CrossEntropyLoss()` expects it to be scalars(long)

In [17]:
model.parameters

<bound method Module.parameters of Air_Quality(
  (layers): Sequential(
    (0): Linear(in_features=9, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=8, bias=True)
    (5): ReLU()
    (6): Linear(in_features=8, out_features=4, bias=True)
  )
)>

In [18]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct/len(y_pred)) * 100
    return acc

# 3. Training the model

In [19]:
torch.cuda.manual_seed(42)

epochs = 2001

for epoch in range(epochs):
    model.train()
    
    y_logits = model(X_train)
    y_pred = y_logits.argmax(dim=1)
    
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_train, y_pred)
    
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    model.eval()
    with torch.inference_mode():
        test_logits = model(X_test)
        test_pred = test_logits.argmax(dim=1)
        
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_test, test_pred)
        
        
    if epoch % 100 == 0:
        print(f"Epoch:{epoch}: Loss:{loss:.2f} | Accuracy:{acc:.2f} | Test loss:{test_loss:.2f} | Test Accuracy:{test_acc:.2f}")

Epoch:0: Loss:1.38 | Accuracy:29.84 | Test loss:1.37 | Test Accuracy:30.48
Epoch:100: Loss:0.97 | Accuracy:52.37 | Test loss:0.95 | Test Accuracy:54.56
Epoch:200: Loss:0.33 | Accuracy:86.96 | Test loss:0.33 | Test Accuracy:87.12
Epoch:300: Loss:0.28 | Accuracy:88.37 | Test loss:0.28 | Test Accuracy:88.80
Epoch:400: Loss:0.27 | Accuracy:88.96 | Test loss:0.26 | Test Accuracy:89.36
Epoch:500: Loss:0.26 | Accuracy:89.47 | Test loss:0.26 | Test Accuracy:89.60
Epoch:600: Loss:0.26 | Accuracy:89.63 | Test loss:0.25 | Test Accuracy:89.68
Epoch:700: Loss:0.25 | Accuracy:89.89 | Test loss:0.25 | Test Accuracy:89.76
Epoch:800: Loss:0.25 | Accuracy:89.73 | Test loss:0.24 | Test Accuracy:90.64
Epoch:900: Loss:0.25 | Accuracy:90.24 | Test loss:0.24 | Test Accuracy:90.32
Epoch:1000: Loss:0.25 | Accuracy:90.29 | Test loss:0.24 | Test Accuracy:90.56
Epoch:1100: Loss:0.25 | Accuracy:90.43 | Test loss:0.24 | Test Accuracy:90.72
Epoch:1200: Loss:0.24 | Accuracy:90.40 | Test loss:0.23 | Test Accuracy:90.8

The model performs good! But we could fine tune this base model

# 4. Fine-Tuning the model

## 4.1 Architecture Definition

In [20]:
class Air_Quality(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=16):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
            nn.BatchNorm1d(num_features=hidden_units),
            nn.ReLU(),
            nn.Dropout(0.1),  
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.BatchNorm1d(num_features=hidden_units),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_features)
        )

    def forward(self, x):
        return self.layers(x)

model2 = Air_Quality(input_features=X_train.shape[1], output_features=len(y.unique()))

In [21]:
optimizer = torch.optim.Adam(model2.parameters(), lr=0.001)  

In [22]:
torch.cuda.manual_seed(42)

epochs = 2001

for epoch in range(epochs):
    model2.train()
    
    y_logits = model2(X_train)
    y_pred = y_logits.argmax(dim=1)
    
    loss = loss_fn(y_logits, y_train)
    acc = accuracy_fn(y_train, y_pred)
    
    optimizer.zero_grad()
    loss.backward()
    
    optimizer.step()
    
    model2.eval()
    with torch.inference_mode():
        test_logits = model2(X_test)
        test_pred = test_logits.argmax(dim=1)
        
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_test, test_pred)
        
        
    if epoch % 100 == 0:
        print(f"Epoch:{epoch}: Loss:{loss:.2f} | Accuracy:{acc:.2f} | Test loss:{test_loss:.2f} | Test Accuracy:{test_acc:.2f}")

Epoch:0: Loss:1.37 | Accuracy:29.33 | Test loss:1.36 | Test Accuracy:40.64
Epoch:100: Loss:0.84 | Accuracy:66.27 | Test loss:0.85 | Test Accuracy:64.80
Epoch:200: Loss:0.34 | Accuracy:87.04 | Test loss:0.32 | Test Accuracy:88.96
Epoch:300: Loss:0.28 | Accuracy:88.51 | Test loss:0.23 | Test Accuracy:91.12
Epoch:400: Loss:0.23 | Accuracy:91.01 | Test loss:0.25 | Test Accuracy:89.60
Epoch:500: Loss:0.20 | Accuracy:92.59 | Test loss:0.34 | Test Accuracy:86.24
Epoch:600: Loss:0.16 | Accuracy:94.16 | Test loss:0.24 | Test Accuracy:90.96
Epoch:700: Loss:0.15 | Accuracy:94.45 | Test loss:0.27 | Test Accuracy:90.08
Epoch:800: Loss:0.14 | Accuracy:94.43 | Test loss:0.54 | Test Accuracy:80.80
Epoch:900: Loss:0.14 | Accuracy:94.48 | Test loss:0.20 | Test Accuracy:92.16
Epoch:1000: Loss:0.12 | Accuracy:95.31 | Test loss:0.40 | Test Accuracy:85.92
Epoch:1100: Loss:0.13 | Accuracy:95.04 | Test loss:0.22 | Test Accuracy:91.28
Epoch:1200: Loss:0.12 | Accuracy:95.44 | Test loss:0.47 | Test Accuracy:84.5

Wow! By adding a dropout layer and normalization, the model's accuracy has improved.

There is lower chance that the model is overfit, because we have used two dropout layers that prevent the model from overfitting