In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam


In [3]:
data = pd.read_csv('pollution_dataset.csv')
data.sample(6)

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
2732,27.8,83.6,23.7,37.3,26.1,8.2,1.22,7.4,420,Moderate
4043,24.6,49.7,25.1,26.5,16.4,7.9,1.22,11.2,236,Good
4349,24.8,73.1,6.5,9.8,17.5,10.0,1.02,12.5,287,Good
1693,21.1,54.2,49.3,59.1,30.7,9.9,1.5,6.9,660,Moderate
1688,24.4,54.0,2.2,5.3,27.9,5.3,1.05,10.6,431,Good
2919,23.6,68.7,8.3,13.7,19.7,4.1,0.95,11.2,255,Good


In [9]:
el = LabelEncoder()
data['Air Quality'] = el.fit_transform(data['Air Quality'])
data

Unnamed: 0,Temperature,Humidity,PM2.5,PM10,NO2,SO2,CO,Proximity_to_Industrial_Areas,Population_Density,Air Quality
0,29.8,59.1,5.2,17.9,18.9,9.2,1.72,6.3,319,2
1,28.3,75.6,2.3,12.2,30.8,9.7,1.64,6.0,611,2
2,23.1,74.7,26.7,33.8,24.4,12.6,1.63,5.2,619,2
3,27.1,39.1,6.1,6.3,13.5,5.3,1.15,11.1,551,0
4,26.5,70.7,6.9,16.0,21.9,5.6,1.01,12.7,303,0
...,...,...,...,...,...,...,...,...,...,...
4995,40.6,74.1,116.0,126.7,45.5,25.7,2.11,2.8,765,1
4996,28.1,96.9,6.9,25.0,25.3,10.8,1.54,5.7,709,2
4997,25.9,78.2,14.2,22.1,34.8,7.8,1.63,9.6,379,2
4998,25.3,44.4,21.4,29.0,23.7,5.7,0.89,11.6,241,0


In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 10 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Temperature                    5000 non-null   float64
 1   Humidity                       5000 non-null   float64
 2   PM2.5                          5000 non-null   float64
 3   PM10                           5000 non-null   float64
 4   NO2                            5000 non-null   float64
 5   SO2                            5000 non-null   float64
 6   CO                             5000 non-null   float64
 7   Proximity_to_Industrial_Areas  5000 non-null   float64
 8   Population_Density             5000 non-null   int64  
 9   Air Quality                    5000 non-null   int64  
dtypes: float64(8), int64(2)
memory usage: 390.8 KB


In [13]:
data.isnull().sum()

Temperature                      0
Humidity                         0
PM2.5                            0
PM10                             0
NO2                              0
SO2                              0
CO                               0
Proximity_to_Industrial_Areas    0
Population_Density               0
Air Quality                      0
dtype: int64

In [15]:
X = data.drop('Air Quality', axis=1)
y = data['Air Quality']

In [17]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [21]:
def build_model(layers, activations, input_dim):
    model = Sequential()
    for i, (neurons, activation) in enumerate(zip(layers, activations)):
        if i == 0:
            model.add(Dense(neurons, activation=activation, input_dim=input_dim))
        else:
            model.add(Dense(neurons,activation=activation))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [23]:
model_1 = build_model(layers=[32, 16], activations=['relu', 'relu'], input_dim=X_train.shape[1])
history_1 = model_1.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5177 - loss: 1.1853 - val_accuracy: 0.8075 - val_loss: 0.6321
Epoch 2/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8328 - loss: 0.5397 - val_accuracy: 0.8975 - val_loss: 0.3781
Epoch 3/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8912 - loss: 0.3384 - val_accuracy: 0.9075 - val_loss: 0.2852
Epoch 4/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8962 - loss: 0.2830 - val_accuracy: 0.9175 - val_loss: 0.2429
Epoch 5/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9204 - loss: 0.2312 - val_accuracy: 0.9312 - val_loss: 0.2179
Epoch 6/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9312 - loss: 0.2036 - val_accuracy: 0.9225 - val_loss: 0.2013
Epoch 7/20
[1m100/100[0m 

In [25]:
model_2 = build_model(layers=[64, 32, 16], activations=['tanh', 'relu', 'relu'], input_dim=X_train.shape[1])
history_2 = model_2.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, validation_split=0.2)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4920 - loss: 1.0735 - val_accuracy: 0.8062 - val_loss: 0.4622
Epoch 2/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8337 - loss: 0.4017 - val_accuracy: 0.9100 - val_loss: 0.2957
Epoch 3/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9063 - loss: 0.2627 - val_accuracy: 0.9200 - val_loss: 0.2169
Epoch 4/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9293 - loss: 0.2005 - val_accuracy: 0.9175 - val_loss: 0.1885
Epoch 5/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9375 - loss: 0.1735 - val_accuracy: 0.9362 - val_loss: 0.1691
Epoch 6/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9371 - loss: 0.1671 - val_accuracy: 0.9350 - val_loss: 0.1633
Epoch 7/20
[1m100/100[0m [32m━━━━━━━

In [27]:
model_3 = build_model(layers=[128, 64, 32, 16], activations=['relu', 'relu', 'relu', 'relu'], input_dim=X_train.shape[1])
history_3 = model_3.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, validation_split=0.2)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.5817 - loss: 0.9124 - val_accuracy: 0.8900 - val_loss: 0.3046
Epoch 2/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9080 - loss: 0.2584 - val_accuracy: 0.9087 - val_loss: 0.2139
Epoch 3/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9291 - loss: 0.1796 - val_accuracy: 0.9225 - val_loss: 0.1879
Epoch 4/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9423 - loss: 0.1499 - val_accuracy: 0.9300 - val_loss: 0.1696
Epoch 5/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9388 - loss: 0.1503 - val_accuracy: 0.9337 - val_loss: 0.1663
Epoch 6/20
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9448 - loss: 0.1426 - val_accuracy: 0.9287 - val_loss: 0.1699
Epoch 7/20
[1m100/100[0m [32m━━━━━━━