In [62]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, Dense, Dropout, Flatten, LSTM, MaxPool1D, SimpleRNN
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [35]:
from sklearn.metrics import mean_absolute_error, r2_score

def metric(y_test, preds):
    """Print the mean absolute error and R² score of ``preds`` against ``y_test``.

    Args:
        y_test: Ground-truth target values.
        preds: Predicted values to compare with ``y_test``.

    Returns:
        None. The two scores are written to stdout as ``MAE = ...`` and
        ``r2 = ...`` on separate lines.
    """
    mae = mean_absolute_error(y_true=y_test, y_pred=preds)
    r2 = r2_score(y_true=y_test, y_pred=preds)
    print(f'MAE = {mae}', f'r2 = {r2}', sep='\n')

In [14]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('city_day.csv')

In [15]:
# (rows, columns) of the frame exactly as read from the CSV.
df.shape

(29531, 16)

In [16]:
# Discard the columns that are not used downstream.
# Note: re-running this cell without reloading `df` raises KeyError because
# the columns are already gone.
df = df.drop(columns=['Date', 'AQI_Bucket', 'Xylene'])

In [17]:
# Remove exact duplicate rows first, then every row that still has a missing value.
df = df.drop_duplicates()
df = df.dropna()

In [18]:
# (rows, columns) left after dropping columns, duplicates and rows with missing values.
df.shape

(10654, 13)

In [19]:
# SMOTE oversampler; random_state is fixed so the resampling is repeatable.
sm = SMOTE(random_state=42)

In [20]:
# SMOTE is driven by the City label: City is the class to balance, every
# other column (AQI included) is treated as a feature to be resampled.
Y_s = df['City']
X_s = df.drop(columns=['City'])

In [21]:
# Oversample so that the City classes are balanced.
# NOTE(review): this runs on the whole cleaned frame, before the train/test
# split performed later in the notebook, so synthetic rows can end up in both
# partitions. Consider splitting first and resampling only the training part.
x_res, y_res = sm.fit_resample(X=X_s, y=Y_s)

In [22]:
# (rows, columns) of the resampled feature frame returned by SMOTE.
x_res.shape

(30096, 12)

In [23]:
# Regression target is AQI; every remaining resampled column is a predictor.
y_temp = x_res['AQI']
x_temp = x_res.drop(columns=['AQI'])

In [26]:
# Hold out 10% of the rows for final evaluation; the seed keeps the split repeatable.
# NOTE(review): x_temp/y_temp are derived from the SMOTE output (x_res), so the
# hold-out set contains synthetic rows as well as original ones.
x_train, x_test, y_train, y_test = train_test_split(
    x_temp,
    y_temp,
    test_size=0.1,
    random_state=42,
)

In [27]:
# Learn the standardisation statistics from the training split only, then
# apply the same transform to both splits so the test data stays unseen.
scaler = StandardScaler().fit(x_train)
x_train, x_test = scaler.transform(x_train), scaler.transform(x_test)

# RNN

In [177]:
# Insert a length-1 time axis: (samples, features) -> (samples, 1, features),
# the 3-D layout the recurrent layer is given as input_shape.
X_train_rnn = np.expand_dims(x_train, axis=1)
X_test_rnn = np.expand_dims(x_test, axis=1)

In [181]:
# Recurrent regressor: one SimpleRNN layer, a small dense layer with dropout,
# and a single linear output unit.
model_rnn = Sequential([
    SimpleRNN(12, input_shape=X_train_rnn.shape[1:], activation='relu'),
    Dense(12, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear'),
])

In [182]:
# Train the RNN regressor on mean squared error with the Adam optimizer.
model_rnn.compile(loss='mean_squared_error', optimizer='adam')

# Stop once val_loss has failed to improve for 5 consecutive epochs and roll
# the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# validation_split=0.2 holds out 20% of the training samples to compute val_loss.
model_rnn.fit(X_train_rnn, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=2)

# Predict on the hold-out set before scoring. Previously y_pred_rnn was never
# assigned in the notebook, so this cell failed on a fresh kernel.
y_pred_rnn = model_rnn.predict(X_test_rnn)
metric(y_test, y_pred_rnn)

Epoch 1/50
678/678 - 2s - loss: 12615.9258 - val_loss: 2713.3096 - 2s/epoch - 3ms/step
Epoch 2/50
678/678 - 1s - loss: 2492.1455 - val_loss: 1078.2948 - 1s/epoch - 2ms/step
Epoch 3/50
678/678 - 1s - loss: 1646.1475 - val_loss: 770.6302 - 1s/epoch - 2ms/step
Epoch 4/50
678/678 - 1s - loss: 1456.8795 - val_loss: 642.7869 - 1s/epoch - 2ms/step
Epoch 5/50
678/678 - 1s - loss: 1393.9368 - val_loss: 635.1069 - 1s/epoch - 2ms/step
Epoch 6/50
678/678 - 1s - loss: 1366.6956 - val_loss: 610.5865 - 1s/epoch - 2ms/step
Epoch 7/50
678/678 - 1s - loss: 1323.3268 - val_loss: 596.1137 - 1s/epoch - 2ms/step
Epoch 8/50
678/678 - 1s - loss: 1286.0631 - val_loss: 581.0851 - 1s/epoch - 2ms/step
Epoch 9/50
678/678 - 1s - loss: 1308.6073 - val_loss: 589.7509 - 1s/epoch - 2ms/step
Epoch 10/50
678/678 - 1s - loss: 1324.9116 - val_loss: 594.6614 - 1s/epoch - 2ms/step
Epoch 11/50
678/678 - 1s - loss: 1281.8810 - val_loss: 600.8553 - 1s/epoch - 2ms/step
Epoch 12/50
678/678 - 1s - loss: 1308.5059 - val_loss: 592.2

# CNN

In [133]:
# Append a single channel axis: (samples, features) -> (samples, features, 1),
# so Conv1D slides along the feature axis.
X_train_cnn = np.expand_dims(x_train, axis=-1)
X_test_cnn = np.expand_dims(x_test, axis=-1)

In [173]:
# 1-D CNN regressor: two Conv1D/MaxPool1D stages over the feature axis,
# flattened into a small dense head with a single linear output unit.
model_cnn = Sequential()
model_cnn.add(Conv1D(filters=16, kernel_size=5, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
model_cnn.add(MaxPool1D(pool_size=2))
# Only the first layer of a Sequential model declares the input shape; later
# layers infer theirs from the previous layer's output, so no input_shape here.
model_cnn.add(Conv1D(filters=32, kernel_size=2, activation='relu'))
model_cnn.add(MaxPool1D(pool_size=2))
model_cnn.add(Flatten())
model_cnn.add(Dense(11, activation='relu'))
model_cnn.add(Dense(1, activation='linear'))

In [174]:
# Same training objective as the other models: Adam on mean squared error.
model_cnn.compile(optimizer='adam', loss='mean_squared_error')

In [123]:
# Early stopping for the CNN run: give up after 4 epochs without a val_loss
# improvement and restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=4,
    restore_best_weights=True,
)

In [175]:
# Fit the CNN; 20% of the training samples are held out to compute val_loss,
# which drives the early-stopping callback.
model_cnn.fit(
    X_train_cnn,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=2,
)

Epoch 1/50
678/678 - 2s - loss: 6392.2134 - val_loss: 1748.2080 - 2s/epoch - 3ms/step
Epoch 2/50
678/678 - 1s - loss: 1269.0494 - val_loss: 1000.9542 - 1s/epoch - 2ms/step
Epoch 3/50
678/678 - 1s - loss: 948.8330 - val_loss: 871.9277 - 1s/epoch - 2ms/step
Epoch 4/50
678/678 - 1s - loss: 831.1791 - val_loss: 761.3864 - 1s/epoch - 2ms/step
Epoch 5/50
678/678 - 1s - loss: 755.7728 - val_loss: 698.9586 - 1s/epoch - 2ms/step
Epoch 6/50
678/678 - 1s - loss: 710.1546 - val_loss: 666.4285 - 1s/epoch - 2ms/step
Epoch 7/50
678/678 - 1s - loss: 676.0725 - val_loss: 623.9120 - 1s/epoch - 2ms/step
Epoch 8/50
678/678 - 1s - loss: 645.7086 - val_loss: 651.5801 - 1s/epoch - 2ms/step
Epoch 9/50
678/678 - 1s - loss: 624.7430 - val_loss: 579.1151 - 1s/epoch - 2ms/step
Epoch 10/50
678/678 - 1s - loss: 604.6265 - val_loss: 568.5717 - 1s/epoch - 2ms/step
Epoch 11/50
678/678 - 1s - loss: 590.1046 - val_loss: 552.1035 - 1s/epoch - 2ms/step
Epoch 12/50
678/678 - 1s - loss: 573.6390 - val_loss: 538.0850 - 1s/ep

<keras.src.callbacks.History at 0x1f63005b3d0>

In [176]:
# Score the trained CNN on the hold-out split.
y_pred_cnn = model_cnn.predict(x=X_test_cnn)
metric(y_test=y_test, preds=y_pred_cnn)

MAE = 13.489125748982085
r2 = 0.9431691578122468


# MLP

In [226]:
# Fully connected regressor: hidden widths narrow from 128 down to 16, widen
# back up to 128, and end in a single linear output unit.
model = Sequential(
    [Dense(128, input_dim=x_train.shape[1], activation='relu')]
    + [Dense(width, activation='relu') for width in (64, 32, 16, 16, 32, 64, 128)]
    + [Dense(1, activation='linear')]
)

In [227]:
# Train the MLP regressor on mean squared error with the Adam optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')

# Stop once val_loss has failed to improve for 3 consecutive epochs and roll
# the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# validation_split=0.2 holds out 20% of the training samples to compute val_loss.
model.fit(x_train, y_train, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=2)

y_pred = model.predict(x_test)
# Ground truth goes first, predictions second. R² is not symmetric in its
# arguments, so the previous swapped call reported a wrong r2.
metric(y_test, y_pred)

Epoch 1/50
678/678 - 3s - loss: 2043.2936 - val_loss: 527.5334 - 3s/epoch - 4ms/step
Epoch 2/50
678/678 - 1s - loss: 500.2367 - val_loss: 558.2879 - 1s/epoch - 2ms/step
Epoch 3/50
678/678 - 1s - loss: 426.6586 - val_loss: 523.0687 - 1s/epoch - 2ms/step
Epoch 4/50
678/678 - 2s - loss: 403.8649 - val_loss: 371.6602 - 2s/epoch - 2ms/step
Epoch 5/50
678/678 - 2s - loss: 380.1226 - val_loss: 397.6148 - 2s/epoch - 2ms/step
Epoch 6/50
678/678 - 1s - loss: 356.4639 - val_loss: 376.0883 - 1s/epoch - 2ms/step
Epoch 7/50
678/678 - 2s - loss: 355.9767 - val_loss: 351.2362 - 2s/epoch - 2ms/step
Epoch 8/50
678/678 - 2s - loss: 346.0504 - val_loss: 340.8375 - 2s/epoch - 3ms/step
Epoch 9/50
678/678 - 2s - loss: 343.1530 - val_loss: 327.9735 - 2s/epoch - 3ms/step
Epoch 10/50
678/678 - 2s - loss: 335.2800 - val_loss: 325.3408 - 2s/epoch - 3ms/step
Epoch 11/50
678/678 - 2s - loss: 322.5540 - val_loss: 354.6678 - 2s/epoch - 3ms/step
Epoch 12/50
678/678 - 2s - loss: 319.3159 - val_loss: 301.9511 - 2s/epoch