In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Dropout, LSTM
from keras.optimizers import Adam
from keras.regularizers import l1, l2, L1L2
from keras.callbacks import EarlyStopping
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU') #disables GPU


In [10]:
# Load the dataset
data = pd.read_csv('../filled_mean.csv')

data = data.set_index(pd.to_datetime(data['date']))
data = data.sort_index()
data = data.reset_index(drop=True)
dates = data["date"]
data = data.drop(["date"], axis=1)


In [11]:
#non linear models for black carbon exposure 

In [12]:
#data=data[['BC','N_CPC', 'PM-10', 'PM-2.5', 'PM-1.0', 'NO2', 'O3', 'CO', 'NO', 'TEMP', 'HUM']]
data=data[['BC', 'N_CPC', 'PM-10', 'PM-2.5', 'PM-1.0', 'CO', 'TEMP', 'NO2']]


In [23]:
test=int(len(data)/24*0.1)
val=int((len(data)-test)/24*0.1)
val

19

In [25]:
# Preprocess the dataset
scaler = StandardScaler(with_mean=False)
scaled_data = scaler.fit_transform(data)

test=int(len(data)/24*0.1)
val=int((len(data)-test)/24*0.1)
train=len(data)-test-val

# Split the dataset into training and testing sets


((4565, 8), (4546, 8))

In [6]:
def create_rnn_data(data, n_steps):
    X, y = [], []
    for i in range(0, len(data) - n_steps, 1):
        X.append(data[i:i + n_steps, 1:])
        y.append(data[i + n_steps, 0])
    return np.array(X), np.array(y)

n_steps = 24
X_train, y_train = create_rnn_data(train_data, n_steps)
X_val, y_val = create_rnn_data(val_data, n_steps)
X_test, y_test = create_rnn_data(test_data, n_steps)


In [7]:
input_shape=X_train.shape[-1]
X_val.shape, y_val.shape

((389, 24, 7), (389,))

In [8]:
def create_rnn_model(input_shape):
    model = Sequential()
    model.add(LSTM(30, activation='relu', input_shape=(n_steps, input_shape), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=1, activation='linear'))
    return model

model = create_rnn_model(input_shape=input_shape)
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error')
model.summary()
early_stop = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)
rlrop=tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=10, factor=0.2, min_lr=1e-5)
model.fit(X_train, y_train, epochs=300, batch_size=32, verbose=2, validation_data=(X_val, y_val), callbacks=[early_stop, rlrop])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 24, 30)            4560      
                                                                 
 dropout (Dropout)           (None, 24, 30)            0         
                                                                 
 dense (Dense)               (None, 24, 20)            620       
                                                                 
 dropout_1 (Dropout)         (None, 24, 20)            0         
                                                                 
 dense_1 (Dense)             (None, 24, 1)             21        
                                                                 
Total params: 5,201
Trainable params: 5,201
Non-trainable params: 0
_________________________________________________________________
Epoch 1/300


2023-06-05 10:55:42.363550: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


116/116 - 1s - loss: 1.1906 - val_loss: 0.9327 - lr: 0.0010 - 1s/epoch - 10ms/step
Epoch 2/300
116/116 - 0s - loss: 1.0515 - val_loss: 0.9302 - lr: 0.0010 - 476ms/epoch - 4ms/step
Epoch 3/300
116/116 - 0s - loss: 1.0325 - val_loss: 0.9182 - lr: 0.0010 - 466ms/epoch - 4ms/step
Epoch 4/300
116/116 - 0s - loss: 1.0241 - val_loss: 0.9188 - lr: 0.0010 - 462ms/epoch - 4ms/step
Epoch 5/300
116/116 - 0s - loss: 1.0161 - val_loss: 0.9179 - lr: 0.0010 - 441ms/epoch - 4ms/step
Epoch 6/300
116/116 - 0s - loss: 1.0154 - val_loss: 0.9174 - lr: 0.0010 - 454ms/epoch - 4ms/step
Epoch 7/300
116/116 - 0s - loss: 1.0122 - val_loss: 0.9308 - lr: 0.0010 - 477ms/epoch - 4ms/step
Epoch 8/300
116/116 - 0s - loss: 1.0115 - val_loss: 0.9244 - lr: 0.0010 - 461ms/epoch - 4ms/step
Epoch 9/300
116/116 - 0s - loss: 1.0085 - val_loss: 0.9195 - lr: 0.0010 - 462ms/epoch - 4ms/step
Epoch 10/300
116/116 - 0s - loss: 1.0066 - val_loss: 0.9228 - lr: 0.0010 - 448ms/epoch - 4ms/step
Epoch 11/300
116/116 - 0s - loss: 1.0083 - 

<keras.callbacks.History at 0x178afab50>

In [None]:
# Make predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Inverse scale the predictions
y_train_pred = scaler.inverse_transform(np.hstack((X_train[:, 0, :], y_train_pred)))
y_test_pred = scaler.inverse_transform(np.hstack((X_test[:, 0, :], y_test_pred)))

# Calculate RMSE and R2 metrics
train_rmse = np.sqrt(mean_squared_error(data.iloc[n_steps:len(y_train_pred) + n_steps, 0], y_train_pred[:, -1]))
test_rmse = np.sqrt(mean_squared_error(data.iloc[train_data.shape[1] + val_data.shape[1] + n_steps:, 0], y_test_pred[:, -1]))
train_r2 = r2_score(data.iloc[n_steps:len(y_train_pred) + n_steps, 0], y_train_pred[:, -1])
test_r2 = r2_score(data.iloc[train_data.shape[1] + val_data.shape[1] + n_steps:, 0], y_test_pred[:, -1])

