In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Dropout, LSTM
from keras.optimizers import Adam
from keras.regularizers import l1, l2, L1L2
from keras.callbacks import EarlyStopping
import tensorflow as tf
# Hide every GPU from TensorFlow so the rest of the notebook runs on CPU only.
tf.config.set_visible_devices([], device_type='GPU')


In [None]:
# Load the dataset and put the rows in chronological order.
raw = pd.read_csv('../filled_mean.csv')

# Sort on the parsed timestamps, then discard the temporary datetime index so
# the frame ends up with a plain 0..n-1 index.
chronological = (
    raw.set_index(pd.to_datetime(raw['date']))
    .sort_index()
    .reset_index(drop=True)
)

# `dates` keeps the "date" column as it was read from the CSV (now in sorted
# order); `data` holds every remaining column.
dates = chronological["date"]
data = chronological.drop(columns=["date"])


In [None]:
# Non-linear models for black carbon exposure

In [None]:
# Disabled alternative column set (also includes BC, O3, NO and HUM):
#data=data[['BC','N_CPC', 'PM-10', 'PM-2.5', 'PM-1.0', 'NO2', 'O3', 'CO', 'NO', 'TEMP', 'HUM']]

# Column order matters: create_rnn_data() uses column 0 as the target and the
# remaining columns as input features.
selected_columns = ['N_CPC', 'PM-10', 'PM-2.5', 'PM-1.0', 'CO', 'TEMP', 'NO2']
data = data.loc[:, selected_columns]


In [None]:
# Split the dataset chronologically (shuffle=False) BEFORE scaling:
# the last 10% of rows form the test set, and the last 10% of the remaining
# rows form the validation set.
train_raw, test_raw = train_test_split(data, test_size=0.1, shuffle=False)
train_raw, val_raw = train_test_split(train_raw, test_size=0.1, shuffle=False)

# Preprocess the dataset.
# The scaler is fitted on the training rows only, so that no statistics from
# the validation/test periods leak into the features the model is trained on.
# with_mean=False: columns are divided by their standard deviation but not centred.
scaler = StandardScaler(with_mean=False)
scaler.fit(train_raw)

train_data = scaler.transform(train_raw)
val_data = scaler.transform(val_raw)
test_data = scaler.transform(test_raw)

# Full scaled array in the original row order (train, then val, then test),
# kept for any code that still refers to `scaled_data`.
scaled_data = np.vstack((train_data, val_data, test_data))

In [None]:
def create_rnn_data(data, n_steps):
    """Cut a 2-D array into sliding windows for one-step-ahead prediction.

    For every start row ``s`` with ``s + n_steps < len(data)``:

    * the input window is ``data[s:s + n_steps, 1:]`` (all columns except the first),
    * the target is ``data[s + n_steps, 0]`` (first column of the row right
      after the window).

    Parameters
    ----------
    data : 2-D numpy array; column 0 is the target, the other columns are features.
    n_steps : number of consecutive rows in each input window.

    Returns
    -------
    (X, y) : ``X`` stacks the windows, ``y`` stacks the targets. When ``data``
        has ``n_steps`` rows or fewer, both are empty arrays.
    """
    window_starts = range(len(data) - n_steps)
    windows = [data[start:start + n_steps, 1:] for start in window_starts]
    targets = [data[start + n_steps, 0] for start in window_starts]
    return np.array(windows), np.array(targets)

# Window length fed to the network: 24*3 = 72 consecutive rows.
# NOTE(review): presumably three days of hourly samples - confirm the sampling interval.
n_steps = 3 * 24

# Window each split on its own, so no window straddles a split boundary.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_rnn_data(split, n_steps)
    for split in (train_data, val_data, test_data)
)


In [None]:
# Number of features per time step = size of the trailing axis of the windows.
input_shape = np.shape(X_train)[-1]
# Show the validation shapes as a quick sanity check.
(X_val.shape, y_val.shape)

In [None]:
def create_rnn_model(input_shape):
    """Build the (uncompiled) LSTM regressor that maps one window to one value.

    Architecture: LSTM(30, relu) -> Dropout(0.1) -> Dense(20, relu)
    -> Dropout(0.1) -> Dense(1, linear).

    Parameters
    ----------
    input_shape : int
        Number of features per time step. The number of time steps is taken
        from the notebook-level ``n_steps`` variable, which must be defined
        before this function is called.

    Returns
    -------
    keras ``Sequential`` model whose output has shape ``(batch, 1)``.
    """
    model = Sequential()
    # return_sequences=False: only the LSTM state after the final time step is
    # passed on, so the network emits ONE prediction per window. This matches
    # the targets from create_rnn_data (one scalar per window) and the
    # evaluation code, which stacks the predictions as a 2-D column.
    # With return_sequences=True the output would be (batch, n_steps, 1).
    model.add(LSTM(30, activation='relu', input_shape=(n_steps, input_shape), return_sequences=False))
    model.add(Dropout(0.1))
    model.add(Dense(units=20, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(units=1, activation='linear'))
    return model

# Build the network and compile it with Adam + mean-squared-error loss.
model = create_rnn_model(input_shape=input_shape)
optimizer = Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer)
model.summary()

# Stop once val_loss has not improved for 30 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
)
# Scale the learning rate by 0.2 after 10 epochs without val_loss improvement,
# never going below 1e-5.
rlrop = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.2,
    patience=10,
    min_lr=1e-5,
)

model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=32,
    callbacks=[early_stop, rlrop],
    verbose=2,
)

In [None]:
# Make predictions (still expressed in the scaler's units)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)


def _target_to_original_units(pred_scaled, windows):
    """Convert scaled target predictions back to the units of the raw data.

    The scaler was fitted on all columns, with the target in column 0, so the
    prediction has to sit in column 0 of the array given to
    ``inverse_transform``. The remaining columns are filled with feature
    values from ``windows`` purely as padding; they do not influence the
    target column.

    Returns a ``(n_samples, 1)`` array, so both ``[:, 0]`` and ``[:, -1]``
    select the prediction.
    """
    pred_scaled = np.asarray(pred_scaled)
    # One value per window: (n, 1) stays as is; if the model emits a whole
    # sequence, (n, steps, 1), keep the prediction of the final time step.
    last_step = pred_scaled.reshape(len(pred_scaled), -1)[:, -1:]
    padded = np.hstack((last_step, windows[:, 0, :]))
    return scaler.inverse_transform(padded)[:, :1]


# Inverse scale the predictions
y_train_pred = _target_to_original_units(y_train_pred, X_train)
y_test_pred = _target_to_original_units(y_test_pred, X_test)

# Ground truth in original units, aligned with the windows:
# the i-th window of a split predicts the row n_steps after that split's i-th row.
target = data.iloc[:, 0]
y_train_true = target.iloc[n_steps:n_steps + len(y_train_pred)]
# The test split starts after all training and validation ROWS (shape[0]).
test_start = train_data.shape[0] + val_data.shape[0] + n_steps
y_test_true = target.iloc[test_start:]

# Calculate RMSE and R2 metrics
train_rmse = np.sqrt(mean_squared_error(y_train_true, y_train_pred[:, -1]))
test_rmse = np.sqrt(mean_squared_error(y_test_true, y_test_pred[:, -1]))
train_r2 = r2_score(y_train_true, y_train_pred[:, -1])
test_r2 = r2_score(y_test_true, y_test_pred[:, -1])

