In [18]:
import numpy as np
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import GaussianNoise
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
import pandas as pd
import matplotlib.pyplot as plt
from math import sqrt 
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

%matplotlib inline
#%tensorflow_version 1.x
import tensorflow as tf
print(tf.__version__)


2.9.1


In [2]:
# Read the semicolon-delimited CSV: the first row supplies the column names
# and the first column becomes the DataFrame index.
dataset = pd.read_csv(
    'eMalahleniIM.csv',
    sep=';',
    header=0,
    index_col=0,
)
# Plain NumPy matrix of the table (rows x columns), without the index.
values = dataset.values

In [3]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a (multivariate) time series as a supervised-learning table.

    Each output row holds the ``n_in`` previous observations followed by the
    current observation and the next ``n_out - 1`` observations.

    Parameters
    ----------
    data : list, 1-D or 2-D array-like, Series or DataFrame
        Observations ordered in time, one row per time step. Anything the
        ``pd.DataFrame`` constructor accepts works.
    n_in : int, default 1
        Number of lagged steps (t-n_in ... t-1) used as input columns.
    n_out : int, default 1
        Number of forecast steps (t ... t+n_out-1) used as output columns.
    dropnan : bool, default True
        Drop the rows that contain NaN values introduced by shifting.

    Returns
    -------
    pd.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``
        with ``j`` counted from 1.
    """
    df = pd.DataFrame(data)
    # Count the variables from the frame itself, not from the input's type:
    # this also handles 1-D arrays / Series (one variable) and lists of rows,
    # for which `data.shape[1]` or the "a list means one variable" shortcut
    # either raised or produced the wrong number of column names.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names.extend('var%d(t-%d)' % (j + 1, lag) for j in range(n_vars))
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names.extend('var%d(t)' % (j + 1) for j in range(n_vars))
        else:
            names.extend('var%d(t+%d)' % (j + 1, step) for j in range(n_vars))
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values (the first n_in and last n_out-1 rows)
    if dropnan:
        agg = agg.dropna()
    return agg

In [4]:
# Show the column names taken from the CSV header row.
dataset.columns

Index(['pm2.5', 'pm10', 'so2', 'no2', 'no', 'nox', 'o3', 'co', 'ws', 'wd',
       'temp', 'relHum', 'pressure'],
      dtype='object')

In [5]:
# ensure all data is float
# Start from the raw table each time so re-running this cell is idempotent
# (the name `values` is rebound to the reframed matrix at the end of the cell).
values = dataset.values.astype('float32')
n_features = values.shape[1]

# normalize every feature to the range [0, 1]
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/validation/test split performed later in the notebook, so the min/max
# of the held-out rows influence the training features.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# frame as supervised learning: one lagged step (t-1) in, the current step (t) out
reframed = series_to_supervised(scaled, 1, 1)

# drop columns we don't want to predict
# We are only interested in predicting a single variable (the first column),
# so keep the n_features lagged inputs plus var1(t) and drop var2(t)...varN(t).
# The slice is derived from the feature count instead of a hard-coded index list.
reframed = reframed.drop(columns=reframed.columns[n_features + 1:])
values = reframed.values

In [6]:
# Preview the first rows of the lagged-input / current-target table.
reframed.head()

Unnamed: 0,var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var9(t-1),var10(t-1),var11(t-1),var12(t-1),var13(t-1),var1(t)
1,0.018966,0.016509,0.017832,0.023121,0.007442,0.015058,0.380849,0.004532,0.487526,0.493486,0.546332,0.833146,0.549068,0.026136
2,0.026136,0.016882,0.06939,0.088378,0.006463,0.036362,0.334627,0.003827,0.455301,0.539821,0.50361,0.942542,0.561046,0.038245
3,0.038245,0.024513,0.104785,0.121924,0.00858,0.049843,0.225011,0.02427,0.373181,0.403761,0.495473,0.916471,0.562208,0.015315
4,0.015315,0.009721,0.03512,0.013445,0.005276,0.009682,0.431832,0.022155,0.505198,0.31312,0.49293,0.856865,0.545851,0.010145
5,0.010145,0.006823,0.022419,0.026773,0.006716,0.015604,0.4087,0.006143,0.279626,0.275397,0.513782,0.789183,0.537181,0.023049


In [7]:
# Sanity check: (rows, columns) of the reframed matrix.
values.shape

(87645, 14)

In [8]:
# Inputs: every column except the last one.
X = values[:,:-1]

In [9]:
# Target: the last column only.
Y = values[:,-1]

In [10]:
# Sanity check: (samples, features) of the input matrix.
X.shape

(87645, 13)

In [11]:
# Sanity check: one target value per sample.
Y.shape

(87645,)

In [12]:
# Insert a singleton middle axis: (samples, features) -> (samples, 1, features),
# matching the input_shape=(1, 13) used by the Conv1D models in this notebook.
# Rebuilt from `values` rather than from X itself so that re-running this cell
# does not fail on an already-reshaped X.
X = values[:, :-1].reshape(values.shape[0], 1, values.shape[1] - 1)

In [13]:
# Sanity check: X should now carry three axes.
X.shape

(87645, 1, 13)

In [14]:
# Hold out 20% of all samples as the test set ...
# NOTE(review): train_test_split shuffles rows by default, so temporal order is
# not preserved across the splits - confirm this is intended for this series.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=42,
)
# ... then split 20% of the remaining samples off as the validation set.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train,
    Y_train,
    test_size=0.20,
    random_state=42,
)

In [15]:
# Report the shape of every split; a blank line separates the three groups.
print('X_train:', X_train.shape)
print('Y_train:', Y_train.shape, end='\n\n')
print('X_val:', X_val.shape)
print('Y_val:', Y_val.shape, end='\n\n')
print('X_test:', X_test.shape)
print('Y_test:', Y_test.shape)

X_train: (56092, 1, 13)
Y_train: (56092,)

X_val: (14024, 1, 13)
Y_val: (14024,)

X_test: (17529, 1, 13)
Y_test: (17529,)


# CNN EPOCHS

In [None]:
# Define the function to create the CNN model
def create_model():
    """Build and compile the baseline CNN used for the epoch search.

    The network is one Conv1D layer (64 filters, kernel size 1, ReLU) on
    inputs of shape (1, 13), a size-1 max-pooling layer, and a single-unit
    Dense output. It is compiled with the Adam optimizer and MSE loss.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential([
        Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(1, 13)),
        MaxPooling1D(pool_size=1),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Wrap the Keras builder as a scikit-learn estimator; the batch size is fixed
# while the number of epochs is searched.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    batch_size=16,
    verbose=0,
)
# define the grid search parameters: candidate epoch counts
epochs = [20, 30, 40, 50, 60]
param_grid = {'epochs': epochs}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# CNN FILTERS

In [None]:
# Define the function to create the CNN model
def create_model(filters):
    """Build and compile the CNN with a configurable number of Conv1D filters.

    Args:
        filters: Number of filters for the single Conv1D layer.

    Returns:
        A compiled Sequential model (Conv1D -> MaxPooling1D -> Dense(1)),
        using the Adam optimizer and MSE loss.
    """
    conv = Conv1D(filters=filters, kernel_size=1, activation='relu', input_shape=(1, 13))
    pool = MaxPooling1D(pool_size=1)
    head = Dense(1)

    network = Sequential()
    for layer in (conv, pool, head):
        network.add(layer)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Wrap the Keras builder as a scikit-learn estimator with fixed epochs and batch size.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    epochs=50,
    batch_size=16,
    verbose=0,
)
# define the grid search parameters: the `model__` prefix routes the value to
# the `filters` argument of create_model
filters = [32, 64, 128, 256]
param_grid = {'model__filters': filters}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)


In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# CNN BATCH SIZE

In [None]:
# Define the function to create the CNN model
def create_model():
    """Build and compile the 256-filter CNN used for the batch-size search.

    The network is one Conv1D layer (256 filters, kernel size 1, ReLU) on
    inputs of shape (1, 13), a size-1 max-pooling layer, and a single-unit
    Dense output. It is compiled with the Adam optimizer and MSE loss.

    Returns:
        The compiled Sequential model.
    """
    stack = [
        Conv1D(filters=256, kernel_size=1, activation='relu', input_shape=(1, 13)),
        MaxPooling1D(pool_size=1),
        Dense(1),
    ]
    network = Sequential(stack)
    network.compile(optimizer='adam', loss='mse')
    return network

In [None]:
# Wrap the Keras builder as a scikit-learn estimator; epochs are fixed while
# the batch size is searched.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    epochs=50,
    verbose=0,
)
# define the grid search parameters: candidate batch sizes
batch_size = [16, 32, 64, 128, 256]
param_grid = {'batch_size': batch_size}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# CNN HIDDEN LAYERS

In [None]:
def create_model(hidden_layers=1):
    """Build and compile a CNN with a configurable number of extra conv blocks.

    Args:
        hidden_layers: How many additional Conv1D + MaxPooling1D pairs to
            stack after the first (input) pair. Defaults to 1.

    Returns:
        A compiled Sequential model using MSE loss and the Adam optimizer.
    """
    model = Sequential()

    # First conv/pool pair; this one declares the (1, 13) input shape.
    model.add(Conv1D(filters=256, kernel_size=1, activation='relu', input_shape=(1, 13)))
    model.add(MaxPooling1D(pool_size=1))

    # Additional conv/pool pairs, one per requested hidden layer.
    for _ in range(hidden_layers):
        model.add(Conv1D(filters=256, kernel_size=1, activation='relu'))
        model.add(MaxPooling1D(pool_size=1))

    # Single-unit output with a sigmoid activation.
    # NOTE(review): the other create_model variants in this notebook use a
    # linear Dense(1) output - confirm the sigmoid here is intentional.
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='mse', optimizer='adam')
    return model

In [None]:
# Wrap the Keras builder as a scikit-learn estimator. `hidden_layers` is given
# as a plain keyword here (no `model__` prefix), and the grid below uses the
# same un-prefixed name.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    hidden_layers=1,
    epochs=50,
    batch_size=16,
    verbose=0,
)
# define the grid search parameters: candidate numbers of extra conv blocks
hidden_layers = [1, 2, 4, 6]
param_grid = {'hidden_layers': hidden_layers}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# CNN DROPOUT

In [16]:
# Define the function to create the CNN model
def create_model(dropout_rate):
    """Build and compile a three-block CNN with dropout after every block.

    Each block is Conv1D (64 filters, kernel size 1, ReLU) -> MaxPooling1D
    (pool size 1) -> Dropout. Only the first Conv1D declares the (1, 13)
    input shape. A single-unit Dense layer forms the output.

    Args:
        dropout_rate: Rate passed to every Dropout layer.

    Returns:
        A compiled Sequential model using the Adam optimizer and MSE loss.
    """
    model = Sequential()
    for block_index in range(3):
        conv_options = dict(filters=64, kernel_size=1, activation='relu')
        if block_index == 0:
            conv_options['input_shape'] = (1, 13)
        model.add(Conv1D(**conv_options))
        model.add(MaxPooling1D(pool_size=1))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

In [19]:
# Wrap the Keras builder as a scikit-learn estimator with fixed epochs and batch size.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    epochs=50,
    batch_size=16,
    verbose=0,
)
# define the grid search parameters: the `model__` prefix routes the value to
# the `dropout_rate` argument of create_model
dropout_rate = [0.2, 0.4, 0.6, 0.8]
param_grid = {'model__dropout_rate': dropout_rate}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)

KeyboardInterrupt: 

In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Define the function to create the CNN model
def create_model(GaussianNoise_rate):
    """Build and compile a three-block CNN with Gaussian noise after every block.

    Each block is Conv1D (64 filters, kernel size 1, ReLU) -> MaxPooling1D
    (pool size 1) -> GaussianNoise. Only the first Conv1D declares the
    (1, 13) input shape. A single-unit Dense layer forms the output.

    Args:
        GaussianNoise_rate: Value passed as the first argument of every
            GaussianNoise layer (Keras documents that argument as the
            standard deviation of the noise).

    Returns:
        A compiled Sequential model using the Adam optimizer and MSE loss.
    """
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(1, 13)))
    model.add(MaxPooling1D(pool_size=1))
    # The three GaussianNoise lines previously called `odel.add(...)`, which
    # raised NameError as soon as the builder was invoked.
    model.add(GaussianNoise(GaussianNoise_rate))
    model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
    model.add(MaxPooling1D(pool_size=1))
    model.add(GaussianNoise(GaussianNoise_rate))
    model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
    model.add(MaxPooling1D(pool_size=1))
    model.add(GaussianNoise(GaussianNoise_rate))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Wrap the Keras builder as a scikit-learn estimator.
# NOTE(review): epochs=16 / batch_size=32 differ from the epochs=50 /
# batch_size=16 used by the other searches in this notebook - confirm intended.
model = KerasRegressor(
    model=create_model,
    loss="mse",
    epochs=16,
    batch_size=32,
    verbose=0,
)
# define the grid search parameters: the `model__` prefix routes the value to
# the `GaussianNoise_rate` argument of create_model
GaussianNoise_rate = [0.2, 0.4, 0.6, 0.8]
param_grid = {'model__GaussianNoise_rate': GaussianNoise_rate}
# 3-fold cross-validated search over the grid, with n_jobs=-1 parallelism.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=-1,
    cv=3,
)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# summarize results: best configuration first, then every candidate with the
# mean and standard deviation of its cross-validated test score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))