In [3]:
import math
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import seed

seed(1)
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

import tensorflow
tensorflow.random.set_seed(1)
from tensorflow.python.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor 

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [4]:
df=pd.read_excel('data.xlsx')
df.head()

Unnamed: 0,Date,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Grad,Wind Gradient
0,2020-01-01,5.7,4.8,94.4,2.47,1029.0,128,-8,3
1,2020-01-02,9.3,8.2,92.5,7.42,1021.4,188,0,8
2,2020-01-03,8.2,6.8,91.4,6.81,1021.8,265,11,7
3,2020-01-04,5.6,3.6,87.2,3.94,1033.7,248,-1,2
4,2020-01-05,7.8,6.1,89.2,3.33,1033.4,224,-11,5


In [5]:
df['Year']  = df['Date'].apply(lambda x: int(str(x)[:4]))
df['Month'] = df['Date'].apply(lambda x: int(str(x)[5:7]))
df['Day']=df['Date'].apply(lambda x: int(str(x)[8:10]))
df.head()

Unnamed: 0,Date,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Grad,Wind Gradient,Year,Month,Day
0,2020-01-01,5.7,4.8,94.4,2.47,1029.0,128,-8,3,2020,1,1
1,2020-01-02,9.3,8.2,92.5,7.42,1021.4,188,0,8,2020,1,2
2,2020-01-03,8.2,6.8,91.4,6.81,1021.8,265,11,7,2020,1,3
3,2020-01-04,5.6,3.6,87.2,3.94,1033.7,248,-1,2,2020,1,4
4,2020-01-05,7.8,6.1,89.2,3.33,1033.4,224,-11,5,2020,1,5


In [6]:
del df['Date']
del df['Year']
del df['Month']
df.head()

Unnamed: 0,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Grad,Wind Gradient,Day
0,5.7,4.8,94.4,2.47,1029.0,128,-8,3,1
1,9.3,8.2,92.5,7.42,1021.4,188,0,8,2
2,8.2,6.8,91.4,6.81,1021.8,265,11,7,3
3,5.6,3.6,87.2,3.94,1033.7,248,-1,2,4
4,7.8,6.1,89.2,3.33,1033.4,224,-11,5,5


In [7]:
X = df.drop(['Wind Speed'], axis=1)
#Assign the Target column as the output 
Y= df['Wind Speed']

In [8]:
X_norm=(X-X.min())/(X.max()-X.min())
X_norm

Unnamed: 0,Temp,Dew Point,Humidity,Pressure,Wind Direction,Pressure Grad,Wind Gradient,Day
0,0.299639,0.487805,0.923913,0.752055,0.342679,0.449275,0.200000,0.000000
1,0.429603,0.606272,0.898098,0.647945,0.529595,0.565217,0.533333,0.033333
2,0.389892,0.557491,0.883152,0.653425,0.769470,0.724638,0.466667,0.066667
3,0.296029,0.445993,0.826087,0.816438,0.716511,0.550725,0.133333,0.100000
4,0.375451,0.533101,0.853261,0.812329,0.641745,0.405797,0.333333,0.133333
...,...,...,...,...,...,...,...,...
1091,0.198556,0.411150,0.972826,0.706849,0.676012,0.623188,0.066667,0.866667
1092,0.259928,0.470383,0.966033,0.767123,0.757009,0.594203,0.200000,0.900000
1093,0.415162,0.595819,0.915761,0.795890,0.738318,0.594203,0.266667,0.933333
1094,0.346570,0.564460,0.998641,0.831507,0.753894,0.565217,0.200000,0.966667


In [9]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X_norm, Y, test_size=0.3, random_state=0)

In [10]:
model = Sequential()
model.add(Dense(32, input_dim=x_train.shape[1], activation="sigmoid", kernel_initializer='normal'))
model.add(Dropout(0.2)) #dropping a few neurons for generalizing the model     
model.add(Dense(1, activation="linear", kernel_initializer='normal'))
adam = Adam(learning_rate=1e-3, decay=1e-3)

# Compile model
model.compile(loss="mean_squared_error", optimizer='adam', metrics=['mse','mae'])

In [12]:
print('Fit model...')
filepath="/home/m-marouni/Documents/CE-901/Heathrow/best_weights"
checkpoint = ModelCheckpoint(filepath, monitor='val_mae', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_mae', patience=100, verbose=1, mode='min')
callbacks_list = [checkpoint, early_stopping]

log = model.fit(x_train, y_train,
          validation_split=0.40, batch_size=30, epochs=1000, shuffle=True, callbacks=callbacks_list)

Fit model...
Epoch 1/1000

Epoch 00001: val_mae improved from inf to 1.78215, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 2/1000

Epoch 00002: val_mae improved from 1.78215 to 1.78171, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 3/1000

Epoch 00003: val_mae did not improve from 1.78171
Epoch 4/1000

Epoch 00004: val_mae improved from 1.78171 to 1.77342, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 5/1000

Epoch 00005: val_mae did not improve from 1.77342
Epoch 6/1000

Epoch 00006: val_mae did not improve from 1.77342
Epoch 7/1000

Epoch 00007: val_mae improved from 1.77342 to 1.77018, saving model to /ho

In [16]:
df_test=pd.read_excel('Test-july-London.xlsx')
df_test.head()

Unnamed: 0,Date,Temp,Dewpoint,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Gradient,Wind Gradient
0,2021-07-01,17.4,10.5,64.4,3,1017.1,242,-2,2
1,2021-07-02,20.0,12.7,63.6,3,1015.3,176,-4,2
2,2021-07-03,18.4,14.7,79.5,5,1011.5,171,-5,4
3,2021-07-04,17.8,13.9,78.7,6,1007.3,119,-4,5
4,2021-07-05,17.1,11.6,71.5,7,1004.2,63,-5,4


In [17]:
df_test['Year']  = df_test['Date'].apply(lambda x: int(str(x)[:4]))
df_test['Month'] = df_test['Date'].apply(lambda x: int(str(x)[5:7]))
df_test['Day']=df_test['Date'].apply(lambda x: int(str(x)[8:10]))
df_test.head()

Unnamed: 0,Date,Temp,Dewpoint,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Gradient,Wind Gradient,Year,Month,Day
0,2021-07-01,17.4,10.5,64.4,3,1017.1,242,-2,2,2021,7,1
1,2021-07-02,20.0,12.7,63.6,3,1015.3,176,-4,2,2021,7,2
2,2021-07-03,18.4,14.7,79.5,5,1011.5,171,-5,4,2021,7,3
3,2021-07-04,17.8,13.9,78.7,6,1007.3,119,-4,5,2021,7,4
4,2021-07-05,17.1,11.6,71.5,7,1004.2,63,-5,4,2021,7,5


In [18]:
del df_test['Year']
del df_test['Month']
del df_test['Date']

In [19]:
X2 = df_test.drop(['Wind Speed'], axis=1)
#Assign the Target column as the output 
Y2 = df_test['Wind Speed']

In [20]:
X2_norm=(X2-X2.min())/(X2.max()-X2.min())
X2_norm

Unnamed: 0,Temp,Dewpoint,Humidity,Pressure,Wind Direction,Pressure Gradient,Wind Gradient,Day
0,0.275862,0.0,0.12987,0.625442,0.94709,0.1875,0.0,0.0
1,0.724138,0.52381,0.095238,0.561837,0.597884,0.0625,0.0,0.0625
2,0.448276,1.0,0.78355,0.427562,0.571429,0.0,0.5,0.125
3,0.344828,0.809524,0.748918,0.279152,0.296296,0.0625,0.75,0.1875
4,0.224138,0.261905,0.437229,0.169611,0.0,0.0,0.5,0.25
5,0.0,0.380952,0.761905,0.0,0.57672,1.0,1.0,0.3125
6,0.206897,0.428571,0.558442,0.420495,0.37037,0.75,0.75,0.375
7,0.413793,0.619048,0.493506,0.689046,0.603175,0.375,0.25,0.4375
8,0.637931,0.666667,0.329004,0.724382,0.539683,0.0625,0.5,0.5
9,0.12069,0.238095,0.480519,0.586572,0.386243,0.1875,0.0,0.5625


In [21]:
new_predictions=model.predict(X2_norm)

In [22]:
new_predictions

array([[ 3.993516 ],
       [ 4.2683525],
       [ 7.1566286],
       [ 9.880888 ],
       [ 9.368763 ],
       [12.502867 ],
       [10.011696 ],
       [ 5.0256653],
       [ 6.4769526],
       [ 4.1227474],
       [ 5.305939 ],
       [ 4.15361  ],
       [ 3.9603145],
       [ 3.9203653],
       [ 6.3731923],
       [ 4.313264 ],
       [ 4.464552 ]], dtype=float32)

In [23]:
mean_absolute_error(Y2, new_predictions)

1.561056095011094

In [24]:
mean_squared_error(Y2, new_predictions)

3.8032586859098454