In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import seed

seed(1)
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

import tensorflow
tensorflow.random.set_seed(1)
from tensorflow.python.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor 

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [2]:
df=pd.read_excel('Pre-Processed-Data.xlsx')
df.head()

Unnamed: 0,Wind Speed,Pressure,Pressure Grad,Wind Gradient
0,2.47,1029.0,-8,3
1,7.42,1021.4,0,8
2,6.81,1021.8,11,7
3,3.94,1033.7,-1,2
4,3.33,1033.4,-11,5


In [3]:
len(df)

1096

In [4]:
X = df.drop(['Wind Speed'], axis=1)
#Assign the Target column as the output 
Y= df['Wind Speed']

In [5]:
X_norm=(X-X.min())/(X.max()-X.min())
X_norm

Unnamed: 0,Pressure,Pressure Grad,Wind Gradient
0,0.752055,0.449275,0.200000
1,0.647945,0.565217,0.533333
2,0.653425,0.724638,0.466667
3,0.816438,0.550725,0.133333
4,0.812329,0.405797,0.333333
...,...,...,...
1091,0.706849,0.623188,0.066667
1092,0.767123,0.594203,0.200000
1093,0.795890,0.594203,0.266667
1094,0.831507,0.565217,0.200000


In [6]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X_norm, Y, test_size=0.3, random_state=0)

In [7]:
model = Sequential()
model.add(Dense(32, input_dim=x_train.shape[1], activation="sigmoid", kernel_initializer='normal'))
model.add(Dropout(0.2)) #dropping a few neurons for generalizing the model
model.add(Dense(1, activation="linear", kernel_initializer='normal'))
adam = Adam(learning_rate=1e-3, decay=1e-3)

# Compile model
model.compile(loss="mean_squared_error", optimizer='adam', metrics=['mse','mae'])

In [8]:
print('Fit model...')
filepath="/home/m-marouni/Documents/CE-901/Heathrow/best_weights"
checkpoint = ModelCheckpoint(filepath, monitor='val_mae', verbose=1, save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_mae', patience=100, verbose=1, mode='min')
callbacks_list = [checkpoint, early_stopping]

log = model.fit(x_train, y_train,
          validation_split=0.40, batch_size=30, epochs=1000, shuffle=True, callbacks=callbacks_list)

Fit model...
Epoch 1/1000

Epoch 00001: val_mae improved from inf to 5.53338, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 2/1000

Epoch 00002: val_mae improved from 5.53338 to 5.25443, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 3/1000

Epoch 00003: val_mae improved from 5.25443 to 4.97560, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 4/1000

Epoch 00004: val_mae improved from 4.97560 to 4.69550, saving model to /home/m-marouni/Documents/CE-901/Heathrow/best_weights
INFO:tensorflow:Assets written to: /home/m-marouni/Documents/CE-901/Heathrow/best_weights/assets
Epoch 5/1000

Epoch 00005: val_mae improved

In [48]:
df_test = pd.read_excel('test-data.xlsx')
df_test.head()

Unnamed: 0,Date,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Gradient,Wind Gradient
0,2021-01-01,2.7,0.8,91.47,5,1010.8,293.75,3,4
1,2021-01-02,3.2,3.1,82.71,5,1014.4,306.38,1,4
2,2021-01-03,4.3,2.6,84.5,7,1015.7,105.71,0,5
3,2021-01-04,4.1,3.3,80.73,10,1016.5,39.13,0,7
4,2021-01-05,3.9,3.4,83.66,7,1017.1,20.71,0,5


In [49]:
df_test['Year']  = df_test['Date'].apply(lambda x: int(str(x)[:4]))
df_test['Month'] = df_test['Date'].apply(lambda x: int(str(x)[5:7]))
df_test['Day']=df_test['Date'].apply(lambda x: int(str(x)[8:10]))
df_test.head()

Unnamed: 0,Date,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Gradient,Wind Gradient,Year,Month,Day
0,2021-01-01,2.7,0.8,91.47,5,1010.8,293.75,3,4,2021,1,1
1,2021-01-02,3.2,3.1,82.71,5,1014.4,306.38,1,4,2021,1,2
2,2021-01-03,4.3,2.6,84.5,7,1015.7,105.71,0,5,2021,1,3
3,2021-01-04,4.1,3.3,80.73,10,1016.5,39.13,0,7,2021,1,4
4,2021-01-05,3.9,3.4,83.66,7,1017.1,20.71,0,5,2021,1,5


In [50]:
del df_test['Date']
del df_test['Year']
del df_test['Month']
df_test.head()

Unnamed: 0,Temp,Dew Point,Humidity,Wind Speed,Pressure,Wind Direction,Pressure Gradient,Wind Gradient,Day
0,2.7,0.8,91.47,5,1010.8,293.75,3,4,1
1,3.2,3.1,82.71,5,1014.4,306.38,1,4,2
2,4.3,2.6,84.5,7,1015.7,105.71,0,5,3
3,4.1,3.3,80.73,10,1016.5,39.13,0,7,4
4,3.9,3.4,83.66,7,1017.1,20.71,0,5,5


In [51]:
X2 = df_test.drop(['Wind Speed'], axis=1)
#Assign the Target column as the output 
Y2= df_test['Wind Speed']

In [52]:
X2_norm=(X2-X2.min())/(X2.max()-X2.min())
X2_norm

Unnamed: 0,Temp,Dew Point,Humidity,Pressure,Wind Direction,Pressure Gradient,Wind Gradient,Day
0,0.250000,0.505376,0.892120,0.459108,0.955788,0.612903,0.222222,0.000000
1,0.284722,0.629032,0.686679,0.526022,1.000000,0.548387,0.222222,0.033333
2,0.361111,0.602151,0.728659,0.550186,0.297546,0.516129,0.333333,0.066667
3,0.347222,0.639785,0.640244,0.565056,0.064480,0.516129,0.555556,0.100000
4,0.333333,0.645161,0.708959,0.576208,0.000000,0.516129,0.333333,0.133333
...,...,...,...,...,...,...,...,...
85,0.631944,0.655914,0.494137,0.657993,0.851647,0.612903,0.555556,0.866667
86,0.750000,0.849462,0.702861,0.723048,0.767634,0.580645,0.777778,0.900000
87,0.715278,0.854839,0.784944,0.775093,0.739630,0.483871,0.555556,0.933333
88,0.756944,0.741935,0.534006,0.765799,0.662618,0.290323,0.222222,0.966667


In [53]:
new_predictions=model.predict(X2_norm)

In [54]:
new_predictions

array([[ 5.3715024],
       [ 5.224792 ],
       [ 6.208459 ],
       [ 7.9523764],
       [ 6.3059316],
       [ 5.929008 ],
       [ 5.6938744],
       [ 4.6488967],
       [ 4.462004 ],
       [ 4.2317953],
       [ 6.3226433],
       [ 6.8128557],
       [ 4.1144824],
       [ 8.641611 ],
       [ 3.6358676],
       [ 6.2534885],
       [ 5.386528 ],
       [ 5.747332 ],
       [ 9.489639 ],
       [12.092268 ],
       [11.092757 ],
       [ 7.35812  ],
       [ 6.9331865],
       [ 7.8762875],
       [ 8.486825 ],
       [ 5.6867247],
       [ 4.5355787],
       [ 6.977001 ],
       [ 8.206758 ],
       [10.238571 ],
       [ 9.641021 ],
       [ 6.810918 ],
       [ 8.472464 ],
       [ 7.008192 ],
       [ 5.6214485],
       [ 5.0362177],
       [ 6.1486807],
       [10.816036 ],
       [ 9.505503 ],
       [10.66745  ],
       [ 7.236031 ],
       [ 6.5052757],
       [ 9.685202 ],
       [ 9.428844 ],
       [ 8.510004 ],
       [ 6.2615533],
       [ 7.037179 ],
       [ 7.75

In [55]:
mean_absolute_error(Y2, new_predictions)

2.2366616196102567

In [56]:
mean_squared_error(Y2, new_predictions)

8.550669381221887

In [60]:
df_test['Predicitions'] = new_predictions
#Save the dataframe as csv file

In [62]:
df_test['Predicitions'] = df_test['Predicitions'].apply(lambda x: int(x) if x == x else "")
df_test.to_csv('results102.xlsx', index = False)