Predicting the Weather with Neural Networks
===========================================


Example neural network

![title](img/ANN_with_numbers.png)

Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

Read a CSV data file.

In [2]:
# Load the weather observations from a file next to the notebook and
# preview the first ten rows.
csv_path = 'weatherPerth.csv'
df = pd.read_csv(csv_path)
df.head(10)

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,2008-07-01,Perth,2.7,18.8,0.0,0.8,9.1,ENE,20.0,,...,53.0,1027.6,1024.5,2.0,3.0,8.5,18.1,No,0.0,No
1,2008-07-02,Perth,6.4,20.7,0.0,1.8,7.0,NE,22.0,ESE,...,39.0,1024.1,1019.0,0.0,6.0,11.1,19.7,No,0.4,No
2,2008-07-03,Perth,6.5,19.9,0.4,2.2,7.3,NE,31.0,,...,71.0,1016.8,1015.6,1.0,3.0,12.1,17.7,No,1.8,Yes
3,2008-07-04,Perth,9.5,19.2,1.8,1.2,4.7,W,26.0,NNE,...,73.0,1019.3,1018.4,6.0,6.0,13.2,17.7,Yes,1.8,Yes
4,2008-07-05,Perth,9.5,16.4,1.8,1.4,4.9,WSW,44.0,W,...,57.0,1020.4,1022.1,7.0,5.0,15.9,16.0,Yes,6.8,Yes
5,2008-07-06,Perth,0.7,15.9,6.8,2.4,9.3,NNE,24.0,ENE,...,41.0,1032.0,1029.6,0.0,1.0,6.9,15.5,Yes,0.0,No
6,2008-07-07,Perth,0.7,18.3,0.0,0.8,9.3,N,37.0,NE,...,36.0,1028.9,1024.2,1.0,5.0,8.7,17.9,No,0.0,No
7,2008-07-08,Perth,3.2,20.4,0.0,1.4,6.9,NNW,24.0,NE,...,42.0,1023.9,1021.1,6.0,5.0,10.2,19.3,No,8.0,Yes
8,2008-07-09,Perth,9.8,19.5,8.0,1.2,2.5,ESE,31.0,,...,64.0,1026.4,1024.9,7.0,6.0,12.1,18.7,Yes,4.6,Yes
9,2008-07-10,Perth,11.2,20.4,4.6,2.8,1.7,NNW,46.0,NE,...,50.0,1020.2,1014.0,7.0,7.0,13.4,19.0,Yes,2.6,Yes


In [3]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3193 entries, 0 to 3192
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           3193 non-null   object 
 1   Location       3193 non-null   object 
 2   MinTemp        3193 non-null   float64
 3   MaxTemp        3192 non-null   float64
 4   Rainfall       3193 non-null   float64
 5   Evaporation    3192 non-null   float64
 6   Sunshine       3188 non-null   float64
 7   WindGustDir    3188 non-null   object 
 8   WindGustSpeed  3188 non-null   float64
 9   WindDir9am     3059 non-null   object 
 10  WindDir3pm     3186 non-null   object 
 11  WindSpeed9am   3193 non-null   int64  
 12  WindSpeed3pm   3192 non-null   float64
 13  Humidity9am    3184 non-null   float64
 14  Humidity3pm    3185 non-null   float64
 15  Pressure9am    3192 non-null   float64
 16  Pressure3pm    3192 non-null   float64
 17  Cloud9am       3191 non-null   float64
 18  Cloud3pm

In [4]:
# Count the missing values in each column.
df.isna().sum()

Date               0
Location           0
MinTemp            0
MaxTemp            1
Rainfall           0
Evaporation        1
Sunshine           5
WindGustDir        5
WindGustSpeed      5
WindDir9am       134
WindDir3pm         7
WindSpeed9am       0
WindSpeed3pm       1
Humidity9am        9
Humidity3pm        8
Pressure9am        1
Pressure3pm        1
Cloud9am           2
Cloud3pm           4
Temp9am            0
Temp3pm            1
RainToday          0
RISK_MM            0
RainTomorrow       0
dtype: int64

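Pre-process the data. First, remove the variables that must not be used as features: `Date` and `Location` are not used as predictors, and `RISK_MM` is the amount of rain recorded on the following day (compare each row's `RISK_MM` with the next row's `Rainfall` above), so keeping it would leak the answer to `RainTomorrow`.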

In [5]:
# Columns removed before modelling.
exclude = ['Date', 'RISK_MM', 'Location']
# Rebind `df` instead of using inplace=True so the frame is not mutated
# behind the reader's back. errors='ignore' makes re-running this cell a
# no-op instead of a KeyError once the columns are already gone.
# NOTE(review): errors='ignore' also hides a misspelled column name in
# `exclude`; the shape check in the next cell (21 columns) guards against that.
df = df.drop(columns=exclude, errors='ignore')


In [6]:
# (rows, columns) after the drop: the 24 original columns minus the 3 excluded.
df.shape

(3193, 21)

Deal with missing values by dropping every row that contains at least one (this leaves 3025 of the 3193 rows).

In [7]:
# Keep only the rows in which every column has a value.
df = df.dropna(axis=0, how='any')

Convert the Yes/No variables to 1s and 0s.

In [8]:
# Encode the two Yes/No columns as integers (Yes -> 1, No -> 0).
bools = ['RainToday', 'RainTomorrow']
yes_no_codes = {'Yes': 1, 'No': 0}
for col in bools:
    # Only map columns that still hold the 'Yes'/'No' labels. Series.map()
    # turns every value that is not a key of the dict into NaN, so re-running
    # this cell on already-encoded 0/1 data would silently wipe both columns
    # (including the target).
    if not pd.api.types.is_numeric_dtype(df[col]):
        df[col] = df[col].map(yes_no_codes)


In [10]:
# Display the frame to check that RainToday / RainTomorrow now hold 0/1.
df

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
1,6.4,20.7,0.0,1.8,7.0,NE,22.0,ESE,ENE,6,...,80.0,39.0,1024.1,1019.0,0.0,6.0,11.1,19.7,0,0
3,9.5,19.2,1.8,1.2,4.7,W,26.0,NNE,NNW,11,...,93.0,73.0,1019.3,1018.4,6.0,6.0,13.2,17.7,1,1
4,9.5,16.4,1.8,1.4,4.9,WSW,44.0,W,SW,13,...,69.0,57.0,1020.4,1022.1,7.0,5.0,15.9,16.0,1,1
5,0.7,15.9,6.8,2.4,9.3,NNE,24.0,ENE,NE,4,...,86.0,41.0,1032.0,1029.6,0.0,1.0,6.9,15.5,1,0
6,0.7,18.3,0.0,0.8,9.3,N,37.0,NE,NNE,15,...,72.0,36.0,1028.9,1024.2,1.0,5.0,8.7,17.9,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,10.3,19.9,0.2,1.8,7.5,NW,37.0,NNE,NNW,9,...,89.0,60.0,1017.1,1013.8,5.0,6.0,13.0,18.5,0,1
3189,13.0,16.8,61.2,3.6,0.0,SSW,46.0,W,SW,17,...,90.0,75.0,1005.6,1008.9,7.0,7.0,16.4,15.6,1,0
3190,13.3,18.9,0.4,1.8,6.5,SE,37.0,SE,ESE,11,...,85.0,65.0,1019.2,1019.4,6.0,6.0,15.1,18.0,0,0
3191,11.5,18.2,0.0,3.8,9.3,SE,30.0,ESE,ESE,9,...,62.0,47.0,1025.9,1023.4,1.0,3.0,14.0,17.6,0,0


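Cyclical attributes

Wind direction is cyclical: NNW and N are neighbours on the compass even though they sit at opposite ends of the list of directions. Each direction is therefore converted to an angle and then replaced by its (cos, sin) pair, which keeps neighbouring directions close together.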

![title](img/cardinal.png)

Map Cardinal Direction to Radians

In [11]:
# The 16 compass points, clockwise starting at north.
dirs = ['N','NNE','NE','ENE','E','ESE','SE','SSE','S','SSW','SW','WSW','W','WNW','NW','NNW']
# One evenly spaced angle in [0, 2*pi) per compass point. linspace takes the
# number of points explicitly, so the count always equals len(dirs);
# np.arange with a floating-point step does not guarantee how many elements
# it returns, and zip() below would silently truncate on a mismatch.
angles = np.linspace(0.0, 2.0*np.pi, num=len(dirs), endpoint=False)
# Lookup table: direction label -> angle in radians.
wind_angles = dict(zip(dirs, angles))
print(wind_angles)

{'N': 0.0, 'NNE': 0.39269908169872414, 'NE': 0.7853981633974483, 'ENE': 1.1780972450961724, 'E': 1.5707963267948966, 'ESE': 1.9634954084936207, 'SE': 2.356194490192345, 'SSE': 2.748893571891069, 'S': 3.141592653589793, 'SSW': 3.5342917352885173, 'SW': 3.9269908169872414, 'WSW': 4.319689898685965, 'W': 4.71238898038469, 'WNW': 5.105088062083414, 'NW': 5.497787143782138, 'NNW': 5.890486225480862}


Replace cyclical attributes with sin() and cos()

In [13]:
# Replace each compass-direction column by two numeric columns holding the
# cosine and sine of its angle; the original label column is removed.
wind_attributes = ['WindGustDir', 'WindDir9am', 'WindDir3pm']
for direction_col in wind_attributes:
    radians = df[direction_col].map(wind_angles)  # label -> angle in radians
    df[direction_col + '_cos'] = np.cos(radians)
    df[direction_col + '_sin'] = np.sin(radians)
    df = df.drop(columns=[direction_col])


In [14]:
# Preview: the three direction columns are gone and six *_cos / *_sin
# columns have been appended at the end.
df.head(10)

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,...,Temp9am,Temp3pm,RainToday,RainTomorrow,WindGustDir_cos,WindGustDir_sin,WindDir9am_cos,WindDir9am_sin,WindDir3pm_cos,WindDir3pm_sin
1,6.4,20.7,0.0,1.8,7.0,22.0,6,9.0,80.0,39.0,...,11.1,19.7,0,0,0.7071068,0.707107,-0.3826834,0.92388,0.3826834,0.92388
3,9.5,19.2,1.8,1.2,4.7,26.0,11,6.0,93.0,73.0,...,13.2,17.7,1,1,-1.83697e-16,-1.0,0.9238795,0.382683,0.9238795,-0.382683
4,9.5,16.4,1.8,1.4,4.9,44.0,13,17.0,69.0,57.0,...,15.9,16.0,1,1,-0.3826834,-0.92388,-1.83697e-16,-1.0,-0.7071068,-0.707107
5,0.7,15.9,6.8,2.4,9.3,24.0,4,7.0,86.0,41.0,...,6.9,15.5,1,0,0.9238795,0.382683,0.3826834,0.92388,0.7071068,0.707107
6,0.7,18.3,0.0,0.8,9.3,37.0,15,13.0,72.0,36.0,...,8.7,17.9,0,0,1.0,0.0,0.7071068,0.707107,0.9238795,0.382683
7,3.2,20.4,0.0,1.4,6.9,24.0,9,13.0,58.0,42.0,...,10.2,19.3,0,1,0.9238795,-0.382683,0.7071068,0.707107,1.0,0.0
9,11.2,20.4,4.6,2.8,1.7,46.0,19,11.0,79.0,50.0,...,13.4,19.0,1,1,0.9238795,-0.382683,0.7071068,0.707107,0.9238795,0.382683
10,13.4,17.1,2.6,2.6,4.2,54.0,26,13.0,63.0,77.0,...,15.4,14.8,1,1,-0.7071068,-0.707107,-0.7071068,-0.707107,-0.7071068,-0.707107
11,7.9,17.3,4.4,2.2,5.4,19.0,7,9.0,99.0,59.0,...,9.6,16.2,1,0,-0.9238795,-0.382683,1.0,0.0,-0.9238795,-0.382683
12,5.3,18.8,0.2,1.0,2.3,17.0,7,7.0,96.0,63.0,...,9.5,17.2,0,0,1.0,0.0,0.7071068,0.707107,-1.83697e-16,-1.0


Extract attributes (X) and class labels (y).

In [16]:
# Separate the class label from the predictors.
target_col = 'RainTomorrow'
X = df.drop(columns=[target_col])  # every remaining column is a feature
y = df[target_col]

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,...,Cloud3pm,Temp9am,Temp3pm,RainToday,WindGustDir_cos,WindGustDir_sin,WindDir9am_cos,WindDir9am_sin,WindDir3pm_cos,WindDir3pm_sin
1,6.4,20.7,0.0,1.8,7.0,22.0,6,9.0,80.0,39.0,...,6.0,11.1,19.7,0,0.7071068,0.707107,-0.3826834,0.92388,0.382683,0.92388
3,9.5,19.2,1.8,1.2,4.7,26.0,11,6.0,93.0,73.0,...,6.0,13.2,17.7,1,-1.83697e-16,-1.0,0.9238795,0.382683,0.92388,-0.382683
4,9.5,16.4,1.8,1.4,4.9,44.0,13,17.0,69.0,57.0,...,5.0,15.9,16.0,1,-0.3826834,-0.92388,-1.83697e-16,-1.0,-0.707107,-0.707107
5,0.7,15.9,6.8,2.4,9.3,24.0,4,7.0,86.0,41.0,...,1.0,6.9,15.5,1,0.9238795,0.382683,0.3826834,0.92388,0.707107,0.707107
6,0.7,18.3,0.0,0.8,9.3,37.0,15,13.0,72.0,36.0,...,5.0,8.7,17.9,0,1.0,0.0,0.7071068,0.707107,0.92388,0.382683


Split dataset into training and testing subsets.

In [17]:
# Hold out a third of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)

# Report the size of each subset.
for label, subset in (
    ('X_train : ', X_train),
    ('X_test : ', X_test),
    ('y_train : ', y_train),
    ('y_test : ', y_test),
):
    print(label, subset.shape)

X_train :  (2026, 23)
X_test :  (999, 23)
y_train :  (2026,)
y_test :  (999,)


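Standardize the features. The scaler is fitted on the training set only and then applied to both subsets, so no information from the test set influences the scaling.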

In [19]:
# Learn the scaling parameters from the training rows only, then apply the
# same transformation to both subsets.
# Note: X_train / X_test are rebound to the scaled arrays from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Instantiate a neural network and train it.

![title](img/ANN_2_layers.png)

Input layer size: the network needs one input neuron per feature, i.e. per column of `X_train`.

In [20]:
# (training rows, features); the second number is the width of the input layer.
X_train.shape

(2026, 23)

Instantiate a neural network and train it.

In [22]:
# Two hidden layers of 50 neurons each; fixed seed for reproducible weights,
# and at most 500 training iterations.
mlp_settings = dict(hidden_layer_sizes=(50, 50), random_state=0, max_iter=500)
model = MLPClassifier(**mlp_settings)
# Train on the scaled training set (the fitted estimator is the cell output).
model.fit(X_train, y_train)

MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=500, random_state=0)

Predict target class for the testing set.

In [23]:
# Classify the held-out rows and report the fraction predicted correctly.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

0.7927927927927928


Search for best network layout.

In [24]:
# Candidate hidden-layer layouts, from a single 2-neuron layer up to three
# layers of 30 neurons.
candidate_layouts = ((2,), (5,), (10,), (10, 10), (20, 40), (30, 30, 30))
p = dict(hidden_layer_sizes=candidate_layouts)
# Untrained template network for the search. Note that this rebinds `model`,
# which until now referred to the fitted (50, 50) network.
model = MLPClassifier(random_state=0, max_iter=3000)
# 6-fold cross-validated search over the layouts, using the training set only.
gs = GridSearchCV(estimator=model, param_grid=p, cv=6)
gs.fit(X_train, y_train)

GridSearchCV(cv=6, estimator=MLPClassifier(max_iter=3000, random_state=0),
             param_grid={'hidden_layer_sizes': ((2,), (5,), (10,), (10, 10),
                                                (20, 40), (30, 30, 30))})

Display grid search results.

In [25]:
# Print the tried layouts, then the mean cross-validated score of each
# (same order).
for result_key in ('params', 'mean_test_score'):
    print(gs.cv_results_[result_key])

[{'hidden_layer_sizes': (2,)}, {'hidden_layer_sizes': (5,)}, {'hidden_layer_sizes': (10,)}, {'hidden_layer_sizes': (10, 10)}, {'hidden_layer_sizes': (20, 40)}, {'hidden_layer_sizes': (30, 30, 30)}]
[0.90424707 0.90621068 0.89287951 0.88992093 0.87561527 0.88054331]



Predictions using the best neural network.

In [26]:
# Evaluate the layout that scored best in cross-validation on the held-out
# test set (GridSearchCV refits it on the full training set by default).
best_model = gs.best_estimator_
y_pred = best_model.predict(X_test)
print(accuracy_score(y_test,y_pred))
# Accuracy alone cannot distinguish a useful classifier from one that always
# predicts the same class, so also show the per-class breakdown.
# NOTE(review): the recorded test accuracy is identical to the (50, 50)
# network's and well below the ~0.90 cross-validation scores; use the
# confusion matrix to confirm the model predicts both classes.
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.7927927927927928
