In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
df = pd.read_csv("weatherAUS.csv")
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No


In [5]:
dirs = ['N' , 'NNE' , 'NE' , 'ENE' , 'E' , 'ESE' , 'SE' , 'SSE' , 'S' , 'SSW' , 'SW' , 'WSW' , 'W' , 'WW' , 'NW' , 'NNW']
# dirs is a cyclic attribute so we cant just convert it into numberical data.
# so we are trying to convert it into radians and then the sin and cosine values
angles = np.arange(0.0 , 2.0*np.pi , 2.0*np.pi/16)
wind_angles = dict(zip(dirs,angles))
print(wind_angles)

{'N': 0.0, 'NNE': 0.39269908169872414, 'NE': 0.7853981633974483, 'ENE': 1.1780972450961724, 'E': 1.5707963267948966, 'ESE': 1.9634954084936207, 'SE': 2.356194490192345, 'SSE': 2.748893571891069, 'S': 3.141592653589793, 'SSW': 3.5342917352885173, 'SW': 3.9269908169872414, 'WSW': 4.319689898685965, 'W': 4.71238898038469, 'WW': 5.105088062083414, 'NW': 5.497787143782138, 'NNW': 5.890486225480862}


In [7]:
wind_attributes = ['WindGustDir' , 'WindDir9am' , 'WindDir3pm']
for att in wind_attributes:
    df[att] = df[att].map(wind_angles)
    df[att + '_cos'] = np.cos(df[att])
    df[att + '_sin'] = np.sin(df[att])
    df = df.drop(columns = att)
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,...,Temp9am,Temp3pm,RainToday,RainTomorrow,WindGustDir_cos,WindGustDir_sin,WindDir9am_cos,WindDir9am_sin,WindDir3pm_cos,WindDir3pm_sin
0,2008-12-01,Albury,13.4,22.9,0.6,,,44.0,20.0,24.0,...,16.9,21.8,No,No,-1.83697e-16,-1.0,-1.83697e-16,-1.0,,
1,2008-12-02,Albury,7.4,25.1,0.0,,,44.0,4.0,22.0,...,17.2,24.3,No,No,,,0.9238795,-0.382683,-0.3826834,-0.92388
2,2008-12-03,Albury,12.9,25.7,0.0,,,46.0,19.0,26.0,...,21.0,23.2,No,No,-0.3826834,-0.92388,-1.83697e-16,-1.0,-0.3826834,-0.92388
3,2008-12-04,Albury,9.2,28.0,0.0,,,24.0,11.0,9.0,...,18.1,26.5,No,No,0.7071068,0.707107,-0.7071068,0.707107,6.123234000000001e-17,1.0
4,2008-12-05,Albury,17.5,32.3,1.0,,,41.0,7.0,20.0,...,17.8,29.7,No,No,-1.83697e-16,-1.0,0.3826834,0.92388,0.7071068,-0.707107


In [None]:
# 5 instantiate 
nn = MLPClassifier(hidden_layer_sizes = (50 , 50) , random_state = 0 , max_iter = 500)
# by this we have define only 2 hidden layers and each layer contains 50 neurons , max_iter is basically the epoch
nn.fit(x_train , y_train)

In [None]:
#6 predicting the output
y_pred = nn.predict(x_test)
print(accuracy_score(y_test , y_pred))

In [None]:
# GrisSearch is used to try different layer - neuron combination and then find the best one out with the given options
# here the option givens are 2 layers with 2 and 10 neurons respectively 
# and another with both 50 neurons

# 1 hidden layer
p = { 'hidden_layer_sizes' : ((2,) , (10,) , (50,50))}

#2 layer
nn= MLPClassifier(max_iter = 2000 , random_state = 0)

#3
gs = GridSearchCV(nn , p , cv = 3)
gs.fit(x_train , y_train)

#4 print GS result
print(gs.cv_results_['params'])
print(gs.cv_results_['mean_test_score'])