In [276]:
import pandas as pd
import seaborn as sns
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D, MaxPool2D, Flatten, Dropout

In [277]:
# Read the weather observations and use the parsed Date column as the row index.
df = pd.read_csv(
    '../input/weather-dataset-rattle-package/weatherAUS.csv',
    parse_dates=['Date'],
    index_col='Date',
)
# Show (rows, columns) as the cell output.
df.shape

(145460, 22)

In [278]:
# Preview the first two rows to sanity-check the parsed columns and the Date index.
df.head(2)

Unnamed: 0_level_0,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,WNW,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,WSW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No


In [279]:
# Show each column's dtype; this separates the float measurements from the
# object (string) columns.
df.dtypes

Location          object
MinTemp          float64
MaxTemp          float64
Rainfall         float64
Evaporation      float64
Sunshine         float64
WindGustDir       object
WindGustSpeed    float64
WindDir9am        object
WindDir3pm        object
WindSpeed9am     float64
WindSpeed3pm     float64
Humidity9am      float64
Humidity3pm      float64
Pressure9am      float64
Pressure3pm      float64
Cloud9am         float64
Cloud3pm         float64
Temp9am          float64
Temp3pm          float64
RainToday         object
RainTomorrow      object
dtype: object

In [280]:
# Percentage of missing values per column (mean of the boolean null mask, times 100).
df.isna().mean() * 100

Location          0.000000
MinTemp           1.020899
MaxTemp           0.866905
Rainfall          2.241853
Evaporation      43.166506
Sunshine         48.009762
WindGustDir       7.098859
WindGustSpeed     7.055548
WindDir9am        7.263853
WindDir3pm        2.906641
WindSpeed9am      1.214767
WindSpeed3pm      2.105046
Humidity9am       1.824557
Humidity3pm       3.098446
Pressure9am      10.356799
Pressure3pm      10.331363
Cloud9am         38.421559
Cloud3pm         40.807095
Temp9am           1.214767
Temp3pm           2.481094
RainToday         2.241853
RainTomorrow      2.245978
dtype: float64

In [281]:
# Remove the four sparsest features (roughly 38-48% missing). The membership
# check lets the cell be re-run after the columns have already been dropped.
if 'Evaporation' in df.columns:
    df.drop(columns=['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm'], inplace=True)
# Re-check the share of missing values (in percent) for the remaining columns.
df.isna().mean() * 100

Location          0.000000
MinTemp           1.020899
MaxTemp           0.866905
Rainfall          2.241853
WindGustDir       7.098859
WindGustSpeed     7.055548
WindDir9am        7.263853
WindDir3pm        2.906641
WindSpeed9am      1.214767
WindSpeed3pm      2.105046
Humidity9am       1.824557
Humidity3pm       3.098446
Pressure9am      10.356799
Pressure3pm      10.331363
Temp9am           1.214767
Temp3pm           2.481094
RainToday         2.241853
RainTomorrow      2.245978
dtype: float64

In [282]:
# Fill gaps in the numeric measurements by time-weighted interpolation over
# the Date index.
#
# Only the numeric columns are passed to interpolate(): the object columns
# (wind directions, rain flags, Location) cannot be interpolated, and calling
# DataFrame.interpolate on a frame that contains object columns is deprecated
# in recent pandas releases. The numeric results are the same as interpolating
# the whole frame; the object columns keep their missing values.
#
# NOTE(review): the frame holds several Locations under one Date index, so
# interpolating over the whole frame presumably mixes neighbouring rows from
# different stations. Interpolating per Location would be cleaner, but it
# changes which rows survive the later dropna - confirm before switching.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].interpolate(method='time')
# Percentage of values still missing per column.
df.isna().mean() * 100

Location         0.000000
MinTemp          0.000000
MaxTemp          0.000000
Rainfall         0.000000
WindGustDir      7.098859
WindGustSpeed    0.000000
WindDir9am       7.263853
WindDir3pm       2.906641
WindSpeed9am     0.000000
WindSpeed3pm     0.000000
Humidity9am      0.000000
Humidity3pm      0.000000
Pressure9am      0.000000
Pressure3pm      0.000000
Temp9am          0.000000
Temp3pm          0.000000
RainToday        2.241853
RainTomorrow     2.245978
dtype: float64

In [283]:
# Discard every row that still contains a missing value (dropna's default is
# how='any'), then confirm that no column has missing entries left.
df = df.dropna()
df.isna().mean() * 100

Location         0.0
MinTemp          0.0
MaxTemp          0.0
Rainfall         0.0
WindGustDir      0.0
WindGustSpeed    0.0
WindDir9am       0.0
WindDir3pm       0.0
WindSpeed9am     0.0
WindSpeed3pm     0.0
Humidity9am      0.0
Humidity3pm      0.0
Pressure9am      0.0
Pressure3pm      0.0
Temp9am          0.0
Temp3pm          0.0
RainToday        0.0
RainTomorrow     0.0
dtype: float64

In [284]:
# Separate the predictors from the target column (RainTomorrow).
X = df.drop(columns='RainTomorrow')
y = df['RainTomorrow']

In [285]:
# Partition the predictor names by dtype: object columns are treated as
# categorical, everything else as numerical.
numerical_columns = X.select_dtypes(exclude='object').columns.tolist()
categorical_columns = X.select_dtypes(include='object').columns.tolist()
# Display the categorical column names.
categorical_columns

['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday']

In [286]:
# One-hot encode the categorical predictors and the target.
#
# An explicit numeric dtype is requested for the indicator columns: since
# pandas 2.0 get_dummies produces bool columns by default, and a frame mixing
# float64 and bool columns converts to an object array that Keras cannot turn
# into a tensor. Existing numeric columns are passed through unchanged.
X = pd.get_dummies(X, dtype='float32')
y = pd.get_dummies(y, dtype='float32')

In [287]:
# Hold out 20% of the rows for testing.
# random_state pins the shuffle so the split (and the reported test score) is
# reproducible across runs; stratify keeps the class proportions of the
# one-hot target the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)

In [288]:
# Small feed-forward classifier: one 16-unit ReLU hidden layer, 20% dropout,
# and a 2-way softmax output matching the one-hot encoded target.
# The input width is taken from the training frame instead of being hard-coded,
# so the model keeps working if the number of encoded feature columns changes.
model = tf.keras.Sequential([
    Dense(16, input_shape=(X_train.shape[1],), activation='relu'),
    Dropout(0.2),
    Dense(2, activation='softmax'),
])

In [289]:
# Configure training: categorical cross-entropy loss (the target is one-hot
# encoded), the Adam optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [290]:
# Train for five epochs, holding back 20% of the training rows for validation.
model.fit(x=X_train, y=y_train, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f064a2f9650>

In [291]:
# Score the trained model on the held-out test split; with the compiled
# metrics, evaluate() yields the loss followed by the accuracy.
results = model.evaluate(x=X_test, y=y_test, batch_size=128)
print("test loss, test acc:", results)

test loss, test acc: [0.3608396351337433, 0.8437879085540771]
