In [1]:
import pandas as pd 
import numpy as np 
import math 

import sklearn 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error 
from sklearn.preprocessing import MinMaxScaler 

import plotly 
import plotly.express as px
import plotly.graph_objects as go

from tensorflow import keras
from keras.models import Sequential
from keras import Input
from keras.layers import Dense, SimpleRNN

In [2]:

# Location of the raw weather observations (relative to the notebook)
path = './weatherAUS.csv'
df = pd.read_csv(path, encoding='utf-8')

# Keep only the rows that have a MinTemp value
df = df[df['MinTemp'].notna()]

# Median daily temperature: row-wise median of the MinTemp and MaxTemp columns
df['MedTemp'] = df.loc[:, ['MinTemp', 'MaxTemp']].median(axis='columns')


In [3]:
cities = ['Canberra', 'Sydney', 'Melbourne']

# Grid, zero-line and border styling applied identically to both axes
axis_style = dict(
    showgrid=True, gridwidth=1, gridcolor='lightgrey',
    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
    showline=True, linewidth=1, linecolor='black',
)

for city in cities:

    # Independent copy of this city's rows, also published in the notebook
    # namespace as df<first three letters of the city> (e.g. dfCan)
    df_ = df[df['Location'] == city].copy()
    globals()['df' + city[:3]] = df_

    # Single black line: daily median temperature against date
    temperature_trace = go.Scatter(
        x=df_['Date'],
        y=df_['MedTemp'],
        mode='lines',
        name='Median Temperature',
        opacity=0.8,
        line=dict(color='black', width=1),
    )

    fig = go.Figure()
    fig.add_trace(temperature_trace)

    fig.update_xaxes(title='Date', **axis_style)
    fig.update_yaxes(title='Degrees Celsius', **axis_style)

    # White plot background plus a per-city title
    fig.update_layout(
        plot_bgcolor='white',
        title=dict(text="Median Daily Temperatures in "+ city,
                   font=dict(color='black')),
    )

    fig.show()

In [4]:
# Prepare train and test data before feeding them to the model
def prep_data(datain, time_step):
    """Cut a series into non-overlapping input windows and next-step targets.

    Window ``i`` holds ``datain[i*time_step : (i+1)*time_step]`` and its target
    is the element that immediately follows it, ``datain[(i+1)*time_step]``.
    Trailing elements that cannot form a complete window with a target are
    dropped.

    Parameters
    ----------
    datain : array-like
        Series of observations, either 1-D or of shape ``(n, 1)``. Anything
        accepted by ``np.asarray`` works.
    time_step : int
        Number of consecutive observations per input window; must be >= 1.

    Returns
    -------
    X_tmp : np.ndarray
        Array of shape ``(rows, time_step, 1)`` with the input windows.
    y_tmp : np.ndarray
        The ``rows`` target values (first axis of length ``rows``).

    Raises
    ------
    ValueError
        If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError(f"time_step must be a positive integer, got {time_step!r}")

    series = np.asarray(datain)

    # Targets sit at every multiple of time_step that is still inside the series
    y_indices = np.arange(start=time_step, stop=len(series), step=time_step)
    y_tmp = series[y_indices]

    # One window per target; a plain slice replaces indexing with a Python range.
    # The trailing copy keeps the result independent of the caller's array.
    rows_X = len(y_tmp)
    X_tmp = series[:time_step * rows_X].reshape(rows_X, time_step, 1).copy()
    return X_tmp, y_tmp

# Train the model for each City

In [5]:
for city in cities:
    print(f"Train the {city} model:")
    city_key = city[:3]  # suffix of the per-city notebook variables, e.g. 'Can'

    # Select data for modeling and apply MinMax scaling.
    # NOTE(review): the scaler is fit on the full series before the split below,
    # so the value range of the test portion influences the scaling.
    X = globals()['df' + city_key][['MedTemp']]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    # Chronological split: shuffle=False keeps the last 20% of rows as test data
    train_data, test_data = train_test_split(X_scaled, test_size=0.2, shuffle=False)

    # Prepare input X and target y arrays using previously defined function
    time_step = 7
    X_train, y_train = prep_data(train_data, time_step)
    X_test, y_test = prep_data(test_data, time_step)

    # Publish the prepared arrays under per-city names for later cells
    globals()['X_train_' + city_key], globals()['y_train_' + city_key] = X_train, y_train
    globals()['X_test_' + city_key], globals()['y_test_' + city_key] = X_test, y_test

    # Specify the structure of the Neural Network:
    # one recurrent unit -> one tanh dense unit -> one linear output unit
    model = Sequential(name="First-RNN-Model")
    model.add(Input(shape=(time_step, 1), name='Input-Layer'))
    model.add(SimpleRNN(units=1, activation='tanh', name='Hidden-Recurrent-Layer'))
    model.add(Dense(units=1, activation='tanh', name='Hidden-Layer'))
    model.add(Dense(units=1, activation='linear', name='Output-Layer'))

    # Only non-default options are passed; everything else uses the library defaults
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=['MeanSquaredError', 'MeanAbsoluteError'])

    # Fit keras model on the dataset.
    # The previously spelled-out default arguments were removed; among them
    # max_queue_size / workers / use_multiprocessing are not accepted by
    # Model.fit in Keras 3 and would raise a TypeError there.
    model.fit(X_train, y_train, batch_size=1, epochs=20)

    # `model` and `scaler` are rebound on every iteration, so keep this city's
    # fitted objects under per-city names as well
    globals()['model_' + city_key] = model
    globals()['scaler_' + city_key] = scaler

    # Predict the result on training data
    pred_train = model.predict(X_train)
    globals()['pred_train_' + city_key] = pred_train
    # Predict the result on test data
    pred_test = model.predict(X_test)
    globals()['pred_test_' + city_key] = pred_test

    print("")
    print('-------------------- Model Summary --------------------')
    model.summary()
    print("")
    print('-------------------- Weights and Biases --------------------')
    print("Note, the last parameter in each layer is bias while the rest are weights")
    print("")
    for layer in model.layers:
        print(layer.name)
        for item in layer.get_weights():
            print("  ", item)
    print("")
    print('---------- Evaluation on Training Data ----------')
    print("MSE: ", mean_squared_error(y_train, pred_train))
    print("")

    print('---------- Evaluation on Test Data ----------')
    print("MSE: ", mean_squared_error(y_test, pred_test))
    print("")

Train the Canberra model:
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

-------------------- Model Summary --------------------
Model: "First-RNN-Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Recurrent-Layer (Sim  (None, 1)                3         
 pleRNN)                                                         
                                                                 
 Hidden-Layer (Dense)        (None, 1)                 2         
                                                                 
 Output-Layer (Dense)        (None, 1)                 2         
                                                                 
Total params: 7
Trainable params: 7
Non-trainable params: 0
_______

In [6]:
for city in cities:
    city_key = city[:3]
    y_train = globals()['y_train_' + city_key]
    pred_train = globals()['pred_train_' + city_key]

    # The `scaler` variable left over from the training loop belongs to the last
    # city trained, so using it here would convert the other cities to the wrong
    # degrees. MinMax scaling is deterministic: refitting on the same MedTemp
    # column the training loop used reproduces this city's scaling exactly.
    city_scaler = MinMaxScaler().fit(globals()['df' + city_key][['MedTemp']])

    fig = go.Figure()
    # Both traces show the TRAINING windows, so they are labelled accordingly
    fig.add_trace(go.Scatter(x=np.arange(len(y_train)),
                            y=city_scaler.inverse_transform(y_train).flatten(),
                            mode='lines',
                            name='Median Temperature - Actual (Train)',
                            opacity=0.8,
                            line=dict(color='black', width=1)
                            ))
    fig.add_trace(go.Scatter(x=np.arange(len(pred_train)),
                            y=city_scaler.inverse_transform(pred_train).flatten(),
                            mode='lines',
                            name='Median Temperature - Predicted (Train)',
                            opacity=0.8,
                            line=dict(color='red', width=1)
                            ))

    # Change chart background color
    fig.update_layout(dict(plot_bgcolor = 'white'))

    # Update axes lines
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black',
                    title='Observation'
                    )

    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black',
                    title='Degrees Celsius'
                    )

    # Set figure title
    fig.update_layout(title=dict(text="Median Daily Temperatures in " + city, 
                                font=dict(color='black')),
                    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
                    )

    fig.show()

## Model Expansion
With the current setup, we feed in 7 days' worth of data and get a prediction for the following day.
To make a prediction for every day in the data instead of only every 7th day, we build an array of 7-day chunks, each offset from the previous one by a single day.

In [7]:
for city in cities:
    print(f"{city} city")
    city_key = city[:3]
    city_df = globals()['df' + city_key]

    # Use this city's own model and scaler when both exist in the notebook
    # namespace as model_<key> / scaler_<key>. Otherwise fall back to the plain
    # `model` / `scaler` variables, which hold whatever the training loop
    # assigned last (i.e. the final city trained). The pair is always taken
    # together so the scaling matches the model it feeds.
    model_name, scaler_name = 'model_' + city_key, 'scaler_' + city_key
    if model_name in globals() and scaler_name in globals():
        city_model, city_scaler = globals()[model_name], globals()[scaler_name]
    else:
        city_model, city_scaler = model, scaler

    X_every = city_scaler.transform(city_df[['MedTemp']])

    # Build every window of `time_step` consecutive days, offset by one day,
    # with a single fancy-indexing step instead of growing an array via
    # np.append inside a loop. Row i of window_idx is [i, i+1, ..., i+time_step-1].
    n_windows = max(len(X_every) - time_step, 0)
    window_idx = np.arange(n_windows)[:, None] + np.arange(time_step)[None, :]
    X_comb = X_every[window_idx]  # shape: (n_windows, time_step, 1)
    print(X_comb.shape)

    # Skip the model call when the series is too short to form a single window
    if n_windows > 0:
        predicted = city_scaler.inverse_transform(city_model.predict(X_comb)).ravel()
    else:
        predicted = np.array([])

    # The first `time_step` days have no full history, so they are zero-filled
    padding = np.zeros(min(time_step, len(X_every)))
    city_df['MedTemp_prediction'] = np.append(padding, predicted)


Canberra city
(3423, 7, 1)
Sydney city
(3333, 7, 1)
Melbourne city
(2706, 7, 1)


In [8]:
for city in cities:
    # Per-city frame holding the Date, MedTemp and MedTemp_prediction columns
    city_df = globals()['df' + city[:3]]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=city_df['Date'],
                            y=city_df['MedTemp'],
                            mode='lines',
                            name='Median Temperature - Actual',
                            opacity=0.8,
                            line=dict(color='black', width=1)
                            ))

    fig.add_trace(go.Scatter(x=city_df['Date'],
                            y=city_df['MedTemp_prediction'],
                            mode='lines',
                            name='Median Temperature - Predicted',
                            opacity=0.8,
                            line=dict(color='red', width=1)
                            ))

    fig.update_layout(dict(plot_bgcolor = 'white'))

    # The x values are the Date column, so the axis is titled 'Date'
    # (it was previously titled 'Observation')
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black',
                    title='Date'
                    )

    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black',
                    title='Degrees Celsius'
                    )

    fig.update_layout(title=dict(text="Median Daily Temperatures in " + city, 
                                font=dict(color='black')),
                    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
                    )

    fig.show()