In [1]:
!pip install hyperopt



In [35]:
import data_preparation
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import tensorflow as tf
from hyperopt import tpe, hp, fmin
import pandas as pd
import numpy as np

In [3]:
df = data_preparation.prepare_data('weather.csv')
train_ts, valid_ts, n_input, n_features, y_valid = data_preparation.preprocess(data=df, train_model=True)

In [27]:
regression_columns = [
 'M_RAIN_PERCENTAGE_5',
 'M_RAIN_PERCENTAGE_10',
'M_RAIN_PERCENTAGE_15',
 'M_RAIN_PERCENTAGE_30',
 'M_RAIN_PERCENTAGE_45',
 'M_RAIN_PERCENTAGE_60',
]
classification_columns = [
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_5',
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_10',
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_15',
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_30',
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_45',
'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_60'
]
y_columns = [
    'M_RAIN_PERCENTAGE_5',
    'M_RAIN_PERCENTAGE_10',
    'M_RAIN_PERCENTAGE_15',
    'M_RAIN_PERCENTAGE_30',
    'M_RAIN_PERCENTAGE_45',
    'M_RAIN_PERCENTAGE_60',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_5',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_10',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_15',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_30',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_45',
    'M_WEATHER_FORECAST_SAMPLES_M_WEATHER_60',
    ]


### Model definition
Here, we define our GRU (Gated recurrent unit) neural network:
* The sequential model consists of 2 layers of GRU with 652 and 326 units of output respectively
* the third layer is a Dense layer which outputs 312 units,
* the fifth layer drops out 20% of the data each epoch,
* the seventh, final layer is a Dense layer reducing the output to 12 units (same as the size of our target vector),
* the neural network is compiled with Adam optimizer, the learning rate is set to 0.0002325, and the MSE (mean squared error) loss is calculated for training and validation.

In [9]:
def build_model(lr, dense_layers, gru_layers, first_gru_neurons_num,first_dense_neurons_num, dropout):
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    model = tf.keras.models.Sequential()
    for _ in range(gru_layers):
        if _ == gru_layers -1:
            model.add(tf.keras.layers.GRU(int(first_gru_neurons_num /(_ +1)), recurrent_dropout=dropout))
        elif _ == 0:
            model.add(tf.keras.layers.GRU(int(first_gru_neurons_num), input_shape=(n_input, n_features), recurrent_dropout=dropout, return_sequences=True))
        else:
            model.add(tf.keras.layers.GRU(int(first_gru_neurons_num/(_ +1)), recurrent_dropout=dropout,return_sequences=True))
    
    for _ in range(dense_layers):
        model.add(tf.keras.layers.Dense(int(first_dense_neurons_num/(_+1))))
        model.add(tf.keras.layers.Dropout(dropout))
    model.add(tf.keras.layers.Dense(12))
    model.compile(optimizer=opt, loss='mse')
    model.summary()
    return model
model = build_model(0.0003, 1, 2, 650,320, 0.2)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_6 (GRU)                 (None, 50, 650)           1333800   
                                                                 
 gru_7 (GRU)                 (None, 325)               952575    
                                                                 
 dense_6 (Dense)             (None, 320)               104320    
                                                                 
 dropout_3 (Dropout)         (None, 320)               0         
                                                                 
 dense_7 (Dense)             (None, 12)                3852      
                                                                 
Total params: 2,394,547
Trainable params: 2,394,547
Non-trainable params: 0
_________________________________________________________________


### Training
The GRU neural network is fit to our training data and validated on our validation data. The fitting runs for 100 epochs.

In [10]:
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.1, patience=20, verbose=0, mode='auto', baseline=None, restore_best_weights=True)
history = model.fit(train_ts, epochs=120, validation_data=valid_ts, use_multiprocessing=True, callbacks = [es])

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120


In [16]:
model.save('model.h5')

In [17]:
model = tf.keras.models.load_model('model.h5')

In [22]:
y_pred = model.predict(valid_ts)

### Metrics and performance
Here, we print out the metrics and performance of our model.
### Descaling predicted targets
Here, we descale the predicted targets, so that we can compare realistic data

In [36]:
def regression_metrics(y_pred, ground_truth):
    predictions = pd.DataFrame(y_pred, columns = y_columns)
    pred_df = predictions[regression_columns]
    ground_truth = ground_truth[regression_columns]
    mae = metrics.mean_absolute_error(ground_truth.to_numpy()[n_input:], pred_df.to_numpy())
    mse = metrics.mean_squared_error(ground_truth.to_numpy()[n_input:], pred_df.to_numpy())
    rmse = np.sqrt(mse)
    return mae, mse, rmse

def accuracy(y_pred, ground_truth):
    predictions = pd.DataFrame(y_pred,columns=y_columns)
    pred_df = predictions[classification_columns].astype(int)
    equality = tf.math.equal(pred_df.to_numpy(), ground_truth[classification_columns].iloc[:len(ground_truth) - n_input].to_numpy().astype(int))
    return tf.math.reduce_mean(tf.cast(equality, tf.float32)).numpy()

In [37]:
_accuracy = accuracy(y_pred, y_valid)

In [38]:
mae,mse,rmse = regression_metrics(y_pred, y_valid)

In [39]:
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'Accuracy: {_accuracy}')

MAE: 2.012901154959756
MSE: 10.57400077345887
RMSE: 3.2517688683943806
Accuracy: 0.8080400824546814


In [42]:
def get_model_output(y_pred):
    preds_dicts = []
    predictions = pd.DataFrame(y_pred,columns=y_columns)
    predictions[classification_columns] = predictions[classification_columns].astype(int)
    for _, row in predictions.iterrows():
        preds_dicts.append({
            '5': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_5']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_5']
            },
            '10': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_10']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_10']
            },
            '15': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_15']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_15']
            },
            '30': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_30']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_30']
            },
            '45': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_45']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_45']
            },
            '60': {
                'type': int(row['M_WEATHER_FORECAST_SAMPLES_M_WEATHER_60']),
                'rain_percentage': row['M_RAIN_PERCENTAGE_60']
            },
        })
    return preds_dicts

In [43]:
results = get_model_output(y_pred)

In [44]:
results

[{'5': {'type': 0, 'rain_percentage': 10.495091438293457},
  '10': {'type': 0, 'rain_percentage': 10.346924781799316},
  '15': {'type': 1, 'rain_percentage': 9.436767578125},
  '30': {'type': 0, 'rain_percentage': 5.586713790893555},
  '45': {'type': 0, 'rain_percentage': 7.927698135375977},
  '60': {'type': 0, 'rain_percentage': 12.414467811584473}},
 {'5': {'type': 0, 'rain_percentage': 10.49499225616455},
  '10': {'type': 0, 'rain_percentage': 10.34683609008789},
  '15': {'type': 1, 'rain_percentage': 9.43666934967041},
  '30': {'type': 0, 'rain_percentage': 5.586588382720947},
  '45': {'type': 0, 'rain_percentage': 7.927431106567383},
  '60': {'type': 0, 'rain_percentage': 12.414077758789062}},
 {'5': {'type': 0, 'rain_percentage': 10.494885444641113},
  '10': {'type': 0, 'rain_percentage': 10.34674072265625},
  '15': {'type': 1, 'rain_percentage': 9.436566352844238},
  '30': {'type': 0, 'rain_percentage': 5.586464881896973},
  '45': {'type': 0, 'rain_percentage': 7.927177906036377