In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib
import warnings  # Supress warnings
import os
import random

In [73]:
model = 'RNN'
num = 4

In [74]:
df = pd.read_csv("./files/interpolated_data.csv", index_col=0)
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')
preds = pd.read_csv("./predictions/"+model+"/"+str(num)+".csv", index_col=0)
preds.index = pd.to_datetime(preds.index, format='%Y-%m-%d')

In [75]:
df

Unnamed: 0_level_0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-01-01,0.0,-31.14,5.2,-24530.688,2.4
2009-01-02,0.0,-31.11,2.3,-28785.888,2.5
2009-01-03,0.0,-31.07,4.4,-25766.208,2.4
2009-01-04,0.0,-31.05,0.8,-27919.296,2.4
2009-01-05,0.0,-31.01,-1.9,-29854.656,2.3
...,...,...,...,...,...
2020-06-26,0.0,-25.07,25.7,-29930.688,2.5
2020-06-27,0.0,-25.11,26.2,-31332.960,2.4
2020-06-28,0.0,-25.19,26.9,-32120.928,2.4
2020-06-29,0.0,-25.18,26.9,-30602.880,2.4


In [76]:
loaded_scaler = joblib.load('files/scaler.joblib')

In [77]:
unscaled_values = loaded_scaler.inverse_transform(preds.values)
unscaled_df = pd.DataFrame(unscaled_values, columns=df.columns, index=preds.index)
unscaled_df

Unnamed: 0_level_0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-19,1.039279,-23.711855,5.425251,-25879.230421,2.831420
2017-01-20,1.039279,-23.664973,5.039147,-25972.564849,2.922785
2017-01-21,1.039279,-23.630847,3.019989,-27173.390417,2.891880
2017-01-22,1.039279,-23.627182,3.555414,-27720.273105,2.902544
2017-01-23,1.039279,-23.638621,4.963174,-27137.703428,2.899976
...,...,...,...,...,...
2020-06-26,1.039279,-24.836473,24.633385,-29939.983392,2.542913
2020-06-27,1.039279,-24.898896,25.204965,-29355.983404,2.541782
2020-06-28,1.039279,-24.957976,25.715289,-30631.104020,2.425451
2020-06-29,1.039279,-25.021745,26.412992,-31233.643542,2.455907


In [78]:
preds = unscaled_df

In [79]:
metrics = {}
def calc_metrics(name):

    results = pd.DataFrame(data={'Predictions':preds[name], 'Actuals':df[name]}, index=df.index)

    # Step 2: Calculate the differences between the predicted and actual values
    results['Difference'] = results['Actuals'] - results['Predictions']

    # Step 3: Calculate squared differences for RMSE
    results['Squared_difference'] = results['Difference'] ** 2

    # Step 4: Calculate absolute differences for MAE and MAPE
    results['Absolute_difference'] = np.abs(results['Difference'])
    results['Absolute_percentage_difference'] = np.abs(results['Difference'] / results['Actuals'])

    # Step 5: Compute the metrics
    rmse = np.sqrt(results['Squared_difference'].mean())
    mae = results['Absolute_difference'].mean()
    mape = results['Absolute_percentage_difference'].mean() * 100  # to get the percentage
    return rmse, mae, mape

for column in df.columns:

    rmse, mae, mape = calc_metrics(column)
    metrics[column] = {
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape
    }
metrics_df = pd.DataFrame(metrics)

In [80]:
metrics_df

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
RMSE,5.582636,0.121243,2.048895,1612.65591,0.102213
MAE,2.450916,0.092808,1.529424,1204.66477,0.067446
MAPE,inf,0.367783,inf,4.559757,2.55206


In [81]:
metrics_df.to_csv('metrics/'+model+'/'+str(num+3)+'.csv')