In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib
import warnings  # Suppress warnings
import os
import random

In [22]:
# Identifier of the prediction run; it is the file name of the predictions CSV
# read in the next cell.
num = 6
# Forecasting model under evaluation; selects the predictions/<model>/ and
# metrics/<model>/ subfolders.
model = 'SARIMA'

In [23]:
def _read_dated_csv(path):
    """Read a CSV whose first column holds 'YYYY-MM-DD' dates and index the frame by them."""
    frame = pd.read_csv(path, index_col=0)
    frame.index = pd.to_datetime(frame.index, format='%Y-%m-%d')
    return frame


# Data the predictions are compared against in the metrics cell.
df = _read_dated_csv("./files/interpolated_m.csv")
# Predictions of the selected model/run.
preds = _read_dated_csv(f"./predictions/{model}/{num}.csv")

In [24]:
# Show the frame read from ./files/interpolated_m.csv (used as the actuals in the metrics cell).
df

Unnamed: 0_level_0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-01-31,6.3,-30.503871,4.964516,-827548.704,2.387097
2009-02-28,14.0,-29.350000,5.125000,-717952.896,2.546429
2009-03-31,34.2,-28.502903,9.254839,-879621.120,2.490323
2009-04-30,16.3,-27.736333,13.806667,-845030.016,2.403333
2009-05-31,28.6,-27.478065,19.809677,-997805.952,2.109677
...,...,...,...,...,...
2020-02-29,18.6,-25.261724,8.989655,-822268.800,2.420690
2020-03-31,70.2,-24.945355,9.758065,-830281.536,2.722581
2020-04-30,41.6,-24.805133,14.016667,-739037.952,2.500000
2020-05-31,45.6,-24.716774,18.754839,-747226.080,2.470968


In [25]:
# Show the predictions as read from disk, i.e. before the scaler's inverse transform is applied.
preds

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
2017-02-28,0.433081,0.687409,0.205136,0.890262,0.851393
2017-03-31,0.328804,0.739558,0.285787,0.948351,0.749209
2017-04-30,0.314983,0.718209,0.445964,0.900525,0.665164
2017-05-31,0.298159,0.749648,0.559313,0.920645,0.600623
2017-06-30,0.296329,0.710295,0.766556,0.842713,0.524585
2017-07-31,0.267908,0.534575,0.905169,0.554602,0.400105
2017-08-31,0.223502,0.538916,0.82002,0.614558,0.330283
2017-09-30,0.157897,0.465379,0.728594,0.570448,0.261628
2017-10-31,0.141131,0.484865,0.451404,0.630769,0.470685
2017-11-30,0.161441,0.444977,0.339834,0.634935,0.517619


In [26]:
# Scaler whose inverse_transform is applied to the predictions in the next cell.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler artifacts from a trusted source.
loaded_scaler = joblib.load('files/scaler_m.joblib')

In [27]:
# Undo the scaling on the predictions. The result is labelled with df's column
# names, so this relies on preds having the same column order as df.
unscaled_values = loaded_scaler.inverse_transform(preds.to_numpy())
unscaled_df = pd.DataFrame(
    data=unscaled_values,
    index=preds.index,
    columns=df.columns,
)
unscaled_df

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
2017-02-28,82.891613,-23.780116,8.461334,-742183.096061,3.033868
2017-03-31,62.933036,-23.036163,10.528584,-711856.453853,2.888579
2017-04-30,60.287707,-23.340725,14.634293,-736825.182106,2.769082
2017-05-31,57.067581,-22.892219,17.539692,-726321.128604,2.677316
2017-06-30,56.717387,-23.45363,22.851776,-767007.291858,2.569203
2017-07-31,51.27755,-25.960468,26.404741,-917422.477101,2.392214
2017-08-31,42.778321,-25.898544,24.222182,-886121.210153,2.292938
2017-09-30,30.221522,-26.94763,21.878727,-909149.502467,2.195323
2017-10-31,27.012551,-26.669637,14.773732,-877657.909451,2.492566
2017-11-30,30.899794,-27.238681,11.913935,-875482.658033,2.559299


In [28]:
# From here on `preds` refers to the inverse-transformed predictions.
# NOTE: because the name is rebound, re-running the inverse-transform cell after
# this one would apply the inverse transform a second time; reload `preds` first.
preds = unscaled_df

In [29]:
metrics = {}


def calc_metrics(name, predictions=None, actuals=None):
    """Compute RMSE, MAE and MAPE between the predicted and actual values of one column.

    Parameters
    ----------
    name : str
        Column label that must exist in both frames.
    predictions : pandas.DataFrame, optional
        Frame holding the predicted values. Defaults to the notebook-level ``preds``.
    actuals : pandas.DataFrame, optional
        Frame holding the observed values. Defaults to the notebook-level ``df``.

    Returns
    -------
    tuple
        ``(rmse, mae, mape)`` with ``mape`` expressed in percent. Each value is
        NaN when there is no row on which it can be evaluated.

    Notes
    -----
    Only index labels of ``actuals`` for which both a prediction and an actual
    value exist are evaluated. Rows whose actual value is exactly 0 are left
    out of the MAPE (the percentage error is undefined there and would
    otherwise turn the mean into ``inf``); they still count for RMSE and MAE.
    """
    if predictions is None:
        predictions = preds
    if actuals is None:
        actuals = df

    # Step 1: Align both series on the actuals' index and keep complete pairs only
    results = pd.DataFrame(
        data={'Predictions': predictions[name], 'Actuals': actuals[name]},
        index=actuals.index,
    ).dropna()

    # Step 2: Calculate the differences between the predicted and actual values
    error = results['Actuals'] - results['Predictions']

    # Step 3: RMSE and MAE from the squared / absolute differences
    rmse = np.sqrt((error ** 2).mean())
    mae = error.abs().mean()

    # Step 4: MAPE over rows with a non-zero actual value only
    nonzero = results['Actuals'] != 0
    mape = (error[nonzero] / results.loc[nonzero, 'Actuals']).abs().mean() * 100  # to get the percentage
    return rmse, mae, mape

# Evaluate every column of the actuals and collect the three scores per column;
# calc_metrics returns them in the order (RMSE, MAE, MAPE).
for column in df.columns:
    metrics[column] = dict(zip(('RMSE', 'MAE', 'MAPE'), calc_metrics(column)))

# One column per variable, one row per metric.
metrics_df = pd.DataFrame(metrics)

In [30]:
# Show the metrics table (rows: RMSE / MAE / MAPE, columns: evaluated variables).
metrics_df

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
RMSE,45.381322,0.365781,1.688256,51803.978534,0.15762
MAE,36.681796,0.277915,1.32548,39867.201588,0.130024
MAPE,116.932364,1.103021,11.457643,4.905666,5.101857


In [31]:
# Persist the metrics table, creating the per-model output folder first so the
# write does not fail on a fresh checkout.
# NOTE(review): the output file is numbered num + 3 while the predictions were
# read from num; confirm this offset is intentional.
os.makedirs('metrics/' + model, exist_ok=True)
metrics_df.to_csv(f'metrics/{model}/{num + 3}.csv')