In [7]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

In [20]:
num = 2
model = "ARIMA"

In [21]:
df = pd.read_csv("./files/interpolated_w.csv", index_col=0)
df.index = pd.to_datetime(df.index, format = '%Y-%m-%d')

In [22]:
preds = pd.read_csv("./predictions/"+model+"/"+str(num)+".csv", index_col=0)
preds.index = pd.to_datetime(preds.index, format = '%Y-%m-%d')

In [23]:
preds

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
2017-01-29,14.427634,-23.697495,4.738158,-180401.564048,2.831478
2017-02-05,13.817573,-23.610478,4.897174,-177507.156389,2.838014
2017-02-12,15.188064,-23.449970,7.280185,-175121.106689,2.839193
2017-02-19,15.180846,-23.518224,6.492589,-176539.724712,3.055875
2017-02-26,14.643441,-23.466651,6.932556,-169818.902112,2.889114
...,...,...,...,...,...
2020-06-07,13.801858,-24.780747,20.851859,-182571.016494,2.439211
2020-06-14,13.681304,-24.611395,22.500546,-176478.758671,2.454017
2020-06-21,16.929065,-24.761679,22.324470,-179552.808803,2.708864
2020-06-28,12.886534,-24.751379,23.560890,-190137.617697,2.705996


In [24]:
metrics = {}
def calc_metrics(name):

    results = pd.DataFrame(data={'Predictions':preds[name], 'Actuals':df[name]}, index=preds.index)

    # Step 2: Calculate the differences between the predicted and actual values
    results['Difference'] = results['Actuals'] - results['Predictions']

    # Step 3: Calculate squared differences for RMSE
    results['Squared_difference'] = results['Difference'] ** 2

    # Step 4: Calculate absolute differences for MAE and MAPE
    results['Absolute_difference'] = np.abs(results['Difference'])
    results['Absolute_percentage_difference'] = np.abs(results['Difference'] / results['Actuals'])

    # Step 5: Compute the metrics
    rmse = np.sqrt(results['Squared_difference'].mean())
    mae = results['Absolute_difference'].mean()
    mape = results['Absolute_percentage_difference'].mean() * 100  # to get the percentage
    return rmse, mae, mape

for column in df.columns:

    rmse, mae, mape = calc_metrics(column)
    metrics[column] = {
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape
    }
metrics_df = pd.DataFrame(metrics)

In [25]:
metrics_df

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
RMSE,16.93814,0.131976,2.108532,10482.379982,0.147512
MAE,13.395015,0.099054,1.597837,7966.579156,0.110613
MAPE,inf,0.389695,15.758976,4.19837,4.224389


In [None]:
metrics_df.to_csv('metrics/'+model+'/'+str(num)+'.csv')