In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib
import warnings  # Supress warnings
import os
import random

In [154]:
# Run configuration: together these select ./predictions/<model>/<num>.csv as the
# predictions file to evaluate.
model = "ARIMAX"  # sub-folder name under ./predictions/ and metrics/
num = 6  # number of the predictions file to load

In [155]:
# Full data set from interpolated_m.csv, indexed by the dates in its first column.
df = pd.read_csv("./files/interpolated_m.csv", index_col=0)
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')

# Working copy of the same data; its target column is overwritten with the model
# predictions further down. Copying `df` also carries over the parsed
# DatetimeIndex. Previously the CSV was read a second time but `df.index` was
# converted again instead of `preds_df.index`, which left `preds_df` with an
# unparsed string index.
preds_df = df.copy()

# Predictions produced by the selected model / file number.
preds = pd.read_csv("./predictions/"+model+"/"+str(num)+".csv", index_col=0)
preds.index = pd.to_datetime(preds.index, format='%Y-%m-%d')

In [156]:
# Show the predictions as loaded from the CSV (before any inverse scaling).
preds

Unnamed: 0,Depth_to_Groundwater
2017-02-28,0.693395
2017-03-31,0.727176
2017-04-30,0.726836
2017-05-31,0.741491
2017-06-30,0.72401
2017-07-31,0.558886
2017-08-31,0.536447
2017-09-30,0.453145
2017-10-31,0.482702
2017-11-30,0.432109


In [157]:
# Overwrite the target column with the model output. The single prediction column
# is taken as a Series; the assignment aligns on the index, so rows without a
# matching prediction date become NaN.
preds_df['Depth_to_Groundwater'] = preds.squeeze(axis=1)

In [158]:
# Keep only rows that are complete in every column. Rows that received no
# prediction are NaN in Depth_to_Groundwater and are removed here, as is any row
# with a missing value in another column.
preds_df = preds_df.dropna(axis=0, how='any')

In [159]:
# Load the fitted scaler used below to inverse-transform the prediction frame.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code while
# loading — only load scaler files that come from a trusted source.
loaded_scaler = joblib.load('files/scaler_m.joblib')

In [160]:
# Map the prediction frame back through the scaler.
unscaled_values = loaded_scaler.inverse_transform(preds_df.values)

# Label the result from preds_df itself (the frame that was transformed) rather
# than from preds/df, so the labels always match the array even when dropna()
# kept a different set of rows than preds contains. The index is coerced to
# datetime so it lines up with the datetime-indexed actuals in `df`.
unscaled_df = pd.DataFrame(
    unscaled_values,
    columns=preds_df.columns,
    index=pd.to_datetime(preds_df.index),
)

# NOTE(review): only Depth_to_Groundwater was replaced with model output. Confirm
# whether the other columns of interpolated_m.csv are in the scaler's transformed
# space; if they are not, their inverse-transformed values are not meaningful.
unscaled_df

Unnamed: 0,Rainfall,Depth_to_Groundwater,Temperature,Drainage_Volume,River_Hydrometry
2017-02-28,8306.76,-23.69472,229.682535,-357565300000.0,6.027882
2017-03-31,12517.56,-23.212806,307.979043,-387948100000.0,5.964981
2017-04-30,8766.12,-23.21766,361.285871,-374940600000.0,5.600656
2017-05-31,8077.08,-23.00859,483.02256,-425485400000.0,5.575125
2017-06-30,3713.16,-23.25797,662.294022,-486147800000.0,5.268896
2017-07-31,1722.6,-25.613644,685.269344,-490717600000.0,4.974288
2017-08-31,995.28,-25.933758,722.890884,-492466900000.0,4.832105
2017-09-30,18297.84,-27.122165,491.241419,-445196300000.0,5.193066
2017-10-31,3253.8,-26.700496,387.769781,-465094900000.0,5.111884
2017-11-30,14201.88,-27.422254,246.624237,-451087300000.0,5.548523


In [161]:
# Show the inverse-transformed target column, the only column evaluated below.
unscaled_df['Depth_to_Groundwater']

2017-02-28   -23.694720
2017-03-31   -23.212806
2017-04-30   -23.217660
2017-05-31   -23.008590
2017-06-30   -23.257970
2017-07-31   -25.613644
2017-08-31   -25.933758
2017-09-30   -27.122165
2017-10-31   -26.700496
2017-11-30   -27.422254
2017-12-31   -27.708780
2018-01-31   -27.663537
2018-02-28   -26.862385
2018-03-31   -26.815733
2018-04-30   -25.237879
2018-05-31   -23.073379
2018-06-30   -23.095070
2018-07-31   -23.477549
2018-08-31   -25.323133
2018-09-30   -25.169283
2018-10-31   -25.341727
2018-11-30   -25.870480
2018-12-31   -25.697321
2019-01-31   -26.503538
2019-02-28   -26.698294
2019-03-31   -26.007832
2019-04-30   -25.399816
2019-05-31   -25.287993
2019-06-30   -25.215490
2019-07-31   -25.409757
2019-08-31   -26.507133
2019-09-30   -26.448482
2019-10-31   -26.615648
2019-11-30   -27.046387
2019-12-31   -26.154554
2020-01-31   -25.857099
2020-02-29   -25.207320
2020-03-31   -25.064509
2020-04-30   -24.534402
2020-05-31   -24.898082
2020-06-30   -24.533306
Name: Depth_to_G

In [162]:
# Per-column metric results, filled in by the evaluation loop below.
metrics = {}


def calc_metrics(name, predictions=None, actuals=None):
    """Compute RMSE, MAE and MAPE for one column of predictions vs. actuals.

    Parameters
    ----------
    name : str
        Column to evaluate; it must exist in both frames.
    predictions : pandas.DataFrame, optional
        Frame holding the predicted values. Defaults to the notebook-level
        ``unscaled_df``.
    actuals : pandas.DataFrame, optional
        Frame holding the observed values. Defaults to the notebook-level ``df``.

    Returns
    -------
    tuple
        ``(rmse, mae, mape)`` where ``mape`` is expressed in percent.

    Notes
    -----
    Both columns are aligned on the index of ``predictions``; rows where either
    value is missing are dropped before the metrics are computed. MAPE divides
    by the actual value, so an actual of exactly zero yields an infinite term
    (or a NaN term, which the mean ignores, when the error is also zero).
    If no rows remain after alignment, all three metrics are NaN.
    """
    # Fall back to the notebook globals so existing calc_metrics(name) calls
    # keep working unchanged.
    if predictions is None:
        predictions = unscaled_df
    if actuals is None:
        actuals = df

    results = pd.DataFrame(
        data={'Predictions': predictions[name], 'Actuals': actuals[name]},
        index=predictions.index,
    ).dropna()

    # Signed error per row; every metric below is derived from it.
    error = results['Actuals'] - results['Predictions']

    rmse = np.sqrt((error ** 2).mean())
    mae = error.abs().mean()
    mape = (error / results['Actuals']).abs().mean() * 100  # percentage
    return rmse, mae, mape

# Evaluate each target column and store its scores under the column name.
for column in ('Depth_to_Groundwater',):
    rmse, mae, mape = calc_metrics(column)
    metrics[column] = dict(RMSE=rmse, MAE=mae, MAPE=mape)

# One column per evaluated target, one row per metric.
metrics_df = pd.DataFrame(metrics)

In [163]:
# Show the computed metrics table (rows: metrics, columns: evaluated targets).
metrics_df

Unnamed: 0,Depth_to_Groundwater
MAE,0.29061
MAPE,1.150848
RMSE,0.374425


In [164]:
# Write the metrics next to the other results for this model. The target folder
# is created first so the export does not fail on a fresh checkout.
metrics_dir = 'metrics/' + model
os.makedirs(metrics_dir, exist_ok=True)

# NOTE(review): the output file number is num+3 while the predictions were read
# from <num>.csv — confirm this offset is intentional.
metrics_df.to_csv(metrics_dir + '/' + str(num + 3) + '.csv')