In [None]:
# Tunes and trains an ARIMA model for each exchange-rate series, produces one-step-ahead forecasts, and quantifies forecast uncertainty using the ARIMA model's own prediction intervals.
# Author Samwel Portelli <samwel.portelli.18@um.edu.mt>


In [None]:
from deel.puncc.metrics import regression_ace
from deel.puncc.metrics import regression_mean_coverage
from deel.puncc.metrics import regression_sharpness
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
import matplotlib.pyplot as plt
import pmdarima
from pmdarima.arima import ndiffs
import pmdarima as pm
import os
import csv

def direction_accuracy(actual, predicted):
    """
    Calculate the direction accuracy between actual and predicted values.

    For every step i >= 1 the realised move ``actual[i] - actual[i-1]`` is
    compared with the forecast move ``predicted[i] - actual[i-1]`` (both are
    measured from the previous *actual* value). A step counts as correct only
    when both moves are strictly in the same direction; a zero move on either
    side counts as incorrect.

    Parameters:
    actual (list): List of actual values (time series).
    predicted (list): List of predicted values (time series).

    Returns:
    float: Direction accuracy in percentage.

    Raises:
    ValueError: If the inputs differ in length, or contain fewer than two
        observations (no direction can be derived from a single point).
    """
    # Ensure the lengths of actual and predicted are the same
    if len(actual) != len(predicted):
        raise ValueError("Input lists must have the same length.")

    # With fewer than two points there are no moves to score; fail with a
    # clear message instead of a ZeroDivisionError further down.
    if len(actual) < 2:
        raise ValueError("At least two observations are required to compute direction accuracy.")

    # Calculate the realised and the forecast move for each step
    actual_diff = [actual[i] - actual[i-1] for i in range(1, len(actual))]
    predicted_diff = [predicted[i] - actual[i-1] for i in range(1, len(predicted))]

    # Count the steps where both moves share the same (non-zero) sign
    correct_predictions = sum((a * p) > 0 for a, p in zip(actual_diff, predicted_diff))

    # Calculate accuracy as a percentage
    accuracy_percentage = (correct_predictions / len(actual_diff)) * 100.0

    return float(accuracy_percentage)

def create_unique_folder(folder_name):
    """
    Create a new directory without overwriting an existing one.

    The requested path is used when it is free; otherwise the suffixes
    ``_v1``, ``_v2``, ... are tried in order until an unused path is found.

    Parameters:
    folder_name (str): Preferred directory path.

    Returns:
    str: Path of the directory that was actually created.
    """
    candidate = folder_name
    suffix = 0
    # Walk through the versioned names until one does not exist yet
    while os.path.exists(candidate):
        suffix += 1
        candidate = f"{folder_name}_v{suffix}"
    os.makedirs(candidate)
    return candidate

In [None]:
def forecast_one_step(model, exog_y_test_values, alpha):
    """
    Request a single-period forecast together with its confidence interval.

    Parameters:
    model: Fitted model exposing ``predict(n_periods, X, return_conf_int, alpha)``.
    exog_y_test_values: Exogenous values forwarded to ``predict`` as ``X``.
    alpha (float): Significance level forwarded to ``predict``.

    Returns:
    tuple: ``(forecast, bounds)`` where ``forecast`` is the first predicted
        value and ``bounds`` is the first row of the confidence-interval
        array converted to a list.
    """
    point_forecast, interval = model.predict(
        n_periods=1,
        X=exog_y_test_values,
        return_conf_int=True,
        alpha=alpha,
    )
    first_value = point_forecast.tolist()[0]
    first_bounds = np.asarray(interval).tolist()[0]
    return first_value, first_bounds

In [None]:
# ---------------------------------------------------------------------------
# Experiment: for every exchange-rate series, fit an ARIMA model with the
# remaining series as one-step-lagged exogenous regressors, roll through the
# test period one step at a time, and score the ARIMA prediction intervals
# for several significance levels (alpha).
# ---------------------------------------------------------------------------

# Load the exchange_rate dataset
# NOTE(review): machine-specific absolute path; point DATA_PATH at the local copy of the CSV.
DATA_PATH = "C:/Users/porte/Downloads/dataset/exchange_rate/exchange_rate.csv"
data = pd.read_csv(DATA_PATH, index_col='date', parse_dates=True)

# Select the relevant columns for the model
columns = ['0', '1', '2', '3', '4', '5', '6', 'OT']

# Extract the relevant subset for training and testing
window_size = 20

train_end = 5311
test_start = 5311
test_end = len(data)

# Number of leading test rows written to the "val_" CSV; the remainder goes to the "test_" CSV.
VAL_SIZE = 760

train_data = data.iloc[:train_end]
# The test frame starts `window_size` rows before test_start (5291 = 5311 - 20).
# NOTE(review): presumably so the first rolling window yields a target at test_start - confirm
# against create_rolling_windows.
test_data = data.iloc[test_start - window_size:]

alphas = [0.01, 0.05, 0.1, 0.15]
confidence = np.subtract(np.ones(len(alphas)), alphas)

targets = ['0', '1', '2', '3', '4', '5', '6', 'OT']

# One output folder per target series.
OUTPUT_ROOT = os.path.join('.', 'arimarunningaModelPerAlpha')
folder_names = [os.path.join(OUTPUT_ROOT, 'test_arima_model_data_' + name) for name in targets]

# Directory that receives the summary of the selected ARIMA orders.
ORDERS_DIR = os.path.join('.', 'arimamodel_val_test_past_20')

orders = []
model_alpha = []

for target_idx, target in enumerate(targets):

    folder_name = create_unique_folder(folder_names[target_idx])

    # NOTE(review): create_rolling_windows is not defined in the visible cells; it must be
    # defined before this cell runs. Only test_y is used below.
    # Create rolling windows for training data
    train_X, train_y = create_rolling_windows(train_data, window_size, target)

    # Create rolling windows for testing data
    test_X, test_y = create_rolling_windows(test_data, window_size, target)

    # Target series as an (n, 1) column; every *other* series acts as exogenous input.
    # A new list is built so `targets` is not mutated while the outer loop iterates over it.
    target_values = data[[target]].values
    exog_var = [name for name in targets if name != target]
    exog_values = data[exog_var].values

    # Exogenous rows are lagged by one step: the regressors at row t-1 explain y at row t.
    y_train = target_values[1:train_end]
    y_test = target_values[test_start:test_end]

    exog_y_train = exog_values[:train_end - 1]
    exog_y_test = exog_values[test_start - 1:test_end - 1]

    # Fail fast (before the expensive fitting) if the rolling-window targets do not line up
    # with the forecast horizon; the metrics and the results frame need equal lengths.
    if len(test_y) != len(y_test):
        raise ValueError(
            'test_y has ' + str(len(test_y)) + ' values but ' + str(len(y_test))
            + ' forecasts will be produced for target ' + str(target))

    # The differencing order depends only on the training series, so it is computed once
    # per target: take the larger of the KPSS and ADF estimates.
    kpss_diffs = ndiffs(y_train, alpha=0.05, test='kpss', max_d=6)
    adf_diffs = ndiffs(y_train, alpha=0.05, test='adf', max_d=6)
    n_diffs = max(adf_diffs, kpss_diffs)

    ace_list, mean_coverage_list, average_width_list, direction_accuracy_value_list = [], [], [], []

    for alpha in alphas:

        # A fresh model is fitted for every alpha because the rolling forecast below
        # updates the model in place with each test observation.
        model = pm.auto_arima(y_train, exog_y_train, d=n_diffs, seasonal=False, stepwise=True,
                         suppress_warnings=True, max_p=6, trace=2)

        print(model.order)
        orders.append(model.order)
        model_alpha.append(str(target)+'_'+str(alpha))

        print('Testing alpha value: '+str(alpha))

        # Generate forecasts
        forecast_values, forecast_intervals_lower, forecast_intervals_upper = [], [], []

        for step, new_ob in enumerate(y_test):

            # Single exogenous row kept 2-D, shape (1, n_exog)
            exog_row = exog_y_test[step:step + 1]

            fc, conf = forecast_one_step(model, exog_row, alpha)
            forecast_values.append(fc)
            forecast_intervals_lower.append(conf[0])
            forecast_intervals_upper.append(conf[1])

            # Updates the existing model with a small number of MLE steps
            model.update(new_ob, exog_row)

        forecast_values = np.array(forecast_values).flatten()
        forecast_intervals_lower = np.array(forecast_intervals_lower)
        forecast_intervals_upper = np.array(forecast_intervals_upper)

        ace = regression_ace(test_y, forecast_intervals_lower, forecast_intervals_upper, alpha)
        mean_coverage = regression_mean_coverage(test_y, forecast_intervals_lower, forecast_intervals_upper)
        average_width = regression_sharpness(forecast_intervals_lower, forecast_intervals_upper)
        direction_accuracy_value = direction_accuracy(test_y, forecast_values)

        print('The average coverage error is: '+str(ace))
        print('The mean coverage: '+str(mean_coverage))
        print('The PI average width: '+str(average_width))
        print('The directional accuracy value is: '+str(direction_accuracy_value))

        # Create a list with the values (the computed accuracy, not the function object)
        cp_metric_data = [['ace', ace], ['mean_coverage', mean_coverage], ['average_width', average_width], ['direction_accuracy', direction_accuracy_value]]

        # Confidence level in percent, used as the file-name suffix; round() guards against
        # float error truncating e.g. 94.999... down to 94.
        confidence_label = str(int(round((1 - alpha) * 100)))

        # Specify the CSV file name
        metrics_csv_file_name = os.path.join(folder_name, 'arima_confidence_metrics_alp_' + confidence_label + '.csv')

        # Write to the CSV file
        with open(metrics_csv_file_name, mode='w', newline='') as file:
            writer = csv.writer(file)
            # Write the header
            writer.writerow(['Metric', 'Value'])
            # Write the data
            writer.writerows(cp_metric_data)

        print(f"Metrics saved to {metrics_csv_file_name}")

        # Create a DataFrame with the actual values, forecasts and interval bounds
        results_df = pd.DataFrame({
            'y_true': test_y,
            'y_pred_from_arima': forecast_values,
            'y_lower': forecast_intervals_lower,
            'y_upper': forecast_intervals_upper
        })

        # The first VAL_SIZE rows form the validation split, the rest the test split
        val_df = results_df.iloc[:VAL_SIZE]
        test_df = results_df.iloc[VAL_SIZE:]

        # Specify the CSV file names
        val_preds_actual_csv_file_name = os.path.join(folder_name, 'val_arima_y_true_and_pred_alp_' + confidence_label + '.csv')
        test_preds_actual_csv_file_name = os.path.join(folder_name, 'test_arima_y_true_and_pred_alp_' + confidence_label + '.csv')

        # Write to the CSV files
        val_df.to_csv(val_preds_actual_csv_file_name, index=False)
        test_df.to_csv(test_preds_actual_csv_file_name, index=False)

        ace_list.append(ace)
        mean_coverage_list.append(mean_coverage)
        average_width_list.append(average_width)
        direction_accuracy_value_list.append(direction_accuracy_value)

    # Create a new figure and axis (left y-axis)
    fig, ax1 = plt.subplots()

    # Plot Mean Coverage on the left y-axis
    ax1.plot(alphas, mean_coverage_list, label='Mean Coverage', marker='s', color='b')
    ax1.set_xlabel('Alpha')
    ax1.set_ylabel('Coverage', color='black')  # Generalized y-axis label
    ax1.tick_params(axis='y', colors='black')

    # Plot 'Actual Confidence' as a dotted black line
    ax1.plot(alphas, confidence, label='Actual Confidence', linestyle='--', color='black', marker='o', alpha=0.3)

    # Create a second y-axis (right y-axis)
    ax2 = ax1.twinx()

    # Plot Average Width on the right y-axis; labels and ticks are black on both axes
    ax2.plot(alphas, average_width_list, label='Average Width', marker='^', color='r')
    ax2.set_ylabel('Average Width', color='black')
    ax2.tick_params(axis='y', colors='black')

    # Add a single legend that covers the lines of both axes
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='upper left')

    # Title
    plt.title('Confidence Interval Metric vs. Alpha for ARIMA Model')

    # Save the plot as an image file
    plt.savefig(os.path.join(folder_name, 'val_arima_metrics_vs_alpha.png'))

    # Show the plot, then release the figure so figures do not pile up across targets
    plt.show()
    plt.close(fig)

    # Combine lists into a list of tuples (one row per alpha)
    metric_data = list(zip(ace_list, mean_coverage_list, average_width_list, direction_accuracy_value_list))

    # Specify the CSV file name
    csv_file_name = os.path.join(folder_name, 'arima_multiple_alpha_metrics_data.csv')

    # Write to the CSV file
    with open(csv_file_name, mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the header
        writer.writerow(['ACE', 'Mean Coverage', 'Average Width', 'Direction Accuracy'])
        # Write the data
        writer.writerows(metric_data)

    print(f"Metrics saved to {csv_file_name}")

# Save the selected ARIMA orders for every target/alpha pair.
# NOTE(review): the file is named after the last target processed although it holds the
# orders of all targets; the name is kept for compatibility with existing outputs.
os.makedirs(ORDERS_DIR, exist_ok=True)
with open(os.path.join(ORDERS_DIR, 'arima_orders_' + target + '.csv'), 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)

    # Write the header
    csvwriter.writerow(['target_alpha', 'order'])

    # Write the data
    for string, tuple_value in zip(model_alpha, orders):
        csvwriter.writerow([string, tuple_value])