Direct forecasting without external variables

In [4]:
# pandas is needed from this first code cell onward; importing it here keeps the
# notebook runnable on a fresh kernel (Restart Kernel -> Run All).
import pandas as pd

# Direct-forecast metrics (no external variables), updated with the LSTM results.
# Each list is ordered like 'Model': ARIMA, Linear Regression, LSTM.
# NOTE(review): the LSTM MSE/RMSE are orders of magnitude smaller than the other
# models' values -- presumably computed on scaled data; confirm before comparing
# rows on those two columns.
results = {
    'Model': ['ARIMA', 'Linear Regression', 'LSTM'],
    'MSE': [39560.30, 936.30, 0.0016],
    'RMSE': [198.90, 30.60, 0.0394],
    'MAPE (%)': [13.77, 1.26, 8.12],
    'R^2': [-0.0721, 0.8141, 0.5501]
}

# One row per model, one column per metric.
df_results = pd.DataFrame(results)
df_results


Unnamed: 0,Model,MSE,RMSE,MAPE (%),R^2
0,ARIMA,39560.3,198.9,13.77,-0.0721
1,Linear Regression,936.3,30.6,1.26,0.8141
2,LSTM,0.0016,0.0394,8.12,0.5501


In [5]:
# Round the raw metric values to 4 decimal places for reporting.
# Each group is unpacked in the order MSE, RMSE, MAPE, R^2.

# Linear Regression: rolling forecast on the validation set
(mse_roll_formatted,
 rmse_roll_formatted,
 mape_roll_formatted,
 r2_roll_formatted) = (
    round(raw, 4)
    for raw in (
        9240.433040276355,
        96.12717118627987,
        4.938131427915046,
        0.963778301801877,
    )
)

# Linear Regression: test set (fitted on training + validation data)
(mse_test_formatted,
 rmse_test_formatted,
 mape_test_formatted,
 r2_test_formatted) = (
    round(raw, 4)
    for raw in (
        3301.4162102965697,
        57.45795167160564,
        2.9760494961837987,
        0.9093127977105935,
    )
)

# LSTM: rolling forecast
# NOTE(review): MSE/RMSE here are far smaller than the linear-regression values
# above -- presumably measured on scaled data; confirm.
(mse_lstm_formatted,
 rmse_lstm_formatted,
 mape_lstm_formatted,
 r2_lstm_formatted) = (
    round(raw, 4)
    for raw in (
        0.0006782468181865058,
        0.026043172198994994,
        5.801554722440465,
        0.7699207781661014,
    )
)


In [9]:
import pandas as pd
# Summary table of the rolling-forecast metrics, written one model per row.
# NOTE(review): the LSTM MAPE below (42.1347) differs from the 4.8182 listed for
# 'LSTM (Rolling)' in the combined results table, although MSE, RMSE and R^2 are
# identical -- confirm which value is correct.
df = pd.DataFrame(
    [
        ('Linear Regression (Validation)', 9240.4330, 96.1272, 4.9381, 0.9638),
        ('Linear Regression (Test)', 3301.4162, 57.4580, 2.9760, 0.9093),
        ('LSTM (Rolling Forecast)', 2657.2365, 51.5484, 42.1347, 0.9226),
    ],
    columns=['Model', 'MSE', 'RMSE', 'MAPE (%)', 'R^2'],
)

# Render the table as the cell output
df


Unnamed: 0,Model,MSE,RMSE,MAPE (%),R^2
0,Linear Regression (Validation),9240.433,96.1272,4.9381,0.9638
1,Linear Regression (Test),3301.4162,57.458,2.976,0.9093
2,LSTM (Rolling Forecast),2657.2365,51.5484,42.1347,0.9226


In [11]:
# Gather every result set from the no-external-variable experiments into one table.
# Metrics are written one model per row and transposed into column -> values
# dictionaries, so each model's numbers sit side by side.

_METRIC_COLUMNS = ('Model', 'MSE', 'RMSE', 'MAPE (%)', 'R^2')


def _as_columns(rows):
    """Turn (model, mse, rmse, mape, r2) rows into a {column: [values]} dict."""
    return {
        column: list(values)
        for column, values in zip(_METRIC_COLUMNS, zip(*rows))
    }


# Previously reported metrics for ARIMA, Linear Regression and LSTM
results_previous = _as_columns([
    ('ARIMA', 39560.30, 198.90, 13.77, -0.0721),
    ('Linear Regression (Direct)', 936.30, 30.60, 1.26, 0.8141),
    ('LSTM (Direct)', 0.0016, 0.0394, 8.12, 0.5501),
])

# New rolling-forecast metrics for Linear Regression and LSTM (no external factors)
results_rolling_no_ext = _as_columns([
    ('Linear Regression (Rolling)', 1671.8537, 40.8883, 2.1557, 0.9547),
    ('LSTM (Rolling)', 2657.2365, 51.5484, 4.8182, 0.9226),
])

# New metrics for the different ARIMA configurations
results_arima_configs = _as_columns([
    ('ARIMA (Rolling, No Opt Params)', 1539.7291, 39.2394, 2.0799, 0.9583),
    ('ARIMA (Rolling, Opt Params)', 1556.5874, 39.4536, 2.1253, 0.9578),
    ('ARIMA (Rolling, Opt Params, 3-day Interval)', 1375.0673, 37.0819, 2.1389, 0.9636),
    ('ARIMA (Rolling, Opt Params, 7-day Interval)', 2046.4604, 45.2378, 2.4333, 0.9376),
])

# Stack the three tables in order and renumber the rows from 0
_frames = [
    pd.DataFrame(results_previous),
    pd.DataFrame(results_rolling_no_ext),
    pd.DataFrame(results_arima_configs),
]
all_results = pd.concat(_frames, ignore_index=True)
all_results


Unnamed: 0,Model,MSE,RMSE,MAPE (%),R^2
0,ARIMA,39560.3,198.9,13.77,-0.0721
1,Linear Regression (Direct),936.3,30.6,1.26,0.8141
2,LSTM (Direct),0.0016,0.0394,8.12,0.5501
3,Linear Regression (Rolling),1671.8537,40.8883,2.1557,0.9547
4,LSTM (Rolling),2657.2365,51.5484,4.8182,0.9226
5,"ARIMA (Rolling, No Opt Params)",1539.7291,39.2394,2.0799,0.9583
6,"ARIMA (Rolling, Opt Params)",1556.5874,39.4536,2.1253,0.9578
7,"ARIMA (Rolling, Opt Params, 3-day Interval)",1375.0673,37.0819,2.1389,0.9636
8,"ARIMA (Rolling, Opt Params, 7-day Interval)",2046.4604,45.2378,2.4333,0.9376


In [12]:
# Performance metrics for the eight model / series combinations listed under
# "Model". Position i in every metric list belongs to the i-th model label.
metrics_data = {
    "Model": [
        "ARIMAX (Gold, Rolling Forecast)",
        "ARIMAX (Stock, Rolling Forecast)",
        "SARIMAX (Gold, Rolling Forecast)",
        "SARIMAX (Stock, Rolling Forecast)",
        "Linear Regression (Stock, Rolling)",
        "Linear Regression (Gold, Rolling)",
        "LSTM (Gold, Rolling Forecast)",
        "LSTM (Stock, Rolling Forecast)",
    ],
    "MSE": [8842.8131, 19230.1702, 96342.76, 18718.94, 2308.61, 1080.44, 3181.26, 84047.8282],
    "RMSE": [94.0362, 138.6729, 310.39, 136.82, 48.05, 32.87, 56.40, 289.9100],
    "MAPE (%)": [5.5728, 8.6114, 25.35, 8.66, 2.79, 1.91, 3.48, 21.8252],
    "R^2": [0.7303, 0.4912, -4.55, 0.50, 0.93, 0.97, 0.91, -4.3679],
}

# Build the table (one row per model) and show it as the cell output
df_metrics = pd.DataFrame(metrics_data)
df_metrics


Unnamed: 0,Model,MSE,RMSE,MAPE (%),R^2
0,"ARIMAX (Gold, Rolling Forecast)",8842.8131,94.0362,5.5728,0.7303
1,"ARIMAX (Stock, Rolling Forecast)",19230.1702,138.6729,8.6114,0.4912
2,"SARIMAX (Gold, Rolling Forecast)",96342.76,310.39,25.35,-4.55
3,"SARIMAX (Stock, Rolling Forecast)",18718.94,136.82,8.66,0.5
4,"Linear Regression (Stock, Rolling)",2308.61,48.05,2.79,0.93
5,"Linear Regression (Gold, Rolling)",1080.44,32.87,1.91,0.97
6,"LSTM (Gold, Rolling Forecast)",3181.26,56.4,3.48,0.91
7,"LSTM (Stock, Rolling Forecast)",84047.8282,289.91,21.8252,-4.3679
