<a href="https://colab.research.google.com/github/LaFuego20/Project/blob/main/ARIMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
warnings.filterwarnings("ignore")

# Fix NumPy's global random state so repeated runs behave the same
np.random.seed(42)

# Read the exchange-rate table, parse the day/month/year 'Date' strings,
# and index the rows by the parsed dates
df = (
    pd.read_csv("merged_exchange_rates.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%d/%m/%Y'))
    .set_index('Date')
)

# Columns of df that hold the exchange-rate series to model
currencies = ['USD_NGN', 'GBP_NGN', 'EUR_NGN']

# Subsidy removal date; splits each series into "pre" and "post" samples
cutoff_date = pd.Timestamp("2023-05-29")

# ARIMA forecasting function
def run_arima(series, label, currency, order=(1, 1, 1)):
    """Fit an ARIMA model to ``series`` and score its in-sample predictions.

    The model is fitted on the whole of ``series``; predictions are then
    requested for every observation except the first and compared with the
    observed values.

    Args:
        series: pandas Series of observations to model (NaNs already removed
            by the caller in this file).
        label: Name of the sample (e.g. "Full"), used only in printed messages.
        currency: Name of the currency column, used only in printed messages.
        order: ``(p, d, q)`` order handed to ``ARIMA``. Defaults to
            ``(1, 1, 1)``, the order this function previously hard-coded.

    Returns:
        A tuple ``(rmse, mae, actual, forecast)``:

        * ``rmse`` / ``mae`` -- error metrics, or ``None`` when they could not
          be computed.
        * ``actual`` -- ``series`` without its first observation.
        * ``forecast`` -- the model's predictions aligned to ``actual``.

        When the series is too short, or the forecast cannot be aligned to
        ``actual``, both Series are empty float Series.
    """
    order_text = ",".join(str(term) for term in order)
    print(f"\n=== ARIMA({order_text}) Results for {currency} - {label} ===")

    # Ensure there are enough data points for ARIMA
    if len(series) < 3:
        print(f"Not enough data to run ARIMA for {currency} - {label}")
        # Explicit dtype: a bare pd.Series() yields an object-dtype (or, on
        # older pandas, a deprecation-warning) placeholder.
        return None, None, pd.Series(dtype=float), pd.Series(dtype=float)

    fitted_model = ARIMA(series, order=order).fit()

    # Use integer positions for predict: second observation through the last.
    # The length guard above guarantees at least three observations, so this
    # range is never empty and needs no further validation.
    start_idx = 1
    end_idx = len(series) - 1

    # NOTE(review): `typ` comes from the legacy statsmodels ARIMA API; it is
    # presumably ignored by this results class -- confirm before removing.
    forecast = fitted_model.predict(start=start_idx, end=end_idx, typ='levels')
    actual = series.iloc[start_idx:]

    # Metrics are only meaningful when both Series share the same index
    if not forecast.index.equals(actual.index):
        print(f"Forecast and actual indices do not match for {currency} - {label}")
        # Attempt to reindex forecast to match actual's index if possible
        try:
            forecast = forecast.reindex(actual.index)
        except Exception as e:
            print(f"Error reindexing forecast for {currency} - {label}: {e}")
            return None, None, pd.Series(dtype=float), pd.Series(dtype=float)

    # Reindexing can introduce NaNs, and the metric functions cannot score them
    if forecast.isnull().any() or actual.isnull().any():
        print(f"NaN values present in forecast or actual after reindexing for {currency} - {label}. Cannot compute metrics.")
        return None, None, actual, forecast

    rmse = np.sqrt(mean_squared_error(actual, forecast))
    mae = mean_absolute_error(actual, forecast)
    return rmse, mae, actual, forecast

# Keys under which run_arima's (rmse, mae, actual, forecast) tuple is stored
result_fields = ('rmse', 'mae', 'actual', 'forecast')

results = {}

for currency in currencies:
    # Three samples per currency: everything, before the cutoff, and from
    # the cutoff onward. All are built before any model is fitted.
    samples = {
        'Full': df[currency].dropna(),
        'Pre-Subsidy': df[df.index < cutoff_date][currency].dropna(),
        'Post-Subsidy': df[df.index >= cutoff_date][currency].dropna(),
    }

    per_period = {}
    results[currency] = per_period

    # Fit and score each sample in the order Full, Pre-Subsidy, Post-Subsidy
    for sample_label, sample in samples.items():
        outcome = run_arima(sample, sample_label, currency)
        per_period[sample_label] = dict(zip(result_fields, outcome))

display(results)


# Print RMSE and MAE for every currency and sample, four decimals each
for currency, time_periods in results.items():
    print(f"\n--- {currency} ARIMA(1,1,1) Metrics ---")
    for period, metrics in time_periods.items():
        rmse, mae = metrics['rmse'], metrics['mae']
        print(f"  {period}:")

        # Either metric being None means the sample could not be scored
        if rmse is None or mae is None:
            print("    Metrics not available (e.g., not enough data or reindexing issue)")
            continue

        print(f"    RMSE: {rmse:.4f}")
        print(f"    MAE : {mae:.4f}")


# Create one figure per time period, with one subplot per currency, and save each as PNG
for period in ['Pre-Subsidy', 'Post-Subsidy', 'Combined']:
    print(f"\n--- {period} Period ARIMA(1,1,1) Plots ---")

    # squeeze=False makes plt.subplots return a 2-D array of axes even when
    # there is a single currency; without it a lone Axes object comes back
    # and cannot be indexed.
    fig, axes = plt.subplots(len(currencies), 1, figsize=(15, 15), sharex=False, squeeze=False)

    # 'Combined' overlays the separately fitted pre- and post-cutoff results
    # on one axis; the other periods draw only their own results.
    segments = ['Pre-Subsidy', 'Post-Subsidy'] if period == 'Combined' else [period]

    for ax, currency in zip(axes[:, 0], currencies):
        for segment in segments:
            fitted = results[currency][segment]
            ax.plot(fitted['actual'], label=f"{currency} - Actual ({segment})")
            ax.plot(fitted['forecast'].index, fitted['forecast'], label=f"{currency} - Predicted ({segment})", linestyle="--")

        if period == 'Combined':
            # Mark where the pre-cutoff sample ends and the post-cutoff one begins
            ax.axvline(x=cutoff_date, color='gray', linestyle='--', label='Subsidy Removal Date')
            ax.set_title(f"{currency} - Pre- and Post-Subsidy Periods (Combined) (ARIMA(1,1,1))")
        else:
            ax.set_title(f"{currency} - {period} Period (ARIMA(1,1,1))")

        ax.set_xlabel("Date")
        ax.set_ylabel("Exchange Rate")
        ax.legend()
        ax.tick_params(axis='x', rotation=45)

    fig.tight_layout()  # Adjust layout to prevent overlapping titles/labels
    fig.savefig(f'All_Currencies_Subplots_{period}_ARIMA_plot.png')  # Save the figure
    plt.close(fig)  # Close the figure to free up memory


# Extract RMSE and MAE for pre and post subsidy periods
rmse_pre = [results[c]['Pre-Subsidy']['rmse'] for c in currencies]
rmse_post = [results[c]['Post-Subsidy']['rmse'] for c in currencies]
mae_pre = [results[c]['Pre-Subsidy']['mae'] for c in currencies]
mae_post = [results[c]['Post-Subsidy']['mae'] for c in currencies]


def _bar_heights(values):
    """Return ``values`` with ``None`` replaced by NaN.

    run_arima reports an unavailable metric as ``None``, which ``ax.bar``
    cannot draw; NaN leaves a gap for that bar instead of raising.
    """
    return [np.nan if value is None else value for value in values]


x = np.arange(len(currencies))  # the label locations
width = 0.35  # the width of the bars

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))

# One panel per metric, each with a pre/post pair of bars per currency
panels = (
    (ax1, 'RMSE', rmse_pre, rmse_post),
    (ax2, 'MAE', mae_pre, mae_post),
)
for ax, metric_name, pre_values, post_values in panels:
    ax.bar(x - width/2, _bar_heights(pre_values), width, label='Pre-Subsidy')
    ax.bar(x + width/2, _bar_heights(post_values), width, label='Post-Subsidy')

    ax.set_ylabel(metric_name)
    ax.set_title(f'{metric_name} Pre vs Post Subsidy')
    ax.set_xticks(x)
    ax.set_xticklabels(currencies)
    ax.legend()

fig.tight_layout()

# Save the bar plots as a PNG file
fig.savefig('rmse_mae_barplots.png')

# Close the figure to free up memory
plt.close(fig)


=== ARIMA(1,1,1) Results for USD_NGN - Full ===

=== ARIMA(1,1,1) Results for USD_NGN - Pre-Subsidy ===

=== ARIMA(1,1,1) Results for USD_NGN - Post-Subsidy ===

=== ARIMA(1,1,1) Results for GBP_NGN - Full ===

=== ARIMA(1,1,1) Results for GBP_NGN - Pre-Subsidy ===

=== ARIMA(1,1,1) Results for GBP_NGN - Post-Subsidy ===

=== ARIMA(1,1,1) Results for EUR_NGN - Full ===

=== ARIMA(1,1,1) Results for EUR_NGN - Pre-Subsidy ===

=== ARIMA(1,1,1) Results for EUR_NGN - Post-Subsidy ===


{'USD_NGN': {'Full': {'rmse': np.float64(18.758700092197106),
   'mae': 5.11369190645249,
   'actual': Date
   2021-01-04     386.11
   2021-01-05     388.31
   2021-01-06     388.31
   2021-01-07     389.39
   2021-01-08     383.50
                  ...   
   2025-05-28    1586.64
   2025-05-29    1587.32
   2025-05-30    1586.77
   2025-06-02    1582.45
   2025-06-03    1578.77
   Name: USD_NGN, Length: 1399, dtype: float64,
   'forecast': Date
   2021-01-04     396.700877
   2021-01-05     386.043454
   2021-01-06     388.324146
   2021-01-07     388.309932
   2021-01-08     389.396787
                    ...     
   2025-05-28    1582.620421
   2025-05-29    1586.665273
   2025-05-30    1587.324151
   2025-06-02    1586.766524
   2025-06-03    1582.422871
   Name: predicted_mean, Length: 1399, dtype: float64},
  'Pre-Subsidy': {'rmse': np.float64(3.276384974703891),
   'mae': 1.5729065774462412,
   'actual': Date
   2021-01-04    386.11
   2021-01-05    388.31
   2021-01-06    388.


--- USD_NGN ARIMA(1,1,1) Metrics ---
  Full:
    RMSE: 18.7587
    MAE : 5.1137
  Pre-Subsidy:
    RMSE: 3.2764
    MAE : 1.5729
  Post-Subsidy:
    RMSE: 30.1460
    MAE : 11.1249

--- GBP_NGN ARIMA(1,1,1) Metrics ---
  Full:
    RMSE: 25.7596
    MAE : 8.6527
  Pre-Subsidy:
    RMSE: 5.7839
    MAE : 3.5070
  Post-Subsidy:
    RMSE: 41.1820
    MAE : 17.7955

--- EUR_NGN ARIMA(1,1,1) Metrics ---
  Full:
    RMSE: 21.9602
    MAE : 7.1092
  Pre-Subsidy:
    RMSE: 4.6843
    MAE : 2.8349
  Post-Subsidy:
    RMSE: 35.1385
    MAE : 14.6308

--- Pre-Subsidy Period ARIMA(1,1,1) Plots ---

--- Post-Subsidy Period ARIMA(1,1,1) Plots ---

--- Combined Period ARIMA(1,1,1) Plots ---
