In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error,mean_absolute_percentage_error,root
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')
plt.rcParams['figure.figsize'] = (15,5)
plt.rcParams['figure.dpi'] = 250
sns.set_style('darkgrid')
%matplotlib inline

ImportError: cannot import name 'root' from 'sklearn.metrics' (C:\Users\hp\anaconda3\Lib\site-packages\sklearn\metrics\__init__.py)

In [None]:
# Read the exchange-rate CSV straight from GitHub; the first column becomes the index.
DATA_URL = 'https://raw.githubusercontent.com/PrathameshR10/Assignments/main/Folder/exchange_rate.csv'
df = pd.read_csv(DATA_URL, index_col=0)

In [None]:
# Convert the index (read as the first CSV column) to datetimes.
# `infer_datetime_format` was dropped: it is deprecated in pandas 2.x, where
# strict format inference is already the default behaviour.
# NOTE(review): if the source dates are written day-first, pass `dayfirst=True`
# or an explicit `format=` here - confirm against the raw CSV.
df.index = pd.to_datetime(df.index)
df

In [None]:
# Per-column count of missing entries (the original author noted there are none).
df.isnull().sum()

In [None]:
import plotly.graph_objects as go

# Single line trace of the exchange rate against its datetime index.
rate_trace = go.Scatter(x=df.index, y=df['Ex_rate'], marker_color='lightgreen')
fig = go.Figure(data=[rate_trace])

# Layout keywords are kept in their original order because `font_color` and
# `font=dict(color=...)` both address the font colour.
fig.update_layout(
    title='TIME-SERIES PLOT OF USD-AUD',
    height=450,
    width=1000,
    template='plotly_dark',
    font_color='lightgreen',
    font=dict(family="sans serif", size=16, color="grey"),
)

fig.update_xaxes(title='Date')
fig.update_yaxes(title='Rate / $')
fig.show()

### Test for Stationarity of Data

#### ACF & PACF Plots

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Autocorrelation and partial-autocorrelation plots of the exchange-rate column.
rate_series = df['Ex_rate']
plot_acf(rate_series)
plot_pacf(rate_series)

#### ADF Test for Stationarity 

In [None]:
# Augmented Dickey-Fuller test on the exchange rate; lag length chosen by AIC.
adf = adfuller(x=df['Ex_rate'], autolag='AIC')

In [None]:
# Collect the headline ADF outputs (positions 0-3 of the result tuple) ...
adf_fields = {
    'Test Statistic': adf[0],
    'p-value': adf[1],
    'Number of Lags Used': adf[2],
    'Number of Observations Used': adf[3],
}

# ... then append one entry per critical value (position 4 is a mapping).
adf_fields.update(
    {f'Critical Value ({level})': threshold for level, threshold in adf[4].items()}
)

adf_result = pd.Series(adf_fields)

In [None]:
# Display the ADF summary Series (test statistic, p-value, lags, observations, critical values).
adf_result

In [None]:
# Null hypothesis: Data is Non stationary

# Alternate Hypothesis: Data is Stationary

    # If p< 0.05 ; Data is stationary

    # if p>0.05; Data is not stationary

# According to the p-value, the dataset is not stationary because the p-value > alpha (0.05).

In [None]:
# To make the dataset stationary, there are methods like:
        # 1. Log Transformation
        # 2. Differencing
# We will use differencing method to make our data stationary.

#### Differencing for Stationarity

In [None]:
# First-order differencing; the leading row becomes NaN and is dropped.
# Note: this rebinds `df`, so re-running the cell differences the data again.
df = df.diff(periods=1).dropna()

#### Check for Stationarity

In [None]:
# Re-run the ADF test, now on the differenced series (lag length chosen by AIC).
adf = adfuller(x=df['Ex_rate'], autolag='AIC')

# Headline outputs come from positions 0-3 of the result tuple ...
adf_fields = {
    'Test Statistic': adf[0],
    'p-value': adf[1],
    'Number of Lags Used': adf[2],
    'Number of Observations Used': adf[3],
}

# ... and position 4 maps each significance level to its critical value.
adf_fields.update(
    {f'Critical Value ({level})': threshold for level, threshold in adf[4].items()}
)

adf_result = pd.Series(adf_fields)

In [None]:
# Print the ADF summary Series built in the previous cell.
print(adf_result)

In [None]:
# Null hypothesis: Data is Non stationary

# Alternate Hypothesis: Data is Stationary

    # If p< 0.05 ; Data is stationary

    # if p>0.05; Data is not stationary

# According to the p-value, the dataset is stationary because the p-value < alpha (0.05).

In [None]:
# Hold out the final 30 rows as the test set; everything before them is training data.
train = df.iloc[:-30]
test = df.iloc[-30:]

print('Train Shape: ', train.shape)
print('Test Shape: ', test.shape)

## Model Building-ARIMA

In [None]:
# Choose the ARIMA order with a manual grid search over (p, d, q), ranked by AIC.
# (This cell does not call auto_arima, despite what the earlier comment said.)
import itertools

# Candidate ranges for p, d, q.
# NOTE(review): `train` comes from a frame that was already differenced once
# with `df.diff()`, so d=1 here differences the data a second time - confirm
# that this is intended.
p = range(1, 4)
d = range(0, 2)
q = range(1, 3)
pdq = list(itertools.product(p, d, q))

aics = []          # AIC of every order that fitted successfully
params = []        # the matching (p, d, q) tuples, same positions as `aics`
failed_orders = [] # orders whose fit raised, kept for inspection

# Grid search to find the best ARIMA parameters based on AIC
for param in pdq:
    try:
        model = ARIMA(train['Ex_rate'], order=param)
        model_fit = model.fit()
        aic = model_fit.aic
    except Exception as e:
        # Still best-effort (the search carries on), but the failure is
        # reported instead of being silently discarded.
        failed_orders.append(param)
        print(f"Skipped order {param}: {e}")
        continue
    aics.append(aic)
    params.append(param)

In [None]:
# Rank (AIC, order) pairs in ascending order; the first entry has the lowest AIC.
combo = sorted(zip(aics, params))
best_aic, best_param = combo[0]

print(f"Best AIC: {best_aic}")
print(f"Best Parameters: {best_param}")

In [None]:
# Refit ARIMA on the training series using the order selected by the grid search.
model = ARIMA(endog=train['Ex_rate'], order=best_param)
model_fit = model.fit()

# Show the fitted model's summary table.
print(model_fit.summary())

In [None]:
import statsmodels.api as sm

# Seasonal ARIMA with a fixed (1,0,1) order and a (1,0,1,6) seasonal order,
# with the stationarity and invertibility constraints switched off.
sarimax_spec = sm.tsa.statespace.SARIMAX(
    train.Ex_rate,
    order=(1, 0, 1),
    seasonal_order=(1, 0, 1, 6),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
arima = sarimax_spec.fit()

# 30-step-ahead forecast, flattened to a plain 1-D array of length 30.
predictions = np.asarray(arima.forecast(30)).reshape((30,))

#### Residual Analysis 

In [None]:
# Residuals of the fitted ARIMA model, drawn as a line chart.
residuals = model_fit.resid

resid_fig, resid_ax = plt.subplots(figsize=(10, 6))
resid_ax.plot(residuals)
resid_ax.set_title('ARIMA Model Residuals')
plt.show()

#### Forecasting

In [None]:
# Forecast as many steps as there are rows in the test set.
forecast_steps = len(test)
forecast = model_fit.get_forecast(steps=forecast_steps)
forecast_index = test.index

# `forecast.predicted_mean` carries its own index. Passing a Series together
# with `index=` makes pandas *reindex* by label, which yields NaN wherever the
# labels differ from `test.index`. Convert to a plain array first so the values
# are attached to the test dates by position.
forecast_series = pd.Series(np.asarray(forecast.predicted_mean), index=forecast_index)

In [None]:
# Forecast future values beyond the test period
future_periods = 12 # Number of future periods to predict

# Business-day dates strictly after the last test date (the first element is dropped).
future_index = pd.date_range(start=test.index[-1], periods=future_periods + 1, freq='B')[1:]

# The model was fitted on `train` only, so its forecast origin is the end of the
# training data. To reach past the test window, forecast len(test) + future_periods
# steps and keep only the trailing `future_periods` values; the original took the
# first 12 steps, which overlap the test window.
full_horizon = np.asarray(model_fit.forecast(steps=len(test) + future_periods))
future_forecast = pd.Series(full_horizon[-future_periods:], index=future_index)

In [None]:
## Plotting actual and forecasted values
plt.figure(figsize=(12, 6))
plt.plot(train['Ex_rate'], label='Train')
# The held-out observations are the ground truth, not a forecast.
plt.plot(test['Ex_rate'], label='Actual (test)')
# Draw the model's forecast against the test dates; the values are taken
# positionally so that any index on `predicted_mean` cannot misalign them.
plt.plot(test.index, np.asarray(forecast.predicted_mean), label='Forecast')
plt.title('Foreign Exchange Rate Prediction, USD-AUD')
plt.xlabel('Date')
plt.ylabel('Foreign Exchange Rate')
plt.legend()
plt.show()

In [None]:
# Last 30 observations versus the 30-step `predictions` array, on a 0..29 axis.
y_val = df['Ex_rate'].iloc[-30:]
positions = np.arange(len(y_val))

plt.figure(figsize=(14, 5))
for values, colour in ((y_val, 'steelblue'), (predictions, 'salmon')):
    plt.plot(positions, values, color=colour)
plt.legend(['True Value', 'Prediction']);

In [None]:
# Predict exactly the positions occupied by the test rows. The model was fitted
# on `train`, so the first out-of-sample position is len(train).
# The original hard-coded start=7558 / end=7587, which only suits one dataset
# size. NOTE(review): those numbers look like they predate the row dropped by
# differencing, which would shift the window by one - confirm.
test_start = len(train)
test_end = test_start + len(test) - 1
ypred = model_fit.predict(start=test_start, end=test_end)

In [None]:
# Mean squared error of the ARIMA predictions against the held-out values.
mse_a = mean_squared_error(y_true=test['Ex_rate'], y_pred=ypred)
print(f"Mean Squared Error: {mse_a}")

## Model Building-Exponential Smoothing

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
# Search space for the exponential-smoothing grid search.
# Only the first element of `seasonal_periods` is read by the cells below.
seasonal_periods = [12] 
# Candidate trend components; None means no trend component is passed.
trend = ['add', 'mul', None]
# Candidate seasonal components; None means no seasonal component is passed.
seasonal = ['add', 'mul', None]

In [None]:
# Grid search over (trend, seasonal) pairs for ExponentialSmoothing, keeping the
# fit with the lowest AIC.
best_aic = float('inf')
best_params = None
best_model = None
failed_configs = []  # (trend, seasonal) pairs whose fit raised

for t, s in itertools.product(trend, seasonal):
    try:
        # Candidate-specific names are used so this loop does not overwrite the
        # ARIMA `model` / `model_fit` objects created earlier in the notebook.
        candidate_fit = ExponentialSmoothing(
            train['Ex_rate'],
            trend=t,
            seasonal=s,
            seasonal_periods=seasonal_periods[0],
        ).fit(optimized=True)
        candidate_aic = candidate_fit.aic
    except Exception as e:
        # Still best-effort (the search carries on), but the failure is
        # reported instead of being silently discarded.
        failed_configs.append((t, s))
        print(f"Skipped Trend={t}, Seasonal={s}: {e}")
        continue

    if candidate_aic < best_aic:
        best_aic = candidate_aic
        best_params = (t, s)
        best_model = candidate_fit

# Without this guard, `best_params[0]` below would fail with an opaque TypeError.
if best_params is None:
    raise RuntimeError("No ExponentialSmoothing configuration could be fitted.")

print(f"Best AIC: {best_aic}")
print(f"Best Parameters: Trend={best_params[0]}, Seasonal={best_params[1]}")

In [None]:
# Rebuild and refit the exponential-smoothing model with the selected components.
chosen_trend, chosen_seasonal = best_params
best_model = ExponentialSmoothing(
    train['Ex_rate'],
    trend=chosen_trend,
    seasonal=chosen_seasonal,
    seasonal_periods=seasonal_periods[0],
)
best_model_fit = best_model.fit(optimized=True)

In [None]:
# One forecast step per row of the test set, from the fitted smoothing model.
n_test_steps = len(test)
test_forecast = best_model_fit.forecast(steps=n_test_steps)

In [None]:
# One exponential-smoothing forecast spanning the test window plus
# `future_periods` further steps. Values are handled as a plain array and
# re-labelled explicitly, so label-based reindexing cannot introduce NaNs.
es_horizon = np.asarray(best_model_fit.forecast(steps=len(test) + future_periods))
es_test_pred = pd.Series(es_horizon[:len(test)], index=test.index)

# Dates strictly after the last test date. Slicing off the first element
# replaces `closed='right'`, which newer pandas releases no longer accept.
es_future_index = pd.date_range(start=test.index[-1], periods=future_periods + 1)[1:]
# The original appended the ARIMA `future_forecast` here; this section is about
# exponential smoothing, so its own forecast is used instead.
es_future_pred = pd.Series(es_horizon[len(test):], index=es_future_index)

all_predictions = pd.concat([train['Ex_rate'], es_test_pred, es_future_pred])

# Plot the actual vs predicted values
plt.figure(figsize=(12, 6))
plt.plot(train['Ex_rate'], label='Train')
# The held-out observations are the ground truth, not a forecast.
plt.plot(test['Ex_rate'], label='Actual (test)')
plt.plot(es_test_pred, label='Forecast')
plt.plot(es_future_pred, label='Future forecast')
plt.title('Foreign Exchange Rate Prediction, USD-AUD')
plt.xlabel('Date')
plt.ylabel('Foreign Exchange Rate')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Mean squared error of the exponential-smoothing forecast against the held-out values.
mse_es = mean_squared_error(y_true=test['Ex_rate'], y_pred=test_forecast)
print(f"Mean Squared Error: {mse_es}")

### Comparison & Analysis