# 1.1 Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import seasonal_decompose

from dateutil import parser

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

import matplotlib.dates as mdates

import warnings
warnings.filterwarnings('ignore')

# 1.2 Importing CSV File

In [2]:
# Location of the raw price data, relative to the notebook's working directory.
DATA_FILE = 'goldstock.csv'

# Read the raw CSV into the working frame and preview the first rows.
df = pd.read_csv(DATA_FILE)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'goldstock.csv'

# 1.3 Removing Unnamed column:

In [None]:
# Drop the leftover 'Unnamed: 0' column.
# Assigning the result (instead of inplace=True) together with errors='ignore'
# makes this cell safe to re-run: a second execution no longer raises KeyError
# once the column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

In [None]:
# Print the frame's column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Count missing values per column and report what was actually found.
# The original printed the "no null values" message unconditionally, so the
# statement could be wrong for a different or updated data file.
null_counts = df.isnull().sum()
print(null_counts)

total_nulls = int(null_counts.sum())
if total_nulls == 0:
    print("There is no null values, means the dataset is maintained or collected very well.")
else:
    print(f"Found {total_nulls} null values in the dataset; handle them before modelling.")

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

# 1.4 Convert 'Date' column to datetime format

In [None]:
# Parse the 'Date' strings into pandas datetimes; the column keeps its position.
parsed_dates = pd.to_datetime(df['Date'])
df = df.assign(Date=parsed_dates)
df.head()

# 1.5 Set 'Date' as the index

In [None]:
# Use 'Date' as the index and order the rows chronologically in one step.
# Sorting here (not only right before the train/test split) matters because the
# decomposition, differencing and ACF/PACF cells below all treat the row order
# as the time order; on an unsorted frame, diff() would subtract the wrong
# neighbours.
df = df.set_index('Date').sort_index()
df.head()

# 1.6 Plotting the Closing Price:

In [None]:
# Apply the preferred style only when this matplotlib build provides it; the
# 'seaborn-v0_8-*' names are not available in every release, and an unknown
# style name makes plt.style.use raise.
preferred_style = 'seaborn-v0_8-dark-palette'
if preferred_style in plt.style.available:
    plt.style.use(preferred_style)

# Create exactly one figure. The original called plt.figure twice, which left
# an extra empty figure in the cell output.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(df['Close'], label='Closing Price')
ax.set_title('Gold Closing Prices Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Closing Price')
ax.legend()
plt.show()

## Insight from Plot:

The chart illustrates the overall increasing trend in gold prices over the past decade, with significant volatility in recent years. This trend could be influenced by various economic factors, including market demand, global economic conditions, and geopolitical events.

# 1.7 Seasonal Decomposition:

In [None]:
# Decompose the closing price into trend, seasonal and residual components
# using a multiplicative model.
# NOTE(review): period=365 presumes one row per calendar day. If the file only
# holds trading days, a year is closer to ~252 rows — confirm against the data.
close_prices = df['Close']
decomposition = seasonal_decompose(
    close_prices,
    model='multiplicative',
    period=365,
)
decomposition.plot()
plt.show()

## Insight from Plot:

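- **Downward Trend:** The long-term trend component of gold prices appears downward, with a significant decline followed by a slight recovery towards the end of the period. Note that this reading conflicts with the overall increasing trend observed in Section 1.6, so it should be re-verified against the decomposition plot with the data in chronological order.
- **Seasonal Pattern:** There is a clear seasonal pattern in gold prices, with regular fluctuations recurring annually.
- **Effective Decomposition:** The residuals are well-behaved, suggesting that the decomposition model has effectively captured the trend and seasonal components.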

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the closing price (missing values removed).
# The first two entries of the result are the test statistic and its p-value.
result = adfuller(df['Close'].dropna())
adf_statistic, p_value = result[:2]
print(f"ADF Statistic: {adf_statistic}")
print(f"p-value: {p_value}")

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
# Two panels side by side: the raw series and its autocorrelation.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

# Left: undifferenced closing price.
ax1.plot(df['Close'])
ax1.set(title="Original")

# Right: ACF of the undifferenced series (trailing ';' hides the figure repr).
plot_acf(df['Close'], ax=ax2);

In [None]:
# First-order difference of the closing price; the leading NaN is dropped.
diff = df['Close'].diff().dropna()

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

# Left: the differenced series. Right: its autocorrelation.
ax1.plot(diff)
ax1.set(title="Difference once")

plot_acf(diff, ax=ax2);

In [None]:
# Second-order difference: difference the series twice, then drop the NaNs
# introduced at the start.
first_difference = df['Close'].diff()
diff = first_difference.diff().dropna()

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

# Left: the twice-differenced series. Right: its autocorrelation.
ax1.plot(diff)
ax1.set(title="Difference twice")

plot_acf(diff, ax=ax2);

Alternatively, the pmdarima package (for example, its `ndiffs` function) can be used to estimate the required order of differencing, d.

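**p** — the autoregressive (AR) order, read from the partial autocorrelation (PACF) plot of the differenced series.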

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

In [None]:
# First-order difference of the closing price, used to read off the AR order.
diff = df['Close'].diff().dropna()

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

# Left: the differenced series.
ax1.plot(diff)
ax1.set(title="Difference once")

# Right: partial autocorrelation. The y-limits are set before plotting, in the
# same order as the original cell.
ax2.set_ylim(0, 1)
plot_pacf(diff, ax=ax2);

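**q** — the moving-average (MA) order, read from the autocorrelation (ACF) plot of the differenced series.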

In [None]:
# First-order difference of the closing price, used to read off the MA order.
diff = df['Close'].diff().dropna()

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 4))

# Left: the differenced series.
ax1.plot(diff)
ax1.set(title="Difference once")

# Right: autocorrelation. The y-limits are set before plotting, in the same
# order as the original cell.
ax2.set_ylim(0, 1)
plot_acf(diff, ax=ax2);

# 2.1 ARIMA Model for Forecasting:

In [None]:
# Chronological 80/20 split of the closing price.
# ARIMA and mean_squared_error are already imported in the imports cell at the
# top of the notebook, so they are not re-imported here.

# Order rows oldest -> newest so the test set is strictly the most recent 20%.
df = df.sort_index()

# Compute the boundary once so train and test can never overlap or leave a gap,
# and slice by position explicitly with .iloc.
split_idx = int(0.8 * len(df))
train = df['Close'].iloc[:split_idx]
test = df['Close'].iloc[split_idx:]

assert len(train) + len(test) == len(df)

# 2.2 Fit ARIMA model

In [None]:
# (p, d, q) = (0, 1, 0): one order of differencing, no AR or MA terms.
arima_order = (0, 1, 0)

# Build the model on the training closes and estimate it.
model = ARIMA(endog=train, order=arima_order)
model_fit = model.fit()

# 2.3 Forecast

In [None]:
# Forecast one step for every observation in the test set.
n_steps = len(test)
forecast_res = model_fit.get_forecast(steps=n_steps)

# Keep the point forecasts and relabel them with the test dates so they line up
# with `test` for plotting and scoring.
forecast = forecast_res.predicted_mean
forecast.index = test.index

# 2.4 Plot the results

In [None]:
# Overlay the training data, the held-out test data and the forecast.
# A title and axis labels are added so the figure stands on its own, matching
# the closing-price plot earlier in the notebook.
plt.figure(figsize=(14, 7))
plt.plot(train, label='Train')
plt.plot(test, label='Test')
plt.plot(forecast, label='Forecast')
plt.title('ARIMA Forecast vs Actual Gold Closing Prices')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

## Insight from Plot:

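The plot shows the historical gold prices used for training, the held-out test prices, and the model's forecast over the full test period. Because the fitted model is ARIMA(0, 1, 0) — a random walk with no autoregressive or moving-average terms — the forecast is a flat line at the last training value, so it serves as a naive baseline for judging how well future gold prices can be predicted from past trends.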

# 2.5 Evaluate the model

In [None]:
# Root-mean-squared error between the held-out prices and the forecast,
# expressed in the same units as the closing price.
mse = mean_squared_error(test, forecast)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')