<a href="https://colab.research.google.com/github/ManullangJihan/100-Day-ML-Challenge/blob/main/08_Gold_Price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Gold Close Price Visualization and Prediction***

## Context
---

## Content
---
1. Date
2. Open
3. High
4. Low
5. Close
6. Volume
7. Currency

[Data Source](https://www.kaggle.com/datasets/psycon/daily-gold-price-historical-data)

In [None]:
!pip install statsmodels==0.13.2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Global plotting defaults: dark Plotly template, larger Matplotlib figures and fonts

import plotly.io as pio

pio.templates.default = "plotly_dark"
plt.rcParams.update({
    "figure.figsize": [12, 9],
    "font.size": 18,
})

In [4]:
# Fix the random seeds so repeated runs give the same numbers

import random

def set_seed(seed):
    """Seed Python's `random` module and NumPy's global RNG with the same value."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

global_seed = 42
set_seed(global_seed)

In [7]:
# Load the gold price history, index it by Date and keep only the Close column

# NOTE(review): this looks like a Google Drive mount path in Colab - presumably
# the drive has to be mounted before this cell runs; confirm.
path = "/content/drive/MyDrive/gold.csv"
df = (
    pd.read_csv(path, parse_dates=["Date"])
    .set_index("Date")
    .drop(columns=["Open", "High", "Low", "Volume", "Currency"])
)
n_samples, n_features = df.shape

df.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2000-01-04,282.7
2000-01-05,281.1
2000-01-06,281.4
2000-01-07,281.9
2000-01-10,281.7


In [9]:
# Add the two regression features:
#   time  - running integer position of each observation (0, 1, 2, ...)
#   lag_1 - Close value of the previous row
# The guard keeps this cell idempotent: without it, every re-run would renumber
# `time`, re-shift `lag_1` and drop one more leading row from `df`.
if "lag_1" not in df.columns:
    df = df.assign(
        time=np.arange(len(df.index)),
        lag_1=df["Close"].shift(1),
    ).dropna()  # the first row has no previous Close, so its lag_1 is NaN

In [12]:
# Fit a linear regression of Close on the integer time feature
from sklearn.linear_model import LinearRegression

X = df[["time"]].to_numpy()   # 2-D feature matrix with a single column
y = df["Close"].to_numpy()    # 1-D target array

reg = LinearRegression()
reg.fit(X, y)
y_pred = reg.predict(X)

In [14]:
# Overlay the fitted linear regression on the observed Close prices.
# Both traces are drawn against the integer `time` feature, so the x-axis is
# labelled as an observation index rather than as a date.
fig = go.Figure()

fig.add_trace(go.Scatter(
    x = df["time"].values,
    y = y,
    name = "Observations",
    mode = "lines")
)

fig.add_trace(go.Scatter(
    x = df["time"].values,
    y = y_pred,
    name = "Predictions (Based on time feature)",
    mode = "lines")
)

fig.update_layout(
    title = "Gold Close Price (Observations + Linear Regression)",
    xaxis_title = "Time (observation index)",
    yaxis_title = "Close Price"
)

fig.show()

In [19]:
# Regress Close on its 1-step lag to see how closely each close price
# follows the previous row's close (`y` is the Close array built earlier)

X = df[["lag_1"]].to_numpy()

reg = LinearRegression()
reg.fit(X, y)
y_pred = reg.predict(X)

In [24]:
# Scatter of Close against lag_1, with the fitted regression line drawn on top
fig = go.Figure(data=[
    go.Scatter(
        x=df["lag_1"],
        y=y,
        name="Ground Truth",
        mode="markers",
        opacity=0.5,
    ),
    go.Scatter(
        x=df["lag_1"],
        y=y_pred,
        name="Predictions (Based on lag 1 feature)",
        mode="lines",
    ),
])

fig.update_layout(
    title="Gold Close Price Relation with the price the day before",
    xaxis_title="Lag 1",
    yaxis_title="Close Price",
)

fig.show()

In [27]:
# Smooth the series with a centred rolling mean to expose the long-run trend.
# An integer window counts rows, so each value averages up to 365 observations
# (not 365 calendar days) and needs at least 183 of them to be defined.
trend_df = df.copy().to_period('d')

moving_average = trend_df.rolling(window=365, min_periods=183, center=True).mean()

In [28]:
# Preview the first five rows of the smoothed frame
moving_average.head(n=5)

Unnamed: 0_level_0,Close,time,lag_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-05,282.23224,92.0,282.280328
2000-01-06,282.211413,92.5,282.234783
2000-01-07,282.175676,93.0,282.214054
2000-01-10,282.12957,93.5,282.178495
2000-01-11,282.081283,94.0,282.13262


In [31]:
# Observed Close prices with the centred rolling mean overlaid
fig = go.Figure(data=[
    go.Scatter(
        x=df.index,
        y=df.Close,
        name="Observations",
        mode="lines",
    ),
    go.Scatter(
        x=df.index,
        y=moving_average.Close,
        name="Moving Average",
        mode="lines",
    ),
])

fig.update_layout(
    title="Gold Close Price 365 day Moving Average",
    xaxis_title="Date",
    yaxis_title="Close Price",
)

fig.show()

# Predict and Plot the Trend and Seasonality of the Gold Price

In [36]:
from statsmodels.tsa.deterministic import DeterministicProcess

# Trend-only design matrix: a constant column plus an order-1 (linear) time trend
dp = DeterministicProcess(index=trend_df.index, constant=True, order=1, drop=True)

X = dp.in_sample()                          # features for the observed index
X_test_trend = dp.out_of_sample(steps=365)  # features for the next 365 periods

# Fit on the observed Close values (`y`) and extrapolate the trend forward
reg = LinearRegression()
reg.fit(X, y)
y_trend_pred = reg.predict(X_test_trend)

In [37]:
# Seasonal
from statsmodels.tsa.deterministic import CalendarFourier

# Design matrix with constant, linear trend, seasonal indicator columns and
# order-10 annual Fourier terms.
# NOTE(review): `seasonal=True` takes its period from the index frequency, while
# the observed dates skip some days (e.g. 2000-01-07 -> 2000-01-10 in the data
# preview) - confirm the seasonal indicators line up with the calendar as intended.
fourier = CalendarFourier(freq='A', order=10)
dp = DeterministicProcess(
    index=trend_df.index,
    constant=True,
    order=1,
    seasonal=True,
    additional_terms=[fourier],
    drop=True,
)

X = dp.in_sample()                             # features for the observed index
X_test_seasonal = dp.out_of_sample(steps=365)  # features for the next 365 periods

# Refit on the richer feature set and forecast the same horizon
reg = LinearRegression()
reg.fit(X, y)
y_forecast = reg.predict(X_test_seasonal)

In [40]:
# Observed Close prices followed by the trend-only and trend+seasonal forecasts.
# The forecast frames carry a PeriodIndex, converted to timestamps for plotting.

fig = go.Figure(data=[
    go.Scatter(
        x=df.index,
        y=y,
        name="Observations",
        mode="lines",
    ),
    go.Scatter(
        x=X_test_trend.index.to_timestamp(),
        y=y_trend_pred,
        name="Trend Prediction",
        mode="lines",
    ),
    go.Scatter(
        x=X_test_seasonal.index.to_timestamp(),
        y=y_forecast,
        name="Seasonal Forecast",
        mode="lines",
    ),
])

fig.update_layout(
    title="Gold Close Price Trend and Seasonal Forecast",
    xaxis_title="Date",
    yaxis_title="Close Price",
)

fig.show()