## Collecting Historical Gold Price Patterns 

In [None]:
# import required libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from pandas.plotting import autocorrelation_plot
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf

In [None]:
# download the gold data (ticker 'GC=F') from the yfinance module

# one day - 1 min interval
# `period` is passed explicitly so the request really covers one day instead
# of whatever look-back window yfinance uses by default
gold_one_day = yf.download('GC=F', period="1d", interval="1m")
# one month - 5 min interval
gold_one_month = yf.download('GC=F', period="1mo", interval="5m")
# one year - 1 day interval (no `interval` given, so yfinance's default applies)
gold_one_year = yf.download('GC=F', period="1y")
# ten years - 1 day interval (no `interval` given, so yfinance's default applies)
gold_ten_year = yf.download('GC=F', period="10y")

In [None]:
# save the data as csv files
# to_csv does not create missing folders, so make sure 'data/' exists first
os.makedirs('data', exist_ok=True)
gold_one_day.to_csv('data/gold_one_day.csv')
gold_one_month.to_csv('data/gold_one_month.csv')
gold_one_year.to_csv('data/gold_one_year.csv')
gold_ten_year.to_csv('data/gold_ten_year.csv')

In [None]:
def data_wrangle(path, droped_columns):
    """Load a CSV file, index it by its 'Date' column and drop columns.

    input
    -----
    path: location of the CSV file (anything pandas.read_csv accepts)
    droped_columns: column label, or list of labels, to remove

    output
    ------
    dataframe indexed by 'Date' without the dropped columns
    """
    # read -> promote 'Date' to the index -> discard the unwanted columns
    return (
        pd.read_csv(path)
        .set_index('Date')
        .drop(columns=droped_columns)
    )

In [None]:
# load the one-year data without the 'Adj Close' column
df_one_year = data_wrangle(path="data/gold_one_year.csv", droped_columns="Adj Close")
# print explicitly: a bare expression is only displayed when it is the last
# statement of a notebook cell, so the preview would otherwise be lost
print(df_one_year.head(5))
print('-'*100)
# check the information of the dataframe
df_one_year.info()
print('-'*100)
# display the statistics of the wrangled dataframe (the one inspected above),
# not of the raw download
df_one_year.describe()


In [None]:
# checkpoints: show the first row, then its index, then its "High" value,
# each separated by a dashed rule
first_row = df_one_year.iloc[[0]]
divider = '-'*100
print(first_row, divider, first_row.index, divider, first_row["High"], sep="\n")

In [None]:
# Walk the dataframe in windows of `interval` rows. Only the window from the
# final iteration (possibly shorter than `interval`) is still bound to
# `segment` after the loop, and that is the one printed below.
interval = 30
for start in range(0, len(df_one_year), interval):
    # derive the end from `interval` so changing the value above is enough
    end = start + interval
    segment = df_one_year[start:end]

# Print that last window column by column: the column label, then the value.
# Iterating a dataframe yields its column labels.
for label in segment:
    for value in segment[label]:
        print(f"{label}")
        print(value)

In [None]:
# Function to plot and save images
def plot_images(data, interval, output_dir):
    """Plot every column of `data` in consecutive windows and save the figures.

    The dataframe is cut into back-to-back windows of `interval` rows. For
    each complete window one PNG per column is written to `output_dir`;
    trailing rows that do not fill a whole window are skipped.

    input
    -----
    data : dataframe whose index supplies the x-axis values
    interval : number of rows per window
    output_dir : folder the PNG files are written to (created if missing)

    output
    ------
    None; every figure is saved to disk and then closed
    """
    # savefig does not create missing folders, so make sure the target exists
    os.makedirs(output_dir, exist_ok=True)
    # `+ 1` so the last complete window is included when len(data) is an
    # exact multiple of `interval`
    for start in range(0, len(data) - interval + 1, interval):
        # segment the dataframe with the window's start and end position
        segment = data[start:start + interval]
        first, last = segment.index[0], segment.index[-1]
        # iterating a dataframe yields its column labels
        for label in segment:
            plt.figure(figsize=(20, 10), dpi=300)
            plt.plot(segment.index, segment[label], linewidth=3)
            plt.title(f"{label} from {first} to {last}")
            plt.xlabel("Date")
            # rotate the x-axis labels by 90 degrees
            plt.xticks(rotation=90)
            plt.ylabel(label)
            plt.savefig(f"{output_dir}/{label}_{first}_{last}.png")
            # close the figure so open figures do not pile up across iterations
            plt.close()
        

In [None]:
# save one figure per column for each 30-row window into the "images" folder
plot_images(
    data=df_one_year,
    interval=30,
    output_dir="images",
)

In [None]:
# Function to plot and save images
# NOTE: the definition below is wrapped in a triple-quoted string, so it is
# never executed and `plot_and_save_images` is not defined by this cell.
"""
def plot_and_save_images(data, interval, output_dir):
    for start in range(0, len(data) - interval, interval):
        end = start + interval
        segment = data[start:end]
        plt.figure(figsize=(10, 5), dpi=300)
        plt.plot(segment.index, segment['Price'], linewidth=3)
        plt.title(f"Gold Price from {segment.index[0]} to {segment.index[-1]}")
        plt.xlabel("Date")
        plt.ylabel("Price")
        #plt.savefig(f"{output_dir}/gold_price_{start}_{end}.png")
        #plt.close()
"""