In [1]:
# Import the libraries used throughout the notebook
import os
import warnings 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
# NOTE(review): this silences every warning for the whole session, which also
# hides deprecation notices from pandas/numpy; consider narrowing the filter
# to a specific category or module.
warnings.filterwarnings('ignore')

In [2]:
def data_wrangle(path, dropped_columns=None):
    """Load a headerless OHLCV CSV and index it by timestamp.

    The file is read with the fixed column names
    ``Date, Time, Open, High, Low, Close, Volume``. The ``Date`` and
    ``Time`` strings are joined with a space and parsed into a single
    datetime, which becomes the index of the result. No missing-value
    handling is performed here.

    Parameters
    ----------
    path : str
        Location of the CSV file (anything ``pd.read_csv`` accepts).
    dropped_columns : list, optional
        Extra columns to remove from the result (default is None,
        meaning nothing besides ``Time`` is removed).

    Returns
    -------
    pd.DataFrame
        Frame indexed by ``Date`` holding the remaining columns.
    """
    column_names = ["Date", "Time", "Open", "High", "Low", "Close", "Volume"]
    frame = pd.read_csv(path, header=None, names=column_names)

    # Merge the separate date and time strings into one parsed timestamp
    timestamps = pd.to_datetime(frame['Date'] + ' ' + frame['Time'])
    frame['Date'] = timestamps

    # 'Time' is now folded into 'Date', so it can go
    frame = frame.drop(columns=["Time"])

    # Remove any caller-specified columns before indexing
    if dropped_columns:
        frame = frame.drop(columns=dropped_columns)

    # Hand back the frame keyed by its timestamp
    return frame.set_index('Date')

In [3]:
# Load the 1-minute XAUUSD CSV (per its file name) through data_wrangle,
# which returns a frame indexed by the combined Date/Time timestamp.
df_1min = data_wrangle('data/XAUUSD_1min.csv')
# The 4-hour file is currently not loaded; uncomment the line below to load it.
# df_4hr = data_wrangle('data/XAUUSD_4hour.csv')

In [4]:
# Persist the wrangled frame; the 'Date' index is written as the first CSV column.
df_1min.to_csv('data/gold_minutely_data.csv')

In [None]:
# Function to calculate CCI
def calculate_cci(data, period, constant=0.015):
    """Compute the Commodity Channel Index (CCI) over a rolling window.

    CCI = (TP - SMA(TP)) / (constant * MD(TP)), where TP is the typical
    price ``(High + Low + Close) / 3``, SMA is the rolling mean and MD is
    the rolling mean absolute deviation around that mean.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with ``High``, ``Low`` and ``Close`` columns. A
        ``'Typical Price'`` column is added to (or overwritten in) this
        frame as a side effect, as in the original implementation.
    period : int
        Rolling window length.
    constant : float, optional
        Scaling factor applied to the mean deviation (default 0.015).

    Returns
    -------
    pd.Series
        CCI values aligned with ``data``'s index. The first
        ``period - 1`` entries are NaN, and a window whose prices are all
        identical yields NaN (0 / 0).
    """
    # Typical price; stored on the frame to stay compatible with callers
    # that read the 'Typical Price' column afterwards.
    data['Typical Price'] = (data['High'] + data['Low'] + data['Close']) / 3
    typical_price = data['Typical Price']

    windows = typical_price.rolling(window=period)

    # Simple moving average of the typical price
    sma = windows.mean()

    # Mean absolute deviation per window. Series.mad() no longer exists in
    # pandas >= 2.0, so compute it on the raw ndarray, which also avoids
    # building a Series for every window.
    mean_deviation = windows.apply(
        lambda values: np.abs(values - values.mean()).mean(), raw=True)

    return (typical_price - sma) / (constant * mean_deviation)

In [None]:
# Build the working frame from the minutely data. reset_index() returns a new
# frame with 'Date' restored as a regular column, so df_1min stays untouched
# and the column selection below can include 'Date'.
data = df_1min.reset_index()

# Calculate CCI for the 3-period and 9-period windows
for period in (3, 9):
    data[f'CCI_{period}'] = calculate_cci(data, period)

# Display the last rows of the data with the CCI columns
print(data[['Date', 'High', 'Low', 'Close', 'CCI_3', 'CCI_9']].tail())