In [1]:
# import required libraries 
import os
import warnings 
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

In [2]:
def data_wrangle(path, dropped_columns=None):
    """Load a headerless OHLCV CSV and index it by a combined timestamp.

    The file is read without a header row and its seven columns are named
    Date, Time, Open, High, Low, Close and Volume. The Date and Time text
    columns are joined with a space, parsed as datetimes, and the result
    becomes the index of the returned frame. No missing-value handling is
    performed here.

    Parameters
    ----------
    path : str
        Data path to the CSV file (anything ``pd.read_csv`` accepts).
    dropped_columns : list, optional
        Extra columns to remove before indexing (default is None, meaning
        nothing beyond ``Time`` is dropped).

    Returns
    -------
    pd.DataFrame
        Frame indexed by ``Date`` with the remaining price/volume columns.
    """
    column_names = ["Date", "Time", "Open", "High", "Low", "Close", "Volume"]
    raw = pd.read_csv(path, header=None, names=column_names)

    # Build the full timestamp from the two text columns; assigning under the
    # existing "Date" name keeps the column in its original position.
    timestamps = pd.to_datetime(raw["Date"] + " " + raw["Time"])
    frame = raw.assign(Date=timestamps).drop(columns=["Time"])

    # Optional caller-requested pruning (skipped for None or an empty list).
    if dropped_columns:
        frame = frame.drop(columns=dropped_columns)

    return frame.set_index("Date")

In [3]:
# Load the minutely, hourly and daily gold CSVs; in each one the 'Date'
# column is parsed as datetimes and used as the frame's index.
minutely_data = pd.read_csv(
    "data/gold_minutely_data.csv",
    index_col='Date',
    parse_dates=['Date'],
)
hourly_data = pd.read_csv(
    "data/gold_hourly_data.csv",
    index_col='Date',
    parse_dates=['Date'],
)
daily_data = pd.read_csv(
    "data/gold_daily_data.csv",
    index_col='Date',
    parse_dates=['Date'],
)

In [4]:
# Function to calculate the Commodity Channel Index (CCI)
def calculate_cci(data, period):
    """Compute the Commodity Channel Index (CCI) over a rolling window.

    CCI = (typical price - SMA of typical price) / (0.015 * mean deviation),
    where the typical price is the average of High, Low and Close and the
    mean deviation is the mean absolute distance of the window's typical
    prices from the window mean.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with 'High', 'Low' and 'Close' columns. NOTE: it is modified
        in place -- a 'Typical Price' column is added (or overwritten).
    period : int
        Rolling window length passed to ``Series.rolling``.

    Returns
    -------
    pd.Series
        CCI values aligned with ``data``'s index. With an integer ``period``
        the rows before the first complete window are NaN. A window whose
        mean deviation is zero divides by zero, so it yields NaN/inf rather
        than raising.
    """

    def _mean_abs_deviation(window):
        # `window` arrives as a pandas Series (rolling.apply default, raw=False).
        centred = window - window.mean()
        return np.mean(np.abs(centred))

    # Store the typical price on the caller's frame, then read it back so the
    # rest of the calculation uses exactly what was stored.
    data['Typical Price'] = (data['High'] + data['Low'] + data['Close']) / 3
    typical_price = data['Typical Price']

    # One rolling view serves both the moving average and the mean deviation.
    rolling_tp = typical_price.rolling(window=period)
    moving_average = rolling_tp.mean()
    mean_deviation = rolling_tp.apply(_mean_abs_deviation)

    distance_from_average = typical_price - moving_average
    scaled_deviation = 0.015 * mean_deviation
    return distance_from_average / scaled_deviation

In [5]:
# Add 3-period and 9-period CCI columns to the hourly frame.
# (calculate_cci also writes a 'Typical Price' column onto hourly_data.)
hourly_data["CCI_3"] = calculate_cci(data=hourly_data, period=3)
hourly_data["CCI_9"] = calculate_cci(data=hourly_data, period=9)

# Show the last five rows, including the new CCI columns
print(hourly_data.tail(n=5))

                        Open     High      Low    Close  Volume  \
Date                                                              
2024-08-29 04:00:00  2512.77  2516.68  2512.29  2516.54    4192   
2024-08-29 05:00:00  2516.47  2518.12  2515.39  2515.74    4207   
2024-08-29 06:00:00  2515.63  2519.90  2513.64  2518.63    7242   
2024-08-29 07:00:00  2518.61  2521.05  2516.07  2516.74    9963   
2024-08-29 08:00:00  2516.88  2518.32  2515.68  2517.91    4609   

                     Typical Price       CCI_3       CCI_9  
Date                                                        
2024-08-29 04:00:00    2515.170000  100.000000  113.167929  
2024-08-29 05:00:00    2516.416667   84.801489  106.599931  
2024-08-29 06:00:00    2517.390000   92.115385  102.704343  
2024-08-29 07:00:00    2517.953333   83.665339  100.755307  
2024-08-29 08:00:00    2517.303333  -60.714286   71.239745  


In [6]:
# Function to calculate CCI (faster variant: rolling apply on raw NumPy arrays)
def calculate_cci_fast(data, period):
    """Compute the Commodity Channel Index (CCI) using raw-array windows.

    Same formula as the plain CCI calculation -- (typical price - SMA) /
    (0.015 * mean deviation) -- but the rolling mean-deviation callback
    receives each window as a NumPy array (``raw=True``) instead of a
    pandas Series.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with 'High', 'Low' and 'Close' columns. NOTE: it is modified
        in place -- a 'Typical Price' column is added (or overwritten).
    period : int
        Rolling window length passed to ``Series.rolling``.

    Returns
    -------
    pd.Series
        CCI values aligned with ``data``'s index. With an integer ``period``
        the rows before the first complete window are NaN. A window whose
        mean deviation is zero divides by zero, so it yields NaN/inf rather
        than raising.
    """

    def _window_mean_abs_dev(values):
        # `values` is a plain ndarray because rolling.apply is called with raw=True.
        offsets = np.abs(values - values.mean())
        return offsets.mean()

    # Typical price is written to the caller's frame as a side effect.
    data['Typical Price'] = (data['High'] + data['Low'] + data['Close']) / 3
    tp = data['Typical Price']

    # Simple moving average of the typical price.
    tp_sma = tp.rolling(window=period).mean()

    # Mean absolute deviation of each window from its own mean.
    tp_mad = tp.rolling(window=period).apply(_window_mean_abs_dev, raw=True)

    numerator = tp - tp_sma
    denominator = 0.015 * tp_mad
    return numerator / denominator

In [7]:
# Recompute the 3-period and 9-period CCI columns with the fast variant.
# This overwrites any existing 'CCI_3' / 'CCI_9' columns on hourly_data.
hourly_data["CCI_3"] = calculate_cci_fast(data=hourly_data, period=3)
hourly_data["CCI_9"] = calculate_cci_fast(data=hourly_data, period=9)

# Show the last five rows, including the CCI columns
print(hourly_data.tail(n=5))

                        Open     High      Low    Close  Volume  \
Date                                                              
2024-08-29 04:00:00  2512.77  2516.68  2512.29  2516.54    4192   
2024-08-29 05:00:00  2516.47  2518.12  2515.39  2515.74    4207   
2024-08-29 06:00:00  2515.63  2519.90  2513.64  2518.63    7242   
2024-08-29 07:00:00  2518.61  2521.05  2516.07  2516.74    9963   
2024-08-29 08:00:00  2516.88  2518.32  2515.68  2517.91    4609   

                     Typical Price       CCI_3       CCI_9  
Date                                                        
2024-08-29 04:00:00    2515.170000  100.000000  113.167929  
2024-08-29 05:00:00    2516.416667   84.801489  106.599931  
2024-08-29 06:00:00    2517.390000   92.115385  102.704343  
2024-08-29 07:00:00    2517.953333   83.665339  100.755307  
2024-08-29 08:00:00    2517.303333  -60.714286   71.239745  
