In [1]:
# import required libraries
import pandas as pd

In [2]:
def resample_data(df: pd.DataFrame, interval: str) -> pd.DataFrame:
    """
    Resample OHLCV data to a specified time interval.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe whose index holds datetimes (or values that
        ``pd.to_datetime`` can parse) and that contains the columns
        'Open', 'High', 'Low', 'Close' and 'Volume'. It is not modified.
    interval : str
        Time interval for resampling (e.g., '3h', '4h', '6h', '9h').

    Returns
    -------
    pd.DataFrame
        New dataframe with one row per interval holding the first Open,
        the maximum High, the minimum Low, the last Close and the summed
        Volume. Intervals with a missing value in any of these columns
        (e.g. intervals that contain no input rows) are dropped.
    """

    # Convert the index on a new frame rather than assigning to `df.index`,
    # so the caller's dataframe is left untouched.
    df_dt = df.set_axis(pd.to_datetime(df.index), axis=0)

    # aggregate each interval into a single OHLCV bar
    df_resampled = df_dt.resample(interval).agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum',
    })

    # intervals without any input rows have NaN prices; drop those rows
    return df_resampled.dropna()

In [3]:
# load the minutely gold data, using the parsed 'Date' column as the index
df = pd.read_csv(
    'data/gold_minutely_data.csv',
    index_col='Date',
    parse_dates=True,
)

In [4]:
# aggregate the minutely rows into 24-hour (daily) bars
df_r = resample_data(df, interval='24h')

In [5]:
# preview the last 10 rows of the resampled daily data
df_r.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-09-24,2627.33,2664.26,2622.54,2659.16,214639
2024-09-25,2658.99,2670.43,2649.58,2658.13,112129
2024-09-26,2658.12,2685.49,2654.71,2672.19,179144
2024-09-27,2672.26,2674.21,2642.92,2658.14,183166
2024-09-29,2659.0,2663.45,2658.93,2662.57,3791
2024-09-30,2662.55,2665.94,2624.62,2634.82,168598
2024-10-01,2634.85,2673.09,2633.77,2661.88,160839
2024-10-02,2661.98,2663.2,2641.01,2659.34,182988
2024-10-03,2659.33,2662.9,2635.01,2656.04,180073
2024-10-04,2655.92,2667.75,2654.17,2662.74,21058


In [6]:
# load the daily data file to compare against the resampled data
df_org = pd.read_csv(
    'data/gold_daily_data.csv',
    index_col='Date',
    parse_dates=True,
)

In [9]:
# preview the last 10 rows of the daily data loaded from file
df_org.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-09-24,2627.33,2664.26,2622.54,2659.16,214639
2024-09-25,2658.99,2670.43,2649.58,2658.13,112130
2024-09-26,2658.12,2685.49,2654.71,2672.19,179160
2024-09-27,2672.26,2674.21,2642.92,2658.14,183441
2024-09-29,2659.0,2663.45,2658.93,2662.57,3791
2024-09-30,2662.55,2665.94,2624.62,2634.82,168640
2024-10-01,2634.85,2673.09,2633.77,2661.88,160841
2024-10-02,2661.98,2663.2,2641.01,2659.34,182989
2024-10-03,2659.33,2662.9,2635.01,2656.04,180074
2024-10-04,2655.92,2667.75,2654.17,2662.52,20531


In [8]:
# element-wise exact comparison of the last 10 rows of the loaded daily data
# with the resampled data; `==` requires both frames to carry identical
# row and column labels
df_org.tail(10) == df_r.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-09-24,True,True,True,True,True
2024-09-25,True,True,True,True,False
2024-09-26,True,True,True,True,False
2024-09-27,True,True,True,True,False
2024-09-29,True,True,True,True,True
2024-09-30,True,True,True,True,False
2024-10-01,True,True,True,True,False
2024-10-02,True,True,True,True,False
2024-10-03,True,True,True,True,False
2024-10-04,True,True,True,False,False


In [16]:
# select the daily rows from 2024-05-24 through 2024-06-06
# (label-based .loc slicing includes both endpoints)
df_org_seg1 = df_org.loc["2024-05-24":"2024-06-06"]

In [17]:
# preview the first 10 rows of the selected daily segment
df_org_seg1.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-24,2328.71,2347.37,2325.46,2333.18,77499
2024-05-27,2335.59,2358.52,2331.68,2350.08,60048
2024-05-28,2352.38,2364.14,2340.32,2360.6,77999
2024-05-29,2360.08,2361.37,2334.61,2338.04,84994
2024-05-30,2338.57,2351.77,2322.74,2342.61,89526
2024-05-31,2343.47,2359.8,2320.6,2329.45,85938
2024-06-03,2328.11,2354.76,2314.77,2350.9,86476
2024-06-04,2351.39,2352.83,2315.62,2326.81,85805
2024-06-05,2328.19,2357.41,2325.92,2354.82,80231
2024-06-06,2355.66,2378.5,2353.44,2375.96,78712


In [11]:
# preview the first 10 rows of the resampled daily data
df_r.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-24,2331.07,2347.37,2325.46,2333.18,75992
2024-05-27,2335.59,2358.52,2331.68,2350.08,60048
2024-05-28,2352.38,2364.14,2340.32,2360.6,77999
2024-05-29,2360.08,2361.37,2334.61,2338.04,84994
2024-05-30,2338.57,2351.77,2322.74,2342.61,89526
2024-05-31,2343.47,2359.8,2320.6,2329.45,85938
2024-06-03,2328.11,2354.76,2314.77,2350.9,86476
2024-06-04,2351.39,2352.83,2315.62,2326.81,85805
2024-06-05,2328.19,2357.41,2325.92,2354.82,80231
2024-06-06,2355.66,2378.5,2353.44,2375.96,78712


In [18]:
# element-wise exact comparison of the selected daily segment with the first
# 10 resampled rows; `==` requires both frames to carry identical row and
# column labels
df_org_seg1 == df_r.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-24,False,True,True,True,False
2024-05-27,True,True,True,True,True
2024-05-28,True,True,True,True,True
2024-05-29,True,True,True,True,True
2024-05-30,True,True,True,True,True
2024-05-31,True,True,True,True,True
2024-06-03,True,True,True,True,True
2024-06-04,True,True,True,True,True
2024-06-05,True,True,True,True,True
2024-06-06,True,True,True,True,True
