In [3]:
import pandas as pd
import os

# How each OHLCV column is collapsed when several rows fall into one bar.
_OHLCV_AGGREGATIONS = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
}


def _resample_ohlcv(frame, rule):
    """Aggregate an OHLCV frame indexed by datetime into bars of width `rule`."""
    bars = frame.resample(rule).agg(_OHLCV_AGGREGATIONS)
    # Bins with no source rows get NaN prices (and a volume of 0); drop them so
    # gaps in the input do not become empty bars in the output.
    return bars.dropna()


def resample_timeframes(input_csv_path, output_folder, symbol='BTCUSD'):
    """
    Resample raw OHLCV CSV data into 30-minute, 1-hour, and 1-day datasets.

    The input is indexed by its time column, sorted, and aggregated into
    30-minute bars. The 1-hour and 1-day bars are then built from those
    30-minute bars. Each result is written to
    ``<output_folder>/<symbol>_<30m|1h|1d>.csv``.

    Args:
        input_csv_path (str): Path to input CSV file. It must contain a
            'timestamp' or 'time' column plus 'open', 'high', 'low', 'close'
            and 'volume' columns.
        output_folder (str): Path to folder where outputs will be saved.
            Created if it does not exist.
        symbol (str): Prefix used for the output file names. Defaults to
            'BTCUSD', which reproduces the original file names.

    Returns:
        None. The results are the CSV files written to `output_folder`.

    Raises:
        ValueError: If neither a 'timestamp' nor a 'time' column is present.
        KeyError: If any of the OHLCV columns is missing.
    """

    # Load input CSV
    df = pd.read_csv(input_csv_path)

    # Detect time column ('timestamp' takes precedence over 'time')
    time_col = next((name for name in ('timestamp', 'time') if name in df.columns), None)
    if time_col is None:
        raise ValueError("No recognizable time column found (expected 'timestamp' or 'time').")

    # Index by time and sort, since resampling needs a datetime index
    df[time_col] = pd.to_datetime(df[time_col])
    df = df.set_index(time_col).sort_index()

    # Fail early, before anything is written, with a message naming every
    # missing column instead of only the first one that is accessed.
    missing = [name for name in _OHLCV_AGGREGATIONS if name not in df.columns]
    if missing:
        raise KeyError(f"Missing required OHLCV column(s): {', '.join(missing)}")

    # Make sure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # === 30-Minute Resample ===
    # '30min' is used instead of '30T': the 'T' alias is deprecated in pandas >= 2.2.
    bars_30m = _resample_ohlcv(df, '30min')
    bars_30m.to_csv(os.path.join(output_folder, f'{symbol}_30m.csv'))
    print("✅ 30m dataset created and saved.")

    # === 1-Hour and 1-Day Resamples (built from the 30-minute bars) ===
    # '1h' is used instead of '1H': the upper-case alias is deprecated in pandas >= 2.2.
    for label, suffix, rule in (('1H', '1h', '1h'), ('1D', '1d', '1D')):
        bars = _resample_ohlcv(bars_30m, rule)
        bars.to_csv(os.path.join(output_folder, f'{symbol}_{suffix}.csv'))
        print(f"✅ {label} dataset created and saved.")

    print("\n🎯 All resampled datasets saved to:", output_folder)


In [12]:
# Folder handed to resample_timeframes as the destination for its output CSVs.
output_folder = 'Resampled'
# CSV file handed to resample_timeframes as the data to resample.
input_csv_path = 'Coinbase_BTCUSD_30m_4years.csv'

In [13]:
# Run the resampling with the paths configured in the previous cell.
resample_timeframes(
    input_csv_path=input_csv_path,
    output_folder=output_folder,
)

✅ 30m dataset created and saved.
✅ 1H dataset created and saved.
✅ 1D dataset created and saved.

🎯 All resampled datasets saved to: Resampled
