In [1]:
import pandas as pd
import os

In [2]:
# Raw OHLCV CSVs are read from `input_dir`; processed copies are written to
# `output_dir`, which is created up front if it does not exist yet.
input_dir, output_dir = "../data/uncleaned/ohlcv", "../data/cleaned/ohlcv"
os.makedirs(output_dir, exist_ok=True)

In [7]:
# Columns every input file must provide; they are also the columns (plus TWAP)
# written to the output file, in this order.
REQUIRED_COLUMNS = ['timestamp', 'open', 'high', 'low', 'close', 'volume']


def compute_twap(df):
    """Return a copy of the OHLCV columns of ``df`` with a ``TWAP`` column added.

    The TWAP of a candle is estimated as the average of two path-weighted
    prices, one for each order in which the extremes may have been visited:

    * open -> high -> low -> close
    * open -> low -> high -> close

    For each path, every segment contributes its midpoint price weighted by
    the segment's share of the total path length, so the weights of one path
    always sum to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns in ``REQUIRED_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        New frame with ``REQUIRED_COLUMNS`` followed by ``TWAP``. The input
        frame is not modified.
    """
    open_, high, low, close = df['open'], df['high'], df['low'], df['close']

    # Segment lengths.
    oh_dist = high - open_
    ol_dist = open_ - low
    hl_dist = high - low
    lc_dist = close - low
    hc_dist = high - close

    # Total path lengths. The open->low->high->close path starts with the
    # open->low segment, so it must be built from ol_dist (not oh_dist);
    # otherwise its weights below would not sum to 1.
    ohlc_dist = oh_dist + hl_dist + lc_dist
    olhc_dist = ol_dist + hl_dist + hc_dist

    # Segment midpoints.
    oh_mean = (open_ + high) / 2
    ol_mean = (open_ + low) / 2
    hl_mean = (high + low) / 2
    lc_mean = (low + close) / 2
    hc_mean = (high + close) / 2

    ohlc_twap = (
        (oh_dist / ohlc_dist) * oh_mean +
        (hl_dist / ohlc_dist) * hl_mean +
        (lc_dist / ohlc_dist) * lc_mean
    )
    olhc_twap = (
        (ol_dist / olhc_dist) * ol_mean +
        (hl_dist / olhc_dist) * hl_mean +
        (hc_dist / olhc_dist) * hc_mean
    )

    # A zero path length makes the weights 0/0 (NaN). Fall back to the close
    # price for those rows; rows with missing prices keep their NaN.
    ohlc_twap = ohlc_twap.where(ohlc_dist != 0, close)
    olhc_twap = olhc_twap.where(olhc_dist != 0, close)

    return df[REQUIRED_COLUMNS].assign(TWAP=(ohlc_twap + olhc_twap) / 2)


# sorted() makes the processing order (and therefore the log) deterministic,
# since os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.endswith('.csv'):  # Process only CSV files
        continue

    file_path = os.path.join(input_dir, file_name)
    raw = pd.read_csv(file_path)

    # Check every column that is read or written below, not just the prices,
    # so an incomplete file is skipped instead of raising KeyError.
    if not set(REQUIRED_COLUMNS).issubset(raw.columns):
        print(f"File {file_name} is missing required columns. Skipping.")
        continue

    result = compute_twap(raw)

    # Save under the same file name in the output directory.
    output_file_path = os.path.join(output_dir, file_name)
    result.to_csv(output_file_path, index=False)

    print(f"Processed {file_name} and saved to {output_file_path}")

print("TWAP calculation completed for all files.")

Processed 1000000MOG_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000000MOG_ohlcv.csv
Processed 1000BONK_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000BONK_ohlcv.csv
Processed 1000BTT_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000BTT_ohlcv.csv
Processed 1000FLOKI_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000FLOKI_ohlcv.csv
Processed 1000LUNC_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000LUNC_ohlcv.csv
Processed 1000PEPE_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000PEPE_ohlcv.csv
Processed 1000SATS_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000SATS_ohlcv.csv
Processed 1000SHIB_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000SHIB_ohlcv.csv
Processed 1000XEC_ohlcv.csv and saved to ../data/cleaned/ohlcv\1000XEC_ohlcv.csv
Processed 1INCH_ohlcv.csv and saved to ../data/cleaned/ohlcv\1INCH_ohlcv.csv
Processed AAVE_ohlcv.csv and saved to ../data/cleaned/ohlcv\AAVE_ohlcv.csv
Processed ADA_ohlcv.csv and saved to ../data/cleaned/ohlcv\ADA_ohlcv.csv
Processed AERO_ohlcv.csv a