In [1]:
import pandas as pd

In [2]:
def read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv):
    """Merge a base stock CSV with its price/volume anomaly CSVs into one file.

    The three CSVs are stacked row-wise, ordered by ``ingestion_ts``, gaps in
    ``price`` and ``volume`` are filled, and the result is written to
    ``{path}/combined_15min.csv``.

    Parameters
    ----------
    path : str
        Directory the combined CSV is written into (must already exist).
    stocks_csv : str
        Path of the base CSV.
    price_anomalies_csv : str
        Path of the price-anomaly CSV; its rows get ``price_pred = 1``.
    volume_anomalies_csv : str
        Path of the volume-anomaly CSV; its rows get ``volume_pred = 1``.

    Returns
    -------
    pandas.DataFrame
        The combined frame, identical to what was written to disk.

    Raises
    ------
    KeyError
        If the combined data lacks an ``ingestion_ts``, ``price`` or
        ``volume`` column.
    """
    stocks_df = pd.read_csv(stocks_csv)
    price_anomalies_df = pd.read_csv(price_anomalies_csv)
    volume_anomalies_df = pd.read_csv(volume_anomalies_csv)

    # Flag the anomaly rows. Rows coming from a frame that does not carry the
    # flag column end up as NaN in that column after the concat below.
    price_anomalies_df['price_pred'] = 1
    volume_anomalies_df['volume_pred'] = 1

    combined_df = pd.concat(
        [stocks_df, price_anomalies_df, volume_anomalies_df],
        ignore_index=True,
    )
    combined_df['ingestion_ts'] = pd.to_datetime(combined_df['ingestion_ts'])

    # A stable sort keeps rows with the same timestamp in concat order
    # (base, price anomalies, volume anomalies). The default quicksort gives
    # no such guarantee, which would make the interpolation below and the
    # saved file depend on an arbitrary tie order.
    combined_df = combined_df.sort_values(
        by='ingestion_ts', ascending=True, kind='mergesort'
    ).reset_index(drop=True)

    # Linear interpolation first; whatever is still missing afterwards is
    # filled with the mean of the interpolated column.
    for column in ('price', 'volume'):
        interpolated = combined_df[column].interpolate(method='linear')
        combined_df[column] = interpolated.fillna(interpolated.mean())

    combined_csv = f'{path}/combined_15min.csv'
    combined_df.to_csv(combined_csv, index=False)

    print(f"Combined CSV saved to {combined_csv}")
    return combined_df

In [3]:
# Every detector keeps its inputs under data/<detector>/ using the same three
# file names: base data, price anomalies, volume anomalies (in that order).
paths = ["z-score", "iqr", "iforest", "arima"]
files = ["base_15min.csv", "p_15min.csv", "v_15min.csv"]
for path in paths:
    detector_dir = f"data/{path}"
    stocks_csv, price_anomalies_csv, volume_anomalies_csv = (
        f"{detector_dir}/{file_name}" for file_name in files
    )
    # The combined CSV is written back into the detector's own directory.
    read_files_based_on_paths(
        detector_dir,
        stocks_csv,
        price_anomalies_csv,
        volume_anomalies_csv,
    )

Combined CSV saved to data/z-score/combined_15min.csv
Combined CSV saved to data/iqr/combined_15min.csv
Combined CSV saved to data/iforest/combined_15min.csv
Combined CSV saved to data/arima/combined_15min.csv
