In [72]:
import pandas as pd

In [73]:
def read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv):
    """Merge a stocks CSV with its price/volume anomaly CSVs into one time-ordered CSV.

    Every row read from ``price_anomalies_csv`` is tagged with ``price_pred = 1``
    and every row read from ``volume_anomalies_csv`` with ``volume_pred = 1``.
    The three tables are then stacked row-wise, so a row only carries a flag
    value if its own source file provided that column. The stacked table is
    sorted by ``ingestion_ts`` (parsed as datetimes), and gaps in ``price`` and
    ``volume`` are filled by linear interpolation; anything still missing
    afterwards is replaced by the mean of the interpolated column.

    Args:
        path: Directory the result is written to, as
            ``{path}/combined_15min.csv``.
        stocks_csv: Path of the stocks CSV.
        price_anomalies_csv: Path of the price-anomaly CSV.
        volume_anomalies_csv: Path of the volume-anomaly CSV.

    Returns:
        None. The combined table is written to disk and a confirmation line
        is printed.

    Raises:
        KeyError: If the stacked table lacks an ``ingestion_ts``, ``price`` or
            ``volume`` column.
    """
    base_rows = pd.read_csv(stocks_csv)
    # Flag each anomaly file's rows so they stay identifiable after stacking.
    price_rows = pd.read_csv(price_anomalies_csv).assign(price_pred=1)
    volume_rows = pd.read_csv(volume_anomalies_csv).assign(volume_pred=1)

    merged = pd.concat([base_rows, price_rows, volume_rows], ignore_index=True)
    merged['ingestion_ts'] = pd.to_datetime(merged['ingestion_ts'])
    merged = merged.sort_values(by='ingestion_ts', ascending=True).reset_index(drop=True)

    for column in ('price', 'volume'):
        # Interpolate along the time-sorted rows first; the mean is only a
        # fallback for values interpolation leaves missing.
        interpolated = merged[column].interpolate(method='linear')
        merged[column] = interpolated.fillna(interpolated.mean())

    combined_csv = f'{path}/combined_15min.csv'
    merged.to_csv(combined_csv, index=False)

    print(f"Combined CSV saved to {combined_csv}")

In [74]:
# Z-score run: combine its 15-minute stocks file with the price and volume
# anomaly files from the same directory.
path = "data/z-score"
stocks_csv, price_anomalies_csv, volume_anomalies_csv = (
    f"{path}/stocks_z_15min.csv",
    f"{path}/p_z_15min.csv",
    f"{path}/v_z_15min.csv",
)
read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv)

Combined CSV saved to data/z-score/combined_15min.csv


In [75]:
# IQR run: combine its 15-minute stocks file with the price and volume
# anomaly files from the same directory.
path = "data/iqr"
stocks_csv, price_anomalies_csv, volume_anomalies_csv = (
    f"{path}/stocks_i_15min.csv",
    f"{path}/p_i_15min.csv",
    f"{path}/v_i_15min.csv",
)
read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv)

Combined CSV saved to data/iqr/combined_15min.csv


In [76]:
# iforest run: combine its 15-minute stocks file with the price and volume
# anomaly files from the same directory.
path = "data/iforest"
stocks_csv, price_anomalies_csv, volume_anomalies_csv = (
    f"{path}/stocks_if_15min.csv",
    f"{path}/p_if_15min.csv",
    f"{path}/v_if_15min.csv",
)
read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv)

Combined CSV saved to data/iforest/combined_15min.csv


In [77]:
# ARIMA run: combine its 15-minute stocks file with the price and volume
# anomaly files from the same directory.
path = "data/arima"
stocks_csv, price_anomalies_csv, volume_anomalies_csv = (
    f"{path}/stocks_a_15min.csv",
    f"{path}/p_a_15min.csv",
    f"{path}/v_a_15min.csv",
)
read_files_based_on_paths(path, stocks_csv, price_anomalies_csv, volume_anomalies_csv)

Combined CSV saved to data/arima/combined_15min.csv
