In [2]:
import pandas as pd
import os

RAW_DATA_DIR = r"D:\wear\stock-platform\data\raw"
PROCESSED_DATA_DIR = r"D:\wear\stock-platform\data\processed"


def engineer_features(raw):
    """Return a cleaned copy of *raw* with engineered features and a target.

    The input must contain ``Close`` and ``Open`` columns (a ``KeyError`` is
    raised otherwise). Rows are presumably in chronological order, oldest
    first -- TODO confirm against the raw CSV producer; no sorting is done
    here.

    Added columns:
      * ``Daily_Return``  - percentage change of ``Close`` vs. the previous row
      * ``MA_5``/``MA_10`` - rolling mean of ``Close`` over 5 / 10 rows
      * ``Volatility_5``  - rolling standard deviation of ``Close`` (price,
        not return) over 5 rows
      * ``Price_Change``  - ``Close - Open`` of the same row
      * ``Target``        - 1 if the next row's ``Close`` is strictly higher
        than this row's ``Close``, else 0

    Rows containing NaN (in the input, or produced by the rolling windows)
    are removed. The final row is removed as well: it has no following close,
    so any label for it would be invented rather than observed.

    The input frame is not modified.
    """
    # Work on a copy so the caller's frame is left untouched.
    features = raw.dropna().copy()
    close = features["Close"]

    # -------- FEATURE ENGINEERING --------
    features["Daily_Return"] = close.pct_change()
    features["MA_5"] = close.rolling(window=5).mean()
    features["MA_10"] = close.rolling(window=10).mean()
    features["Volatility_5"] = close.rolling(window=5).std()
    features["Price_Change"] = close - features["Open"]

    # TARGET VARIABLE
    next_close = close.shift(-1)
    features["Target"] = (next_close > close).astype(int)

    # A comparison against NaN is False, so the last row would silently be
    # labelled 0 and survive dropna() (the label is already an int by then).
    # Keep only rows whose next close is actually known.
    has_next = next_close.notna()

    # REMOVE NaN CREATED BY ROLLING
    return features.loc[has_next].dropna()


def main():
    """Process every ``.csv`` in RAW_DATA_DIR into PROCESSED_DATA_DIR."""
    os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)

    print(" Starting data preprocessing...\n")

    # sorted() makes the processing order independent of the filesystem.
    for file in sorted(os.listdir(RAW_DATA_DIR)):
        if not file.endswith(".csv"):
            continue

        print(f"Processing {file}...")

        # LOAD DATA
        file_path = os.path.join(RAW_DATA_DIR, file)
        df = engineer_features(pd.read_csv(file_path))

        # SAVE PROCESSED DATA
        output_file = file.replace("_data.csv", "_processed.csv")
        output_path = os.path.join(PROCESSED_DATA_DIR, output_file)
        df.to_csv(output_path, index=False)

        print(f"Saved processed file: {output_path}\n")

    print("Data preprocessing completed for all stocks.")


# Still runs when executed as a notebook cell or a script (both use
# "__main__"), but no longer touches the disk when merely imported.
if __name__ == "__main__":
    main()


 Starting data preprocessing...

Processing AAPL_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\AAPL_processed.csv

Processing AMZN_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\AMZN_processed.csv

Processing GOOGL_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\GOOGL_processed.csv

Processing HDFCBANK.NS_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\HDFCBANK.NS_processed.csv

Processing ICICIBANK.NS_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\ICICIBANK.NS_processed.csv

Processing INFY.NS_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\INFY.NS_processed.csv

Processing MSFT_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\MSFT_processed.csv

Processing RELIANCE.NS_data.csv...
Saved processed file: D:\wear\stock-platform\data\processed\RELIANCE.NS_processed.csv

Processing TCS.NS_data.csv...
Saved processed file: