In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import yfinance as yf
import ta
from ta import momentum, trend 
import numpy as np
import glob
import gc
import os
from tqdm import tqdm

In [6]:
# Folder that holds one CSV file per ticker. The STOCK_DATA_DIR environment
# variable overrides the default so the notebook can run on another machine
# without editing this cell.
folder_path = os.environ.get(
    'STOCK_DATA_DIR',
    '/media/mahir_uddin/Mahir/5-2/Data Analytics/Projects/Stock Trend/Assignment Dataset/stocks',
)

# glob returns files in arbitrary order; sorting makes the load order (and
# therefore the dict order used by later cells) reproducible.
csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
if not csv_files:
    print(f"No CSV files found in {folder_path}")

# Maps ticker (file name without extension) -> cleaned DataFrame.
stock_data = {}

for file in tqdm(csv_files):
    try:
        # splitext removes only the trailing extension; str.replace would also
        # strip a '.csv' substring appearing elsewhere in the file name.
        ticker = os.path.splitext(os.path.basename(file))[0]

        df = pd.read_csv(file)
        # Keep only the columns that are not dropped here (e.g. 'Adj Close').
        df = df.drop(columns=['Date', 'Open', 'High', 'Low', 'Close'])
        df = df.dropna()
        # Percentage change between consecutive rows; the first row has no
        # predecessor, so its return is set to 0.
        # NOTE(review): assumes rows are already in chronological order,
        # since 'Date' is dropped before this is computed - confirm.
        df['Daily_Return_%'] = df['Adj Close'].pct_change(fill_method=None).fillna(0) * 100
        stock_data[ticker] = df

        # No `del df` / `gc.collect()` here: the frame is still referenced by
        # stock_data, so nothing could be freed, and a full collection on
        # every file only slows the loop down.

    except Exception as e:
        # Best effort: report the bad file and continue with the rest.
        print(f"Error loading {file}: {e}")


100%|███████████████████████████████████████| 5884/5884 [15:07<00:00,  6.48it/s]


In [7]:
def _as_period_list(periods):
    """Return ``periods`` as a list, wrapping a single scalar window."""
    if isinstance(periods, (list, tuple)):
        return list(periods)
    return [periods]


def add_indicators(stock_data, indicators, include_ticker=False):
    """Add technical-indicator columns to every stock and stack the results.

    Parameters
    ----------
    stock_data : dict
        Maps ticker -> DataFrame. The price indicators read the
        ``'Adj Close'`` column and the volume ratio reads ``'Volume'``.
        The input frames are not modified; each one is copied first.
    indicators : dict
        Maps indicator name -> parameters. Only the keys present are computed:

        * ``'SMA'`` / ``'EMA'``: one window or a list/tuple of windows;
          adds ``SMA_<p>`` / ``EMA_<p>`` columns.
        * ``'RSI'``: one window or a list/tuple of windows; adds ``RSI_<p>``.
        * ``'MACD'``: ``[fast, slow, signal]``; adds ``MACD`` and
          ``MACD_Signal``.
        * ``'Volume'``: ``[short, long]``; adds
          ``Volume_Ratio_<short>_<long>``, the ratio of the two rolling
          volume means (infinite ratios become NaN).
    include_ticker : bool, default False
        When True, add a ``'Ticker'`` column so rows can still be attributed
        to their stock after concatenation. Off by default to keep the
        original all-numeric output.

    Returns
    -------
    pandas.DataFrame
        All per-stock frames concatenated with a fresh integer index. Rows
        containing any NaN (for example the indicator warm-up rows) are
        dropped per stock. An empty DataFrame is returned when
        ``stock_data`` is empty.
    """
    frames = []

    for count, (ticker, df) in enumerate(stock_data.items(), start=1):
        if count % 500 == 0:
            print(f"Processed {count} stocks...")

        df = df.copy()  # avoid modifying the original dictionary

        # --- SMA ---
        if 'SMA' in indicators:
            for p in _as_period_list(indicators['SMA']):
                df[f'SMA_{p}'] = ta.trend.SMAIndicator(close=df['Adj Close'], window=p).sma_indicator()

        # --- EMA ---
        if 'EMA' in indicators:
            for p in _as_period_list(indicators['EMA']):
                df[f'EMA_{p}'] = ta.trend.EMAIndicator(close=df['Adj Close'], window=p).ema_indicator()

        # --- RSI ---
        if 'RSI' in indicators:
            # Accepts a scalar (original behaviour) or a list, like SMA/EMA.
            for p in _as_period_list(indicators['RSI']):
                df[f'RSI_{p}'] = ta.momentum.RSIIndicator(close=df['Adj Close'], window=p).rsi()

        # --- MACD ---
        if 'MACD' in indicators:
            fast, slow, sign = indicators['MACD'][:3]
            macd = ta.trend.MACD(close=df['Adj Close'],
                                 window_fast=fast,
                                 window_slow=slow,
                                 window_sign=sign)
            df['MACD'] = macd.macd()
            df['MACD_Signal'] = macd.macd_signal()

        # --- Volume Ratio ---
        if 'Volume' in indicators:
            short_win, long_win = indicators['Volume'][:2]
            short_avg = df['Volume'].rolling(window=short_win).mean()
            long_avg = df['Volume'].rolling(window=long_win).mean()
            # Division by a zero long average yields +/-inf; turn that into
            # NaN so the dropna below removes those rows.
            ratio = (short_avg / long_avg).replace([np.inf, -np.inf], np.nan)
            df[f'Volume_Ratio_{short_win}_{long_win}'] = ratio

        df = df.dropna().reset_index(drop=True)

        if include_ticker:
            df['Ticker'] = ticker

        # The frame stays referenced by `frames`, so an explicit del /
        # gc.collect() per ticker would free nothing and only cost time.
        frames.append(df)

    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)
