In [14]:
import pandas as pd
import numpy as np 
import yfinance as yf

In [15]:
# Symbols processed by the main loop; each one is handed to prep_stock_data,
# which forwards it unchanged to yf.download.
TICKERS = [
    "RELIANCE.NS",
    "TCS.NS",
    "INFY.NS",
    "HCLTECH.NS",
    "HDFCBANK.NS",
    "SBIN.NS",
]

# Passed as the `start=` argument of yf.download in prep_stock_data.
START_DATE = "2020-01-01"

In [16]:
def calculate_rsi(series, period=14):
    """Relative Strength Index built from simple rolling means.

    Args:
        series: pandas Series of prices (anything supporting ``.diff()``).
        period: Rolling window length, in observations.

    Returns:
        Series aligned with ``series`` holding ``100 - 100 / (1 + RS)``,
        where RS is mean gain divided by mean loss over the window.

    Notes:
        * The first ``period - 1`` entries are NaN (incomplete window).
        * The undefined first difference is counted as a zero move, because
          ``where`` replaces every entry failing the comparison (NaN
          included) with 0.
        * A window with no losses but some gains yields 100; a window with
          neither gains nor losses is 0/0 and yields NaN.
    """
    change = series.diff()

    # Split moves into upward and (sign-flipped) downward components.
    upward = change.where(change > 0, 0)
    downward = -change.where(change < 0, 0)

    avg_gain = upward.rolling(window=period).mean()
    avg_loss = downward.rolling(window=period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

In [17]:
def calculate_macd(series, fast=12, slow=26, signal=9):
    """Compute the MACD line and its signal line.

    Args:
        series: pandas Series of prices.
        fast: Span of the shorter exponential moving average.
        slow: Span of the longer exponential moving average.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd, signal_line)``: the fast EMA minus the slow EMA, and
        the EMA of that difference. All EMAs use ``adjust=False``
        (recursive form).
    """
    def _ema(values, span):
        # Recursive (non-adjusted) exponential moving average.
        return values.ewm(span=span, adjust=False).mean()

    macd_line = _ema(series, fast) - _ema(series, slow)
    return macd_line, _ema(macd_line, signal)

In [18]:
def prep_stock_data(ticker, min_avg_volume=100000, horizon=5, target_threshold=0.01):
    """Download one ticker's price history and build features plus a binary target.

    Args:
        ticker: Symbol passed unchanged to ``yf.download``.
        min_avg_volume: Tickers whose mean ``Volume`` is below this are rejected.
        horizon: Number of rows to look ahead when building the target.
        target_threshold: Fractional gain over ``horizon`` rows that the close
            must exceed for ``Target`` to be 1 (0.01 means more than +1%).

    Returns:
        A DataFrame containing the downloaded columns plus ``SMA_50``,
        ``SMA_200``, ``RSI``, ``MACD``, ``MACD_Signal``, ``Dist_SMA_50``,
        ``Dist_SMA_200``, ``Ret_1d``, ``Ret_5d``, ``Day_Range``,
        ``Future_Close``, ``Target`` and ``Ticker``, with every row holding a
        NaN or infinite value removed. Returns ``None`` when the download is
        empty or the ticker fails the volume check.

    Warning:
        ``Future_Close`` is look-ahead data (the close ``horizon`` rows later).
        It is kept for inspection but must not be used as a model feature.
    """
    print(f"Processing {ticker}...")
    df = yf.download(ticker, start=START_DATE, progress=False, auto_adjust=True)

    # An empty frame has a NaN mean volume; `NaN < threshold` is False, so
    # without this guard the liquidity filter below would be bypassed.
    if df is None or df.empty:
        print(f"   -> REJECTED {ticker}: No data returned.")
        return None

    # Two-level column index: drop the second level so columns can be
    # addressed by plain names such as 'Close'.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.droplevel(1)

    if df['Volume'].mean() < min_avg_volume:
        print(f"   -> ‚ùå REJECTED {ticker}: Volume too low.")
        return None

    df = df.copy()

    # --- 1. INDICATORS ---
    df['SMA_50'] = df['Close'].rolling(window=50).mean()
    df['SMA_200'] = df['Close'].rolling(window=200).mean()  # used for regime context
    df['RSI'] = calculate_rsi(df['Close'])
    macd, signal = calculate_macd(df['Close'])
    df['MACD'] = macd
    df['MACD_Signal'] = signal

    # --- 2. REGIME INDICATORS (Context) ---
    # Ratio of price to its moving average: > 1 means price is above the average.
    df['Dist_SMA_50'] = df['Close'] / df['SMA_50']
    df['Dist_SMA_200'] = df['Close'] / df['SMA_200']

    # --- 3. RAW PRICE STRUCTURE (Facts) ---
    # Close-to-close return over the latest row.
    df['Ret_1d'] = df['Close'].pct_change(periods=1)
    # Close-to-close return over the latest five rows.
    df['Ret_5d'] = df['Close'].pct_change(periods=5)
    # High-low range of the row, scaled by its open.
    df['Day_Range'] = (df['High'] - df['Low']) / df['Open']

    # --- 4. TARGET ---
    # The last `horizon` rows have no future close; their Target evaluates to 0
    # from the NaN comparison, but the rows are removed by dropna() below.
    df['Future_Close'] = df['Close'].shift(-horizon)
    df['Target'] = (df['Future_Close'] > df['Close'] * (1 + target_threshold)).astype(int)

    df['Ticker'] = ticker

    # Ratio features become +/-inf on a zero denominator (e.g. Open == 0), and
    # dropna() ignores inf, so convert them to NaN first. dropna() also removes
    # the warm-up rows (SMA_200 needs 200 observations).
    df = df.replace([np.inf, -np.inf], np.nan).dropna()

    return df

In [19]:
if __name__ == "__main__":
    # Build the per-ticker feature tables, stack them, and write one CSV.
    print("üöÄ Starting Engine (With Threshold Targets)...\n")
    all_stocks = []

    for ticker in TICKERS:
        try:
            stock_df = prep_stock_data(ticker)
            if stock_df is None:
                # prep_stock_data has already printed why it rejected the ticker.
                continue
            if stock_df.empty:
                # Nothing survived feature engineering; do not report this
                # as an accepted ticker with 0 rows.
                print(f"   -> SKIPPED {ticker}: no rows left after feature engineering.")
                continue
            all_stocks.append(stock_df)
            print(f"   -> ‚úÖ ACCEPTED {ticker} with {len(stock_df)} rows")
        except Exception as e:
            # Best-effort batch: one failing ticker must not abort the others.
            print(f"   -> ‚ö†Ô∏è ERROR with {ticker}: {e}")

    if all_stocks:
        final_df = pd.concat(all_stocks)
        final_df.to_csv("stock_data_final.csv")
        print("\nüéâ SUCCESS. Dataset saved to 'stock_data_final.csv'.")
        # Message corrected: the label is built with Close * 1.01, i.e. a 1%
        # threshold, not the 0.5% previously printed here.
        print("Verification: Target column is now based on >1% returns.")
    else:
        print("‚ùå System Failure.")

üöÄ Starting Engine (With Threshold Targets)...

Processing RELIANCE.NS...
   -> ‚úÖ ACCEPTED RELIANCE.NS with 1296 rows
Processing TCS.NS...
   -> ‚úÖ ACCEPTED TCS.NS with 1296 rows
Processing INFY.NS...
   -> ‚úÖ ACCEPTED INFY.NS with 1296 rows
Processing HCLTECH.NS...
   -> ‚úÖ ACCEPTED HCLTECH.NS with 1296 rows
Processing HDFCBANK.NS...
   -> ‚úÖ ACCEPTED HDFCBANK.NS with 1296 rows
Processing SBIN.NS...
   -> ‚úÖ ACCEPTED SBIN.NS with 1296 rows

üéâ SUCCESS. Dataset saved to 'stock_data_final.csv'.
Verification: Target column is now based on >0.5% returns.
