In [2]:
import io
import logging
import requests
import pandas as pd
from config import HEADERS, CSV_DOWNLOAD_URL, DHAN_SCRIP_MASTER_CSV

In [3]:
# Initialize session
session = requests.Session()
session.headers.update(HEADERS)

def download_dhan_csv():
    """Download the Dhan scrip master CSV."""
    try:
        r = requests.get(CSV_DOWNLOAD_URL, timeout=30)
        r.raise_for_status()
        with open(DHAN_SCRIP_MASTER_CSV, "wb") as f:
            f.write(r.content)
        logging.info("✅ Dhan CSV downloaded successfully.")
        return True
    except Exception as e:
        logging.error(f"❌ Failed downloading Dhan CSV: {e}")
        return False

def fetch_nifty50_symbols():
    """Fetch the latest list of Nifty50 stock symbols."""
    try:
        r = session.get("https://nsearchives.nseindia.com/content/indices/ind_nifty50list.csv", timeout=30)
        r.raise_for_status()
        df = pd.read_csv(io.StringIO(r.text))
        return (df["Symbol"] + ".NS").tolist()
    except Exception as e:
        logging.error(f"Error fetching Nifty50 symbols: {e}")
        return []

# 🟢 Display output
print("Downloading Dhan CSV...")
if download_dhan_csv():
    print("✅ Dhan CSV download completed.\n")

print("Fetching NIFTY 50 symbols...")
symbols = fetch_nifty50_symbols()
print(f"✅ NIFTY 50 Symbols ({len(symbols)}):\n{symbols}")

Downloading Dhan CSV...
✅ Dhan CSV download completed.

Fetching NIFTY 50 symbols...
✅ NIFTY 50 Symbols (50):
['ADANIENT.NS', 'ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BEL.NS', 'BHARTIARTL.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DRREDDY.NS', 'EICHERMOT.NS', 'ETERNAL.NS', 'GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'ICICIBANK.NS', 'ITC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'JIOFIN.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS', 'MARUTI.NS', 'NTPC.NS', 'NESTLEIND.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SHRIRAMFIN.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TCS.NS', 'TATACONSUM.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TECHM.NS', 'TITAN.NS', 'TRENT.NS', 'ULTRACEMCO.NS', 'WIPRO.NS']


In [4]:
df = pd.read_csv("data/api-scrip-master.csv")

options_df = df[df['SEM_INSTRUMENT_NAME'].str.contains("OPT")]
options_df = options_df[options_df['SEM_OPTION_TYPE'].isin(['CE', 'PE']) & options_df['SEM_EXPIRY_DATE'].notna()]
latest_options = options_df.sort_values('SEM_EXPIRY_DATE', ascending=False).groupby('SM_SYMBOL_NAME').head(10)

print(latest_options[['SM_SYMBOL_NAME', 'SEM_TRADING_SYMBOL', 'SEM_EXPIRY_DATE', 'SEM_OPTION_TYPE']].head(5))

  df = pd.read_csv("data/api-scrip-master.csv")


      SM_SYMBOL_NAME       SEM_TRADING_SYMBOL      SEM_EXPIRY_DATE  \
36697         BSXOPT  SENSEX-Jun2030-84000-PE  2030-06-27 15:30:00   
36585         BSXOPT  SENSEX-Jun2030-81000-PE  2030-06-27 15:30:00   
36342         BSXOPT  SENSEX-Jun2030-88000-PE  2030-06-27 15:30:00   
36344         BSXOPT  SENSEX-Jun2030-82000-PE  2030-06-27 15:30:00   
36349         BSXOPT  SENSEX-Jun2030-85000-CE  2030-06-27 15:30:00   

      SEM_OPTION_TYPE  
36697              PE  
36585              PE  
36342              PE  
36344              PE  
36349              CE  


In [6]:
import yfinance as yf

# Example: Fetch 3 months of 1-day interval price data
df = yf.Ticker("RELIANCE.NS").history(period="3mo", interval="1h")
df = df[df['Volume'] > 0]
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]  # keep required columns
df.index = df.index.strftime('%Y-%m-%d %H:%M')

print(df)

                         Open         High          Low        Close   Volume
Datetime                                                                     
2025-04-17 10:15  1229.800049  1242.400024  1229.199951  1242.099976  1519469
2025-04-17 11:15  1242.300049  1260.000000  1242.199951  1259.000000  2433094
2025-04-17 12:15  1259.000000  1267.500000  1258.300049  1267.300049  1913242
2025-04-17 13:15  1267.400024  1277.599976  1265.300049  1275.000000  3139558
2025-04-17 14:15  1275.000000  1278.300049  1272.000000  1273.500000  3532377
...                       ...          ...          ...          ...      ...
2025-07-16 11:15  1480.000000  1483.699951  1476.500000  1483.000000  1068791
2025-07-16 12:15  1483.300049  1485.400024  1479.900024  1480.699951  1582096
2025-07-16 13:15  1480.699951  1485.000000  1479.400024  1485.000000   916879
2025-07-16 14:15  1485.000000  1491.000000  1484.000000  1485.099976  2443017
2025-07-16 15:15  1485.400024  1486.800049  1484.400024  1486.19

In [None]:
import talib

def add_cdl_patterns(df):
    patterns = [f for f in dir(talib) if f.startswith('CDL')]
    for pattern in patterns:
        try:
            df[pattern] = getattr(talib, pattern)(df['Open'], df['High'], df['Low'], df['Close'])
        except:
            df[pattern] = 0
    return df

df = add_cdl_patterns(df)
# print(df[[col for col in df.columns if col.startswith("CDL")]].head())

                  CDL2CROWS  CDL3BLACKCROWS  CDL3INSIDE  CDL3LINESTRIKE  \
Datetime                                                                  
2025-04-17 10:15          0               0           0               0   
2025-04-17 11:15          0               0           0               0   
2025-04-17 12:15          0               0           0               0   
2025-04-17 13:15          0               0           0               0   
2025-04-17 14:15          0               0           0               0   

                  CDL3OUTSIDE  CDL3STARSINSOUTH  CDL3WHITESOLDIERS  \
Datetime                                                             
2025-04-17 10:15            0                 0                  0   
2025-04-17 11:15            0                 0                  0   
2025-04-17 12:15            0                 0                  0   
2025-04-17 13:15            0                 0                  0   
2025-04-17 14:15            0                 0       

In [29]:
def detect_smc_features(df):
    df = df.copy()
    df.loc[:, 'BOS'] = df['High'].gt(df['High'].shift(1)) & df['Low'].lt(df['Low'].shift(1))
    df.loc[:, 'OrderBlock'] = (df['Close'] < df['Open']) & (df['Volume'] > df['Volume'].rolling(5).mean())
    df.loc[:, 'LiquiditySweep'] = df['Low'] < df['Low'].rolling(10).min()
    df.loc[:, 'CHoCH'] = df['Close'] > df['High'].shift(1)
    return df

df.index = pd.to_datetime(df.index).date

df = detect_smc_features(df)
print(df[['BOS', 'OrderBlock', 'LiquiditySweep', 'CHoCH']].tail())

              BOS  OrderBlock  LiquiditySweep  CHoCH
2025-07-16  False       False           False   True
2025-07-16  False        True           False  False
2025-07-16  False       False           False  False
2025-07-16  False       False           False   True
2025-07-16  False       False           False  False


In [30]:
def engineer_features(df):
    df = df.copy()
    df['ATR'] = talib.ATR(df['High'], df['Low'], df['Close'], timeperiod=14)
    df['RSI'] = talib.RSI(df['Close'], timeperiod=14)
    df['MACD'], _, _ = talib.MACD(df['Close'])
    df['BB_upper'], df['BB_middle'], df['BB_lower'] = talib.BBANDS(df['Close'])
    df['Volume_MA'] = df['Volume'].rolling(20).mean()
    return df

df = engineer_features(df)
print(df[['ATR', 'RSI', 'MACD', 'BB_upper', 'BB_lower', 'Volume_MA']].tail())

                 ATR        RSI      MACD     BB_upper     BB_lower  \
2025-07-16  7.112575  37.694596 -8.145176  1492.213670  1473.466310   
2025-07-16  6.997391  35.971703 -7.976028  1489.953915  1473.166055   
2025-07-16  6.897576  41.367306 -7.409586  1487.139633  1474.580337   
2025-07-16  6.904892  41.490758 -6.873378  1487.026895  1478.453085   
2025-07-16  6.583116  42.914904 -6.287195  1487.892538  1480.107413   

             Volume_MA  
2025-07-16  1269433.10  
2025-07-16  1300615.90  
2025-07-16  1316595.85  
2025-07-16  1409042.75  
2025-07-16  1364160.50  


In [31]:
import numpy as np

def label_targets(df, horizon=5, threshold=0.02):
    df['future_return'] = df['Close'].shift(-horizon) / df['Close'] - 1
    df['target'] = np.where(df['future_return'] > threshold, 1, 0)
    df['target'] = np.where(df['future_return'] < -threshold, -1, df['target'])  # -1 = sell, 1 = buy
    return df

df = label_targets(df, horizon=5, threshold=0.02).dropna()
print(df[['Close', 'future_return', 'target']].tail(10))

                  Close  future_return  target
2025-07-14  1483.400024       0.002697       0
2025-07-14  1485.599976       0.001010       0
2025-07-15  1493.400024      -0.003281       0
2025-07-15  1492.699951      -0.011389       0
2025-07-15  1489.199951      -0.006245       0
2025-07-15  1487.400024      -0.002958       0
2025-07-15  1487.099976      -0.004304       0
2025-07-15  1488.500000      -0.002351       0
2025-07-16  1475.699951       0.006370       0
2025-07-16  1479.900024       0.004257       0


In [32]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

def train_model(df):
    df = df.copy()
    df = engineer_features(add_cdl_patterns(detect_smc_features(df)))
    df = label_targets(df)
    df = df.dropna()

    features = [col for col in df.columns if col not in ['target', 'future_return']]
    X, y = df[features], df['target']
    y = y.replace({-1: 0, 0: 1, 1: 2})  # remap: 0=Sell, 1=Hold, 2=Buy

    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)
    
    model = XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.05)
    model.fit(X_train, y_train)

    accuracy = model.score(X_test, y_test)
    print(f"✅ Model trained with accuracy: {accuracy:.2%}")

    return model, features  # ⬅ return features too

model = train_model(df)

✅ Model trained with accuracy: 93.75%


In [33]:
def generate_signal(model, latest_df, trained_features):
    latest_df = engineer_features(add_cdl_patterns(detect_smc_features(latest_df)))
    features = latest_df.iloc[-1:][trained_features]
    pred = model.predict(features)[0]
    return {0: 'Sell', 1: 'Hold', 2: 'Buy'}.get(pred, 'Hold')

model, trained_features = train_model(df)
signal = generate_signal(model, df, trained_features)
print(f"🔔 Signal: {signal}")


✅ Model trained with accuracy: 92.19%
🔔 Signal: Hold
