In [18]:
# Import necessary libraries
import pandas as pd
import numpy as np
from ta.trend import ADXIndicator
from ta.volatility import AverageTrueRange
from ta.momentum import RSIIndicator
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [19]:
# Step 1: Load Dataset
# Relative path: resolved against the kernel's current working directory.
file_path = 'NIFTY 100.csv'
# Parse 'Date' as datetimes at read time; later cells sort and group on this column.
df = pd.read_csv(file_path, parse_dates=['Date'])


In [20]:
# Step 2: Handle Missing Values
# Forward-fill every column: each NaN takes the closest non-null value above it.
# NaNs at the very top of a column have nothing above them and are left as NaN.
# Assumes the rows are already in chronological order - TODO confirm against the CSV.
# NOTE(review): the saved backtest output shows Open/High/Low of 0.00 on the 2005 rows.
# Those look like zero placeholders rather than NaN, so ffill would not touch them - confirm.
df = df.ffill()


In [21]:
# Step 3: Feature Engineering
def compute_indicators(data, ma_window=20, rsi_window=14, adx_window=14, atr_window=14):
    """Return a copy of ``data`` with technical-indicator columns added.

    Columns added (names are fixed because later cells select them by name):
      * ``MA_20`` - rolling mean of ``Close`` over ``ma_window`` rows
      * ``RSI``   - ``ta`` RSIIndicator on ``Close`` with ``rsi_window``
      * ``ADX``   - ``ta`` ADXIndicator on ``High``/``Low``/``Close`` with ``adx_window``
      * ``ATR``   - ``ta`` AverageTrueRange on ``High``/``Low``/``Close`` with ``atr_window``

    Args:
        data: DataFrame with at least ``Close``, ``High`` and ``Low`` columns.
        ma_window: Row count for the rolling mean (default 20).
        rsi_window: Window passed to RSIIndicator (default 14).
        adx_window: Window passed to ADXIndicator (default 14).
        atr_window: Window passed to AverageTrueRange (default 14).

    Returns:
        A new DataFrame with the four columns added and every row that contains
        a NaN in any column removed. The original index labels are kept.

    Raises:
        KeyError: if ``Close``, ``High`` or ``Low`` is missing from ``data``.
    """
    # Work on a copy: the previous version added columns to, and dropped rows
    # from, the caller's frame, so merely calling the function changed it.
    enriched = data.copy()

    close = enriched['Close']
    high = enriched['High']
    low = enriched['Low']

    enriched['MA_20'] = close.rolling(window=ma_window).mean()
    enriched['RSI'] = RSIIndicator(close, window=rsi_window).rsi()
    # NOTE(review): the saved output shows ADX == 0.000000 on the first retained
    # row, so ta presumably emits 0.0 rather than NaN while ADX warms up; if so,
    # the dropna below does not remove those rows - confirm.
    enriched['ADX'] = ADXIndicator(high, low, close, window=adx_window).adx()
    enriched['ATR'] = AverageTrueRange(high, low, close, window=atr_window).average_true_range()

    # Drops rows with a NaN in ANY column, not only the indicator columns.
    return enriched.dropna()
# Rebind df to the frame returned by compute_indicators (MA_20, RSI, ADX, ATR added; NaN rows dropped).
# Not idempotent: re-running this cell recomputes the indicators on the already-trimmed frame
# and drops the new warm-up rows again.
df = compute_indicators(df)


In [22]:
# Step 4: Assign Target Variable (Ranking Stocks)
def assign_labels(data):
    """Label the highest-``Close`` row of each ``Date`` with ``Target`` = 1.

    The frame is sorted by ``Date`` ascending and ``Close`` descending, and the
    index is reset. Within each ``Date`` the rows are ranked by ``Close``
    (highest first, ties broken by row order); rank 1 gets ``Target`` 1 and
    every other row gets 0.

    Args:
        data: DataFrame with ``Date`` and ``Close`` columns. It is not modified.

    Returns:
        A new, re-sorted DataFrame with an integer ``Target`` column appended.

    Warns:
        UserWarning: if every row ends up with the same ``Target`` value. This
        happens when each ``Date`` has exactly one row (a single instrument
        rather than a panel of stocks): every row is then rank 1, so a
        classifier trained on the labels has nothing to learn and any accuracy
        computed from them is meaningless.
    """
    import warnings  # local import: the notebook's import cell does not provide it

    ordered = data.sort_values(by=['Date', 'Close'], ascending=[True, False]).reset_index(drop=True)

    # method='first' guarantees exactly one rank-1 row per Date even when Close ties.
    rank_in_day = ordered.groupby('Date')['Close'].rank(method='first', ascending=False)

    # Vectorised form of "1 if rank == 1 else 0"; a NaN rank compares False and maps to 0.
    ordered['Target'] = rank_in_day.eq(1).astype('int64')

    if len(ordered) > 0 and ordered['Target'].nunique() < 2:
        warnings.warn(
            "assign_labels: Target has a single class for every row "
            "(is there only one row per Date?); the labels carry no information.",
            UserWarning,
            stacklevel=2,
        )

    return ordered

# Rebind df to the labelled frame: rows re-sorted by Date (ascending) then Close (descending),
# index reset to 0..n-1, and a Target column added.
df = assign_labels(df)


In [23]:
# Step 5: Train the Model
def train_model(data, test_size=0.2, random_state=42):
    """Fit a random-forest classifier on the indicator features and report hold-out accuracy.

    Args:
        data: DataFrame containing the feature columns ``MA_20``, ``RSI``,
            ``ADX``, ``ATR`` and the label column ``Target``.
        test_size: Fraction of rows held out for evaluation (default 0.2).
        random_state: Seed used for both the split and the forest (default 42).

    Returns:
        ``(model, scaler)``: the fitted RandomForestClassifier and the
        StandardScaler fitted on the training rows. Apply ``scaler.transform``
        to the same four feature columns before calling ``model.predict``.

    Side effects:
        Prints the hold-out accuracy.
    """
    features = ['MA_20', 'RSI', 'ADX', 'ATR']
    X = data[features]
    y = data['Target']

    # Split BEFORE scaling. Fitting the scaler on all rows lets the test rows'
    # mean and variance leak into the training features.
    # NOTE(review): the split is random, so test rows can predate training rows;
    # for time-ordered data a chronological hold-out is usually what is wanted.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)  # statistics come from training rows only
    X_test_scaled = scaler.transform(X_test)        # test rows reuse the training statistics

    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)

    print(f'Accuracy: {accuracy_score(y_test, predictions)}')
    return model, scaler
# Fit the classifier; keep the fitted scaler so the backtest cell can transform features the same way.
# NOTE(review): the saved output reports "Accuracy: 1.0". The preview rows show no ticker column,
# so presumably each Date has a single row and every Target is 1 - confirm before trusting this score.
model, scaler = train_model(df)


Accuracy: 1.0


In [24]:
# Step 6: Backtesting
def backtest_model(model, scaler, data):
    """Predict labels for ``data`` and return the rows predicted as class 1.

    Args:
        model: Object exposing ``predict(X)``.
        scaler: Object exposing ``transform(X)``; applied to the feature columns first.
        data: DataFrame with the columns ``MA_20``, ``RSI``, ``ADX`` and ``ATR``.

    Returns:
        The subset of ``data`` whose ``Predicted_Label`` equals 1.

    Side effects:
        Writes a ``Predicted_Label`` column onto ``data`` itself.
    """
    feature_cols = ['MA_20', 'RSI', 'ADX', 'ATR']
    scaled_features = scaler.transform(data[feature_cols])
    predicted = model.predict(scaled_features)

    # Stored on the caller's frame, not on a copy.
    data['Predicted_Label'] = predicted

    is_selected = data['Predicted_Label'] == 1
    return data[is_selected]  # Top-performing stocks
# Score the same df that was passed to train_model (in-sample) and keep the rows predicted as class 1.
# NOTE(review): the saved output lists index 0 through 3623, so the model appears to flag every row - confirm.
top_stocks = backtest_model(model, scaler, df)
# Plain-text dump of the selected rows; pandas elides the middle of a long frame.
print(top_stocks)


           Date     Open     High      Low    Close        Volume  \
0    2005-09-19     0.00     0.00     0.00  2545.24  1.535458e+08   
1    2005-09-20     0.00     0.00     0.00  2552.90  1.609333e+08   
2    2005-09-21     0.00     0.00     0.00  2537.10  1.911060e+08   
3    2005-09-22     0.00     0.00     0.00  2441.97  1.864161e+08   
4    2005-09-23     0.00     0.00     0.00  2444.67  1.391900e+08   
...         ...      ...      ...      ...      ...           ...   
3619 2020-05-04  9682.65  9683.25  9430.35  9454.05  1.643802e+09   
3620 2020-05-05  9591.70  9615.80  9344.55  9358.90  1.220003e+09   
3621 2020-05-06  9379.35  9489.25  9266.10  9428.55  1.455393e+09   
3622 2020-05-07  9394.25  9437.80  9335.50  9351.45  1.076714e+09   
3623 2020-05-08  9522.65  9528.35  9395.45  9408.65  1.011498e+09   

          Turnover    P/E   P/B  Div Yield      MA_20        RSI        ADX  \
0     2.947050e+10  16.43  3.77       1.71  2404.8055  80.218399   0.000000   
1     4.02109