In [None]:
# STEP 1: Install required package
!pip install xgboost ta

# STEP 2: Imports
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from ta.trend import MACD
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from google.colab import files

# Apply matplotlib's 'ggplot' style sheet to any figures drawn later.
plt.style.use('ggplot')


Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=9da23fe78fdeed69db73aeea9583389b2eb80e49dbbc38189541b1ac7288ec2f
  Stored in directory: /root/.cache/pip/wheels/a1/d7/29/7781cc5eb9a3659d032d7d15bdd0f49d07d2b24fec29f44bc4
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [None]:
# Prompt for the per-stock CSV files through Colab's upload helper.
# `uploaded` keeps whatever files.upload() returns; the later cells read the
# CSVs by file name from the working directory instead of using this value.
from google.colab import files
uploaded = files.upload()

Saving BAJFINANCE.csv to BAJFINANCE.csv
Saving BHARTIARTL.csv to BHARTIARTL.csv
Saving HDFCBANK.csv to HDFCBANK.csv
Saving INFY.csv to INFY.csv
Saving ITC.csv to ITC.csv
Saving LT.csv to LT.csv
Saving RELIANCE.csv to RELIANCE.csv
Saving SUNPHARMA.csv to SUNPHARMA.csv
Saving TATACHEM.csv to TATACHEM.csv
Saving TATAMOTORS.csv to TATAMOTORS.csv


In [None]:
# Display name -> uploaded CSV file name, in the order the stocks are processed.
stock_files = dict([
    ('HDFC Bank', 'HDFCBANK.csv'),
    ('Reliance', 'RELIANCE.csv'),
    ('Infosys', 'INFY.csv'),
    ('ITC', 'ITC.csv'),
    ('Tata Motors', 'TATAMOTORS.csv'),
    ('Sun Pharma', 'SUNPHARMA.csv'),
    ('L&T', 'LT.csv'),
    ('Bharti Airtel', 'BHARTIARTL.csv'),
    ('Bajaj Finance', 'BAJFINANCE.csv'),
    ('Tata Chemicals', 'TATACHEM.csv'),
])


In [None]:
def preprocess_and_train(file_path, stock_name):
    """Train and score a next-day direction classifier for a single stock.

    The CSV at ``file_path`` is loaded, its column names are lower-cased, and
    it must then provide ``date`` and ``close`` columns. RSI, MACD, Bollinger
    Band, lagged-close and 5-row rolling features are added. Each row is
    labelled 1 when the following row's close is higher than its own close,
    otherwise 0. Every column except ``target`` and ``close`` is used as a
    feature. The chronologically last 20% of rows form the test set.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.
    stock_name : str
        Label stored under ``'Stock'`` in the result and used in the failure
        message.

    Returns
    -------
    dict or None
        Classification metrics (accuracy, precision, recall, F1, ROC AUC) and
        strategy metrics (annualised Sharpe ratio, maximum drawdown in percent,
        signal accuracy in percent), rounded to two decimals. ``None`` is
        returned when any step raises; the error is printed, not re-raised.
    """
    try:
        df = pd.read_csv(file_path)
        df.columns = [col.lower() for col in df.columns]

        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df['close'] = pd.to_numeric(df['close'], errors='coerce')
        # Drop incomplete rows while 'date' is still a column: once it is the
        # index, dropna() no longer sees unparseable dates (NaT) and they would
        # sort to the end of the frame, i.e. into the test set.
        df = df.dropna().set_index('date').sort_index()

        # Indicators
        df['rsi'] = RSIIndicator(close=df['close'], window=14).rsi()
        macd = MACD(close=df['close'])
        df['macd'] = macd.macd()
        df['macd_signal'] = macd.macd_signal()
        bb = BollingerBands(close=df['close'], window=20, window_dev=2)
        df['bb_upper'] = bb.bollinger_hband()
        df['bb_lower'] = bb.bollinger_lband()
        df['bb_mavg'] = bb.bollinger_mavg()

        # Lag features
        for lag in range(1, 6):
            df[f'close_lag_{lag}'] = df['close'].shift(lag)
        df['rolling_mean_5'] = df['close'].rolling(window=5).mean()
        df['rolling_std_5'] = df['close'].rolling(window=5).std()

        # Target: 1 if the next close is higher than the current close
        next_close = df['close'].shift(-1)
        df['target'] = (next_close > df['close']).astype(int)
        # The final row has no next close. The comparison above yields False
        # for it, so it would silently be labelled 0; remove it explicitly
        # because dropna() cannot detect it after the int cast.
        df = df[next_close.notna()].dropna()

        # Features & target
        X = df.drop(['target', 'close'], axis=1)
        y = df['target']

        # Train/test split (no shuffling, so the test set is the most recent 20%)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        # XGBoost model. `use_label_encoder` is intentionally not passed:
        # XGBoost reports it as an unused parameter on every fit.
        model = xgb.XGBClassifier(
            n_estimators=100, max_depth=4, learning_rate=0.1,
            subsample=0.8, colsample_bytree=0.8,
            random_state=42, eval_metric='logloss'
        )
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1]

        # Performance metrics; zero_division=0 keeps the score at 0 without a
        # warning when a class is never predicted.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        roc_auc = roc_auc_score(y_test, y_proba)

        # Strategy performance
        X_test_copy = X_test.copy()
        X_test_copy['pred'] = y_pred
        X_test_copy['actual'] = y_test.values
        # The split is unshuffled, so the test rows are exactly the tail of df.
        # Slicing by position stays correct even if dates are duplicated.
        X_test_copy['close'] = df['close'].iloc[len(X_train):].to_numpy()
        X_test_copy['daily_return'] = X_test_copy['close'].pct_change()
        # Long (+1) when an up move is predicted, short (-1) otherwise
        X_test_copy['position'] = X_test_copy['pred'].replace({0: -1, 1: 1})
        # The position chosen on one row earns the return realised on the next row
        X_test_copy['strategy_return'] = X_test_copy['position'].shift(1) * X_test_copy['daily_return']

        ret = X_test_copy['strategy_return'].dropna()
        ret_std = ret.std()
        # `> 0` is False for NaN as well (fewer than two returns), so the
        # Sharpe ratio falls back to 0 instead of becoming NaN.
        sharpe = (ret.mean() / ret_std) * np.sqrt(252) if ret_std > 0 else 0
        cumulative = (1 + ret).cumprod()
        drawdown = (cumulative.cummax() - cumulative) / cumulative.cummax()
        max_drawdown = drawdown.max()
        signal_accuracy = (X_test_copy['actual'] == X_test_copy['pred']).mean()

        return {
            'Stock': stock_name,
            'Accuracy': round(accuracy, 2),
            'Precision': round(precision, 2),
            'Recall': round(recall, 2),
            'F1 Score': round(f1, 2),
            'ROC AUC': round(roc_auc, 2),
            'Sharpe Ratio': round(sharpe, 2),
            'Max Drawdown (%)': round(max_drawdown * 100, 2),
            'Signal Accuracy (%)': round(signal_accuracy * 100, 2)
        }

    except Exception as e:
        # Best effort: report the failure and let the caller skip this stock.
        print(f"{stock_name} failed: {e}")
        return None


In [None]:
# Train and evaluate one model per stock. Stocks for which
# preprocess_and_train returned a falsy value are left out of the summary.
results = []

for name, path in stock_files.items():
    print(f"📈 Running XGBoost for {name}...")
    result = preprocess_and_train(path, name)
    if not result:
        continue
    results.append(result)

# One row per successfully processed stock; the bare name displays the table.
summary_df = pd.DataFrame(results)
summary_df


📈 Running XGBoost for HDFC Bank...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📈 Running XGBoost for Reliance...
📈 Running XGBoost for Infosys...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📈 Running XGBoost for ITC...
📈 Running XGBoost for Tata Motors...
📈 Running XGBoost for Sun Pharma...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📈 Running XGBoost for L&T...
📈 Running XGBoost for Bharti Airtel...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📈 Running XGBoost for Bajaj Finance...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📈 Running XGBoost for Tata Chemicals...


Unnamed: 0,Stock,Accuracy,Precision,Recall,F1 Score,ROC AUC,Sharpe Ratio,Max Drawdown (%),Signal Accuracy (%)
0,HDFC Bank,0.49,0.57,0.23,0.33,0.47,0.04,24.99,49.44
1,Reliance,0.48,0.47,0.27,0.34,0.49,0.18,29.06,47.95
2,Infosys,0.48,0.48,0.56,0.52,0.47,-0.03,22.25,48.13
3,ITC,0.54,0.55,0.41,0.47,0.54,1.79,22.58,53.73
4,Tata Motors,0.48,0.43,0.15,0.22,0.47,-1.87,71.75,47.76
5,Sun Pharma,0.5,0.53,0.52,0.53,0.51,-0.25,27.96,49.81
6,L&T,0.48,0.68,0.06,0.11,0.53,-0.83,43.7,47.76
7,Bharti Airtel,0.47,0.48,0.28,0.35,0.48,-1.09,51.72,46.83
8,Bajaj Finance,0.49,0.49,0.66,0.56,0.5,-0.12,38.63,49.25
9,Tata Chemicals,0.52,0.54,0.52,0.53,0.53,0.04,46.51,51.68


In [None]:
# Write the summary table to CSV (without the index column) and pass the
# file to Colab's download helper.
output_csv = "XGBoost_Stock_Performance.csv"
summary_df.to_csv(output_csv, index=False)
files.download(output_csv)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>