In [1]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Stock identifiers processed by the notebook. Later cells interpolate each one
# into "data/{stock}_ml_dataset.csv" and "models/rf_{stock}.pkl".
stocks = ["reliance", "tcs", "hdfcbank", "infy", "icicibank"]


In [4]:
# Train one RandomForestClassifier per stock on a chronological hold-out split,
# print hold-out metrics, save each fitted model to disk, and collect a short
# summary (accuracy + feature names) per stock in `results`.

TRAIN_FRACTION = 0.8       # leading share of rows used for fitting; the remainder is the test set
DECISION_THRESHOLD = 0.5   # predict class 1 when its probability is strictly above this value
MODEL_DIR = "models"       # fitted models are written to "models/rf_{stock}.pkl"

# joblib.dump does not create missing directories. Create the target up front
# so a fresh checkout does not fail only after the first model has been trained.
os.makedirs(MODEL_DIR, exist_ok=True)

results = {}

for stock in stocks:
    print(f"\n================ {stock.upper()} =================")

    # Load dataset: the first CSV column becomes the index and is parsed as
    # dates where pandas is able to do so.
    dataset = pd.read_csv(
        f"data/{stock}_ml_dataset.csv",
        index_col=0,
        parse_dates=True
    )

    # The split below is purely positional, so rows must be oldest-first or the
    # model would be fitted on later data than it is evaluated on. Only reorder
    # when the index really parsed as dates and is out of order; a stable sort
    # keeps the file order of rows sharing a timestamp.
    # NOTE(review): if the index does not parse as dates, the file order is
    # trusted as-is — confirm the CSVs are written chronologically.
    if isinstance(dataset.index, pd.DatetimeIndex) and not dataset.index.is_monotonic_increasing:
        dataset = dataset.sort_index(kind="mergesort")

    X = dataset.drop(columns=["target"])
    y = dataset["target"]

    # Time-series split: no shuffling, the first TRAIN_FRACTION of rows trains
    # the model and the trailing rows are held out for evaluation.
    split_idx = int(len(dataset) * TRAIN_FRACTION)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    # Model: fixed random_state so repeated runs build the same forest;
    # n_jobs=-1 uses all available cores.
    model = RandomForestClassifier(
        n_estimators=600,
        max_depth=14,
        min_samples_leaf=10,
        class_weight="balanced",
        random_state=42,
        n_jobs=-1
    )

    model.fit(X_train, y_train)

    # Predictions: column 1 of predict_proba is the second entry of
    # model.classes_ (the reports printed by this cell show labels 0 and 1),
    # thresholded explicitly instead of calling model.predict.
    y_prob = model.predict_proba(X_test)[:, 1]
    y_pred = (y_prob > DECISION_THRESHOLD).astype(int)

    acc = accuracy_score(y_test, y_pred)

    print("Accuracy:", round(acc, 4))
    print(classification_report(y_test, y_pred))

    # Save model
    joblib.dump(model, f"{MODEL_DIR}/rf_{stock}.pkl")

    # Store results
    results[stock] = {
        "accuracy": acc,
        "features": X.columns.tolist()
    }



Accuracy: 0.5769
              precision    recall  f1-score   support

           0       0.56      0.79      0.65       170
           1       0.63      0.36      0.46       168

    accuracy                           0.58       338
   macro avg       0.59      0.58      0.56       338
weighted avg       0.59      0.58      0.56       338


Accuracy: 0.4556
              precision    recall  f1-score   support

           0       0.53      0.38      0.44       191
           1       0.41      0.56      0.47       147

    accuracy                           0.46       338
   macro avg       0.47      0.47      0.46       338
weighted avg       0.47      0.46      0.45       338


Accuracy: 0.5237
              precision    recall  f1-score   support

           0       0.51      0.48      0.50       165
           1       0.53      0.56      0.55       173

    accuracy                           0.52       338
   macro avg       0.52      0.52      0.52       338
weighted avg       0

In [5]:
import pandas as pd
import joblib

# Build one table of feature importances from the saved models:
# rows are feature names, columns are stocks.
#
# The per-stock Series are gathered under their own name so that
# `feature_importance_df` is only ever bound to a DataFrame. If the loop fails
# midway, no half-filled list is left behind under the DataFrame's name.
importance_by_stock = []

for stock in stocks:
    # joblib.load unpickles the file, which can execute arbitrary code; only
    # load model files that were written by a trusted run of this notebook.
    model = joblib.load(f"models/rf_{stock}.pkl")

    # Only the column names are needed here, so read just the CSV header
    # (nrows=0) instead of parsing the whole dataset again.
    header = pd.read_csv(f"data/{stock}_ml_dataset.csv", index_col=0, nrows=0)
    feature_names = header.drop(columns=["target"]).columns

    # pd.Series raises if the number of importances and names disagree, which
    # guards against a model fitted on a different set of columns.
    importance_by_stock.append(
        pd.Series(
            model.feature_importances_,
            index=feature_names,
            name=stock
        )
    )

# Align the Series on feature name; each Series' name becomes its column label.
feature_importance_df = pd.concat(importance_by_stock, axis=1)
feature_importance_df


Unnamed: 0,reliance,tcs,hdfcbank,infy,icicibank
return,0.126613,0.130629,0.124903,0.123282,0.1433
sma_20,0.117657,0.116444,0.121412,0.11765,0.120449
sma_50,0.1196,0.115351,0.109906,0.118923,0.113352
rsi,0.121521,0.130559,0.123448,0.129763,0.126002
volatility,0.11786,0.122085,0.139178,0.125239,0.114673
nifty_return,0.147509,0.13571,0.133817,0.129289,0.129126
nifty_volatility,0.129132,0.122796,0.122688,0.128682,0.12675
banknifty_return,0.120107,0.126425,0.124647,0.127172,0.126348


In [6]:
# Add a "mean_importance" column holding the row-wise average of every column
# currently in the table. This modifies `feature_importance_df` in place.
feature_importance_df["mean_importance"] = feature_importance_df.mean(axis="columns")

# Show the features ranked from highest to lowest average importance. The
# sorted frame is only displayed; `feature_importance_df` keeps its row order.
feature_importance_df.sort_values("mean_importance", ascending=False)


Unnamed: 0,reliance,tcs,hdfcbank,infy,icicibank,mean_importance
nifty_return,0.147509,0.13571,0.133817,0.129289,0.129126,0.135091
return,0.126613,0.130629,0.124903,0.123282,0.1433,0.129745
rsi,0.121521,0.130559,0.123448,0.129763,0.126002,0.126259
nifty_volatility,0.129132,0.122796,0.122688,0.128682,0.12675,0.12601
banknifty_return,0.120107,0.126425,0.124647,0.127172,0.126348,0.12494
volatility,0.11786,0.122085,0.139178,0.125239,0.114673,0.123807
sma_20,0.117657,0.116444,0.121412,0.11765,0.120449,0.118722
sma_50,0.1196,0.115351,0.109906,0.118923,0.113352,0.115426
