In [1]:
pip install scikit-learn joblib pandas


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.3 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [3]:
# Input/output locations used by the training cell.
# Each one can be overridden with an environment variable so the notebook can
# run on a machine that does not have the original drive layout; when the
# variable is unset, the original absolute path is used unchanged.
PROCESSED_DATA_DIR = os.environ.get(
    "STOCK_PLATFORM_PROCESSED_DATA_DIR",
    r"D:\wear\stock-platform\data\processed",
)
MODEL_DIR = os.environ.get(
    "STOCK_PLATFORM_MODEL_DIR",
    r"D:\wear\stock-platform\models",
)

In [4]:
# Train one RandomForest classifier per "*_processed.csv" file found in
# PROCESSED_DATA_DIR, print its hold-out metrics, and save the fitted model
# as "<prefix>_rf_model.pkl" inside MODEL_DIR.
os.makedirs(MODEL_DIR, exist_ok=True)

print("Starting model training...\n")

# Loop-invariant settings: defined once instead of on every iteration.
processed_suffix = "_processed.csv"
features = [
    "Daily_Return",
    "MA_5",
    "MA_10",
    "Volatility_5",
    "Price_Change",
    "Volume"
]
required_columns = features + ["Target"]
skipped_files = []

# os.listdir() returns entries in arbitrary order; sorting makes the training
# order (and therefore the printed log) identical on every run.
for file in sorted(os.listdir(PROCESSED_DATA_DIR)):
    if not file.endswith(processed_suffix):
        continue

    print(f"Training model for {file}...")

    # LOAD DATA
    file_path = os.path.join(PROCESSED_DATA_DIR, file)
    df = pd.read_csv(file_path)

    # A file without the expected columns is reported and skipped so that one
    # malformed CSV does not abort training for the remaining files.
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Skipping {file}: missing columns {missing_columns}\n")
        skipped_files.append(file)
        continue

    # FEATURES & TARGET
    X = df[features]
    y = df["Target"]

    # TRAIN TEST SPLIT
    # shuffle=False preserves row order, so the final 20% of rows become the
    # test set. This presumes the rows are in chronological order - TODO confirm
    # against the preprocessing step.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, shuffle=False
    )

    # MODEL
    model = RandomForestClassifier(
        n_estimators=100,
        max_depth=5,
        random_state=42
    )

    model.fit(X_train, y_train)

    # EVALUATION
    # NOTE(review): the recorded runs show accuracies of 0.46 and 0.53 on a
    # two-class target, i.e. close to chance; compare against a majority-class
    # baseline before relying on these models.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy for {file}: {accuracy:.2f}")
    print(classification_report(y_test, y_pred))

    # SAVE MODEL
    # Strip only the trailing suffix (already verified by endswith above);
    # str.replace would also rewrite any earlier occurrence in the file name.
    model_name = file[: -len(processed_suffix)] + "_rf_model.pkl"
    model_path = os.path.join(MODEL_DIR, model_name)
    joblib.dump(model, model_path)

    print(f"Model saved at: {model_path}\n")

if skipped_files:
    print(f"Model training completed; skipped files: {skipped_files}")
else:
    print("Model training completed for all stocks.")

Starting model training...

Training model for AAPL_processed.csv...
Accuracy for AAPL_processed.csv: 0.46
              precision    recall  f1-score   support

           0       0.43      0.42      0.42       118
           1       0.49      0.51      0.50       132

    accuracy                           0.46       250
   macro avg       0.46      0.46      0.46       250
weighted avg       0.46      0.46      0.46       250

Model saved at: D:\wear\stock-platform\models\AAPL_rf_model.pkl

Training model for AMZN_processed.csv...
Accuracy for AMZN_processed.csv: 0.53
              precision    recall  f1-score   support

           0       0.50      0.58      0.54       119
           1       0.56      0.48      0.52       131

    accuracy                           0.53       250
   macro avg       0.53      0.53      0.53       250
weighted avg       0.53      0.53      0.53       250

Model saved at: D:\wear\stock-platform\models\AMZN_rf_model.pkl

Training model for GOOGL_proce