In [1]:
import os

In [2]:
%pwd

'd:\\end to end mental_health_prediction\\research'

In [3]:
# Step up one level from the notebook's working directory (the %pwd output above shows
# it is the `research` folder) — presumably so the relative paths used by the project
# config resolve against the project root.
# NOTE(review): not idempotent — re-running this cell climbs one more directory each time.
os.chdir("../")

In [4]:
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage."""
    # Directory for this stage; ConfigurationManager creates it before building the config.
    root_dir: Path
    # CSV read with pandas for training; must contain a "treatment" column.
    train_data_path: Path
    # CSV read with pandas for evaluation; must contain a "treatment" column.
    test_data_path: Path
    # Location of the fitted scaler that ModelTrainer loads with joblib.
    scaler_path: Path
    # NOTE(review): despite the name, ModelTrainer passes this to joblib.dump as the
    # output *file* and only creates its parent directory.
    model_dir: Path
    # Model name -> spec; ModelTrainer reads the keys 'module', 'class' and optional 'params'.
    models: Dict[str, Dict[str, Any]]


In [5]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Load the project's YAML files and hand out per-stage configuration objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        # Files are read in this order: config, params, schema.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` from the loaded config and params.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            ModelTrainerConfig: paths come from ``config.model_trainer``; the model
            specifications come from ``params.model_trainer.models``.
        """
        trainer_section = self.config.model_trainer
        model_specs = self.params.model_trainer.models

        # Make sure the stage directory exists before anything is written into it.
        create_directories([trainer_section.root_dir])

        settings = {
            "root_dir": trainer_section.root_dir,
            "train_data_path": trainer_section.train_data_path,
            "test_data_path": trainer_section.test_data_path,
            "scaler_path": trainer_section.scaler_path,
            "model_dir": trainer_section.model_dir,
            "models": model_specs,
        }
        return ModelTrainerConfig(**settings)


In [6]:
import os
import joblib
import pandas as pd
import importlib
from sklearn.metrics import accuracy_score, classification_report, f1_score
from dataclasses import dataclass
from typing import Dict, Any


class ModelTrainer:
    """Fit every configured classifier, report its test metrics and persist the best one.

    ``config`` must expose the attributes read below: ``train_data_path``,
    ``test_data_path``, ``scaler_path``, ``model_dir`` and ``models``.
    """

    def __init__(self, config):
        """Store the configuration object; no I/O happens here."""
        self.config = config

    def load_data(self):
        """Read the train and test CSVs and split off the ``treatment`` target column.

        Returns:
            tuple: ``(X_train, X_test, y_train, y_test)``.
        """
        train_df = pd.read_csv(self.config.train_data_path)
        test_df = pd.read_csv(self.config.test_data_path)

        X_train = train_df.drop("treatment", axis=1)
        y_train = train_df["treatment"]

        X_test = test_df.drop("treatment", axis=1)
        y_test = test_df["treatment"]

        return X_train, X_test, y_train, y_test

    def load_scaler(self):
        """Load the previously fitted scaler from ``config.scaler_path`` with joblib."""
        return joblib.load(self.config.scaler_path)

    @staticmethod
    def _build_model(model_config):
        """Instantiate the estimator described by a ``module`` / ``class`` / ``params`` spec."""
        module = importlib.import_module(model_config['module'])
        model_class = getattr(module, model_config['class'])
        # An empty `params:` key in YAML loads as None; treat it like "no params"
        # instead of failing on `**None`.
        params = model_config.get('params') or {}
        return model_class(**params)

    def train_and_evaluate(self):
        """Train each configured model, print its metrics and save the best by F1.

        Every model is fitted on the scaled training features and scored on the
        scaled test features. The model with the highest F1 (positive label 1) is
        written to ``config.model_dir`` with joblib.

        Raises:
            ValueError: if ``config.models`` is empty, so there is nothing to save.
        """
        X_train, X_test, y_train, y_test = self.load_data()
        scaler = self.load_scaler()

        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        best_model = None
        # Start below any reachable score so a model with F1 == 0 can still be selected.
        best_f1 = float("-inf")
        best_model_name = ""

        print("\n📊 Evaluating models...\n")

        for model_name, model_config in self.config.models.items():
            print(f"🔹 Training: {model_name.replace('_', ' ').title()}")

            model = self._build_model(model_config)

            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)

            acc = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred, pos_label=1)

            print(f"✅ Accuracy: {acc:.4f} | F1 Score: {f1:.4f}")
            print("📄 Classification Report:")
            print(classification_report(y_test, y_pred, target_names=["No", "Yes"]))

            # NOTE(review): selection uses the same test split that is reported, so the
            # winning score is an optimistic estimate.
            if best_model is None or f1 > best_f1:
                best_f1 = f1
                best_model = model
                best_model_name = model_name

        if best_model is None:
            # Previously this silently dumped `None` to disk.
            raise ValueError("No models are configured for training; nothing to save.")

        # Save the best model using joblib
        print(f"\n💾 Saving best model: {best_model_name.title()} (F1: {best_f1:.4f})")
        output_dir = os.path.dirname(self.config.model_dir)
        # dirname is "" for a bare file name, and os.makedirs("") raises FileNotFoundError.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        joblib.dump(best_model, self.config.model_dir)


In [7]:
# Build the trainer configuration, then train, evaluate and persist the best model.
try:
    config = ConfigurationManager()
    trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=trainer_config)
    model_trainer.train_and_evaluate()
except Exception:
    # Nothing to clean up here; let the failure surface unchanged.
    raise

[2025-07-21 17:38:09,400: INFO: common: yaml file: D:\end to end mental_health_prediction\config.yaml loaded successfully]
[2025-07-21 17:38:09,408: INFO: common: yaml file: D:\end to end mental_health_prediction\params.yaml loaded successfully]
[2025-07-21 17:38:09,415: INFO: common: yaml file: D:\end to end mental_health_prediction\schema.yaml loaded successfully]
[2025-07-21 17:38:09,415: INFO: common: created directory at: artifacts]
[2025-07-21 17:38:09,415: INFO: common: created directory at: artifacts/model_trainer]

📊 Evaluating models...

🔹 Training: Logistic Regression
✅ Accuracy: 0.6853 | F1 Score: 0.6926
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.63      0.73      0.68       114
         Yes       0.74      0.65      0.69       137

    accuracy                           0.69       251
   macro avg       0.69      0.69      0.69       251
weighted avg       0.69      0.69      0.69       251

🔹 Training: Knn




✅ Accuracy: 0.6335 | F1 Score: 0.6260
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.58      0.72      0.64       114
         Yes       0.71      0.56      0.63       137

    accuracy                           0.63       251
   macro avg       0.64      0.64      0.63       251
weighted avg       0.65      0.63      0.63       251

🔹 Training: Decision Tree
✅ Accuracy: 0.6892 | F1 Score: 0.7023
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.64      0.71      0.68       114
         Yes       0.74      0.67      0.70       137

    accuracy                           0.69       251
   macro avg       0.69      0.69      0.69       251
weighted avg       0.69      0.69      0.69       251

🔹 Training: Random Forest
✅ Accuracy: 0.7450 | F1 Score: 0.7480
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.69      0.81      0.74       

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Accuracy: 0.6693 | F1 Score: 0.6820
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.62      0.69      0.66       114
         Yes       0.72      0.65      0.68       137

    accuracy                           0.67       251
   macro avg       0.67      0.67      0.67       251
weighted avg       0.67      0.67      0.67       251

🔹 Training: Catboost
✅ Accuracy: 0.7171 | F1 Score: 0.7259
📄 Classification Report:
              precision    recall  f1-score   support

          No       0.67      0.75      0.71       114
         Yes       0.77      0.69      0.73       137

    accuracy                           0.72       251
   macro avg       0.72      0.72      0.72       251
weighted avg       0.72      0.72      0.72       251

🔹 Training: Lightgbm
[LightGBM] [Info] Number of positive: 495, number of negative: 506
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002268 seconds.
You can

