In [1]:
import os

In [2]:
%pwd

'c:\\Users\\SAROURA\\OneDrive\\Documents\\ING4\\Semestre 2\\Machine Learning Avancé\\Projet MLOPS\\End-to-end-MLOps\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\SAROURA\\OneDrive\\Documents\\ING4\\Semestre 2\\Machine Learning Avancé\\Projet MLOPS\\End-to-end-MLOps'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    target_column: str

In [6]:
from src.MLOpsProject.constants import *
from src.MLOpsProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        schema =  self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            target_column = schema.name
            
        )

        return model_trainer_config

In [8]:
import pandas as pd
import os
from src.MLOpsProject import logger

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
import joblib

In [9]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config

    
    def train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)


        train_x = train_data.drop([self.config.target_column], axis=1)
        test_x = test_data.drop([self.config.target_column], axis=1)
        train_y = train_data[[self.config.target_column]]
        test_y = test_data[[self.config.target_column]]


        # Liste des modèles à tester
        models = {
            "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000, solver='lbfgs'),
            "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5, weights='uniform', metric='minkowski'),
            "Decision Tree": DecisionTreeClassifier(max_depth=5, criterion='gini'),
            "Random Forest": RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42),
            "Support Vector Machine": SVC(C=1.0, kernel='rbf', probability=True),
            "Naive Bayes": GaussianNB(var_smoothing=1e-9),
            "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3),
            "AdaBoost": AdaBoostClassifier(n_estimators=50, learning_rate=1.0),
            "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', learning_rate=0.1, max_depth=3, n_estimators=100)
        }

        # Entraînement et évaluation
        for name, model in models.items():
            model.fit(train_x, train_y)
            preds = model.predict(test_x)
            acc = accuracy_score(test_y, preds)
            print(f"\nModel: {name}")
            print(f"Accuracy: {acc:.4f}")
            print(classification_report(test_y, preds))
            
            # Sauvegarde du modèle
            joblib.dump(model, os.path.join(self.config.root_dir, f"{name.replace(' ', '_').lower()}_model.pkl"))



In [10]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2025-05-03 12:21:34,889: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-03 12:21:34,895: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-05-03 12:21:34,896: INFO: common: created directory at: artifacts]
[2025-05-03 12:21:34,897: INFO: common: created directory at: artifacts/model_trainer]

Model: Logistic Regression
Accuracy: 0.9737
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        71
           1       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


Model: K-Nearest Neighbors
Accuracy: 0.9474
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        71
           1       0.93      0.93      0.93        43

    accuracy                           0.95       114
   macro avg       0.94      0

  y = column_or_1d(y, warn=True)
  return self._fit(X, y)
  return fit_method(estimator, *args, **kwargs)



Model: Random Forest
Accuracy: 0.9649
              precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114


Model: Support Vector Machine
Accuracy: 0.9825
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        71
           1       1.00      0.95      0.98        43

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114


Model: Naive Bayes
Accuracy: 0.9649
              precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   m

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)



Model: Gradient Boosting
Accuracy: 0.9561
              precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Model: AdaBoost
Accuracy: 0.9649
              precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114



  y = column_or_1d(y, warn=True)



Model: XGBoost
Accuracy: 0.9561
              precision    recall  f1-score   support

           0       0.96      0.97      0.97        71
           1       0.95      0.93      0.94        43

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
