In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Jaison\\Documents\\Workspace\\Main Projects\\End_to_end_Employee\\End_to_End_Implementation\\research'

In [3]:
# Step up from the research/ folder to the project root so that relative
# paths resolve from there. The guard keeps the cell idempotent: re-running
# it no longer climbs one directory higher on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Jaison\\Documents\\Workspace\\Main Projects\\End_to_end_Employee\\End_to_End_Implementation'

In [5]:
import os

# Connection settings that MLflow reads from the process environment.
# The username/password values below are placeholders: substitute your own
# DagsHub credentials locally and never commit real values to the notebook.
os.environ.update({
    'MLFLOW_TRACKING_URI': 'https://dagshub.com/JAISON14/End_to_End_Implementation.mlflow',
    'MLFLOW_TRACKING_USERNAME': 'Enter User name here',
    'MLFLOW_TRACKING_PASSWORD': 'Enter Password Here',
})


In [6]:
from dataclasses import dataclass
from pathlib import Path
from typing import Dict

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for a single model-training run.

    The dataclass is frozen, so fields cannot be reassigned after
    construction. The type annotations are not enforced at runtime.
    """

    # Directory in which the trained model file is written.
    root_dir: Path
    # Locations of the serialized feature sets (loaded with joblib).
    train_data_path: Path
    test_data_path: Path
    # Locations of the serialized target vectors (loaded with joblib).
    train_target_path: Path
    test_target_path: Path
    # Base name used when logging the model to MLflow.
    model_name: str
    # Human-readable label identifying which estimator is being trained.
    model_type: str
    # Keyword arguments forwarded to the estimator's ``set_params``.
    hyperparameters: Dict[str, float]
    

In [7]:
from Employee_Attition_End_to_end_ML_project_with_MLflow.constants import *
from Employee_Attition_End_to_end_ML_project_with_MLflow.utils.common import read_yaml, create_directories


In [8]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load the config, params and schema YAML files.

        Also creates the directory named by ``artifacts_root`` in the
        config file.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyperparameter YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self, model_type: str) -> ModelTrainerConfig:
        """Build the training configuration for one model type.

        The hyperparameters are taken from the params-file section whose key
        equals ``model_type``. Previously the ``ElasticNet`` section was used
        for every model and the model-specific lookup was discarded.

        A section may either hold the hyperparameters directly or nest them
        under a ``hyperparameters`` key; both layouts are accepted. When the
        params file has no section for ``model_type`` (or the section is
        empty) an empty mapping is used, so the estimator keeps whatever
        parameters it was constructed with.

        Args:
            model_type: Label of the estimator; also the params-file key.

        Returns:
            A ``ModelTrainerConfig`` populated from the ``model_trainer``
            section of the config file plus the resolved hyperparameters.
        """
        config = self.config.model_trainer

        # ``or {}`` also covers a key that is present but empty in the YAML.
        model_section = self.params.get(model_type) or {}
        hyperparameters = model_section.get("hyperparameters", model_section) or {}

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            train_target_path = config.train_target_path,
            test_target_path = config.test_target_path,
            model_name = config.model_name,
            model_type = model_type,
            hyperparameters=hyperparameters
        )

        return model_trainer_config

In [9]:
import pandas as pd
import os
from Employee_Attition_End_to_end_ML_project_with_MLflow import logger
import joblib

In [10]:
import mlflow
import mlflow.sklearn 
from sklearn.base import clone
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [18]:
class ModelTrainer:
    """Trains one estimator and records the run, metrics and model in MLflow."""

    def __init__(self, config: ModelTrainerConfig,model):
        """Store the run configuration and the estimator prototype.

        Args:
            config: Paths, names and hyperparameters for this run.
            model: Estimator instance; ``train`` works on a clone of it.
        """
        self.config = config
        self.model = model

    @staticmethod
    def _log_split_metrics(split, y_true, y_pred):
        """Compute weighted classification metrics and log them as ``<split>_<metric>``."""
        scores = {
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, average='weighted'),
            "recall": recall_score(y_true, y_pred, average='weighted'),
            "f1": f1_score(y_true, y_pred, average='weighted'),
        }
        for metric_name, value in scores.items():
            mlflow.log_metric(f"{split}_{metric_name}", value)

    def train(self):
        """Fit a clone of the estimator, then log metrics, params and the model.

        The run is recorded under the experiment ``Experiment_<model_type>``
        with run name ``Run_<model_type>``. The fitted model is logged to
        MLflow and also dumped with joblib into ``config.root_dir`` under a
        file named after ``config.model_type``.
        """
        cfg = self.config

        # Point MLflow at the remote tracking server before opening the run.
        mlflow.set_tracking_uri('https://dagshub.com/JAISON14/End_to_End_Implementation.mlflow')
        mlflow.set_experiment(f"Experiment_{cfg.model_type}")

        with mlflow.start_run(run_name=f"Run_{cfg.model_type}"):
            # Features and targets are stored as joblib artifacts.
            x_train = joblib.load(cfg.train_data_path)
            x_test = joblib.load(cfg.test_data_path)
            y_train = joblib.load(cfg.train_target_path)
            y_test = joblib.load(cfg.test_target_path)

            # Work on a clone so the estimator passed to __init__ stays unfitted.
            estimator = clone(self.model)
            estimator.set_params(**cfg.hyperparameters)
            estimator.fit(x_train, y_train)

            # Predict on both splits first, then score and log each in turn.
            split_results = {
                "train": (y_train, estimator.predict(x_train)),
                "test": (y_test, estimator.predict(x_test)),
            }
            for split, (y_true, y_pred) in split_results.items():
                self._log_split_metrics(split, y_true, y_pred)

            mlflow.log_params(cfg.hyperparameters)

            # Keep the model both in MLflow and as a local joblib file.
            mlflow.sklearn.log_model(estimator, f"{cfg.model_name}_{cfg.model_type}")
            joblib.dump(estimator, os.path.join(cfg.root_dir, cfg.model_type))




In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
#from imblearn.ensemble import RUSBoostClassifier, EasyEnsembleClassifier

In [19]:
# Train every enabled candidate and log each one as its own MLflow run.
# Exceptions propagate unchanged; the former ``except Exception as e: raise e``
# wrapper handled nothing and only added a frame to the traceback.
config_manager = ConfigurationManager()

# (estimator, label) pairs. The label is passed on as ``model_type``.
# Uncomment an entry to include that estimator in the sweep.
models = [
    (LogisticRegression(random_state=42), "Logistic Regression"),
    # (SVC(random_state=42), "Support Vector Machines"),
    # (KNeighborsClassifier(), "K-Nearest Neighbors"),
    # (DecisionTreeClassifier(random_state=42), "Decision Trees"),
    # (RandomForestClassifier(random_state=42), "Random Forest"),
    # (GradientBoostingClassifier(random_state=42), "Gradient Boosting"),
    # (MLPClassifier(random_state=42), "Neural Networks"),
    # (GaussianNB(), "Naive Bayes"),
    # (AdaBoostClassifier(random_state=42), "AdaBoost"),
    # (XGBClassifier(random_state=42), "XGBoost"),
    # (LGBMClassifier(random_state=42), "LightGBM"),
    # (CatBoostClassifier(random_state=42, verbose=0), "CatBoost")
]

for model, model_type in models:
    model_trainer_config = config_manager.get_model_trainer_config(model_type=model_type)
    trainer = ModelTrainer(config=model_trainer_config, model=model)
    trainer.train()

[2023-09-23 14:45:06,254: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-09-23 14:45:06,265: INFO: common: yaml file: params.yaml loaded successfully]
[2023-09-23 14:45:06,301: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-09-23 14:45:06,307: INFO: common: created directory at: artifacts]
[2023-09-23 14:45:06,315: INFO: common: created directory at: artifacts/model_trainer]
