In [1]:
import os
os.chdir('../')
%pwd

'd:\\Python\\Industry level\\EndtoEnd combination\\AI-Resume-Screening-with-ML-ops'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Directory that receives the pickled best estimators.
    root_dir: str
    # Training features; read with pickle.load by ModelTrainer.
    X_train_path: str
    # Training labels; read with numpy.load by ModelTrainer.
    y_train_path: str
    # Test features; read with pickle.load by ModelTrainer.
    X_test_path: str
    # Test labels; read with numpy.load by ModelTrainer.
    y_test_path: str
    # Hyper-parameter grids. ModelTrainer reads the entries
    # `logistic_regression`, `random_forest` and `svm` via attribute access,
    # so the object passed here must support that (a plain dict does not).
    params: dict

In [3]:
from resumeScreening.constants import *
from resumeScreening.utils.common import read_yaml,create_directories

In [4]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects."""

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
        """
        # Both results are accessed with attribute syntax below, so read_yaml
        # is expected to return an attribute-accessible mapping.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainerConfig:
        """Build the ModelTrainerConfig from the `model_trainer` config section.

        The stage's output directory is created as a side effect.

        Returns:
            A ModelTrainerConfig whose paths come from `config.model_trainer`
            and whose grids come from `params.model_training`.
        """
        trainer_section = self.config.model_trainer
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            X_train_path=trainer_section.X_train_path,
            y_train_path=trainer_section.y_train_path,
            X_test_path=trainer_section.X_test_path,
            y_test_path=trainer_section.y_test_path,
            params=self.params.model_training,
        )

In [11]:
import pickle
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from resumeScreening import logger

In [9]:
class ModelTrainer:
    """Grid-search a fixed set of classifiers and pickle the best of each.

    The train/test splits are loaded eagerly in ``__init__``. Only the training
    split is used by the methods of this class; the test split is loaded and
    kept on the instance but not read here.
    """

    # Seed for estimators with internal randomness, so that re-running the
    # notebook selects and saves the same model.
    RANDOM_STATE = 42

    def __init__(self, config: "ModelTrainerConfig"):
        """Load the feature and label artifacts named by ``config``.

        Args:
            config: Object exposing ``X_train_path``, ``y_train_path``,
                ``X_test_path``, ``y_test_path``, ``root_dir`` and ``params``.
        """
        self.config = config
        self.X_train = self._load_pickle(self.config.X_train_path)
        self.y_train = np.load(self.config.y_train_path)
        self.X_test = self._load_pickle(self.config.X_test_path)
        self.y_test = np.load(self.config.y_test_path)

    @staticmethod
    def _load_pickle(path):
        """Unpickle one object from ``path``, closing the file afterwards."""
        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # artifacts produced by this project's own pipeline.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def train_model(self, model, param_grid, name):
        """Tune ``model`` over ``param_grid`` and save the best estimator.

        Args:
            model: Unfitted estimator handed to GridSearchCV.
            param_grid: Hyper-parameter grid handed to GridSearchCV.
            name: Display name; lower-cased with spaces replaced by underscores
                to form the ``<name>.pkl`` file written under ``config.root_dir``.

        Returns:
            The best estimator found by the search (also written to disk).
        """
        logger.info(f"Training: {name}")
        # NOTE(review): failing candidates are scored as NaN rather than raising.
        # The recorded run of this notebook shows 20 of 40 RandomForest fits
        # rejected by parameter validation, so verify the `random_forest` grid in
        # params.yaml (TODO confirm which value is invalid); passing
        # error_score='raise' surfaces the full error.
        grid = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)
        grid.fit(self.X_train, self.y_train)
        best_model = grid.best_estimator_
        logger.info(f"Best model is {best_model}")

        # Save the best model
        model_path = Path(self.config.root_dir) / f"{name.lower().replace(' ', '_')}.pkl"
        with open(model_path, "wb") as f:
            pickle.dump(best_model, f)

        return best_model

    def train_all(self):
        """Train every supported classifier with its grid from ``config.params``.

        Returns:
            Dict mapping each model's display name to its best estimator.
        """
        grids = self.config.params
        candidates = (
            ("LogisticRegression", LogisticRegression(max_iter=1000), grids.logistic_regression),
            (
                "RandomForestClassifier",
                RandomForestClassifier(random_state=self.RANDOM_STATE),
                grids.random_forest,
            ),
            ("SVM", SVC(), grids.svm),
        )
        return {
            name: self.train_model(estimator, param_grid, name)
            for name, estimator, param_grid in candidates
        }


In [13]:
# Run the model-training stage end to end: load configuration, build the
# trainer, then grid-search and save every model.
try:
    config = ConfigurationManager()
    get_model_training_config = config.get_model_training_config()
    model_trainer = ModelTrainer(config = get_model_training_config)
    model_trainer.train_all()
except Exception:
    # Record the failure with its traceback, then re-raise unchanged; a bare
    # `raise` keeps the original traceback without adding an extra frame.
    logger.exception("Model training stage failed")
    raise

[2025-08-06 04:30:16,193: INFO: yaml file: config\config.yaml loaded successfully]
[2025-08-06 04:30:16,201: INFO: yaml file: params.yaml loaded successfully]
[2025-08-06 04:30:16,203: INFO: Created directory at: artifacts]
[2025-08-06 04:30:16,205: INFO: Created directory at: artifacts/model_trainer]
[2025-08-06 04:30:16,217: INFO: Training: LogisticRegression]
Fitting 5 folds for each of 9 candidates, totalling 45 fits
[2025-08-06 04:30:41,487: INFO: Best model is LogisticRegression(C=1, max_iter=1000)]
[2025-08-06 04:30:41,549: INFO: Training: RandomForestClassifier]
Fitting 5 folds for each of 8 candidates, totalling 40 fits


20 fits failed out of a total of 40.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "d:\Python\Industry level\EndtoEnd combination\AI-Resume-Screening-with-ML-ops\resume\Lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\Python\Industry level\EndtoEnd combination\AI-Resume-Screening-with-ML-ops\resume\Lib\site-packages\sklearn\base.py", line 1358, in wrapper
    estimator._validate_params()
  File "d:\Python\Industry level\EndtoEnd combination\AI-Resume-Screening-with-ML-ops\resume\Lib\site-packages\sklearn\base.py", line 471, in _validate_params
    validate_

[2025-08-06 04:30:52,038: INFO: Best model is RandomForestClassifier(max_depth=10, min_samples_split=5, n_estimators=200)]
[2025-08-06 04:30:52,381: INFO: Training: SVM]
Fitting 5 folds for each of 4 candidates, totalling 20 fits
[2025-08-06 04:30:58,725: INFO: Best model is SVC(C=1, kernel='linear')]
