In [1]:
import os

In [2]:
os.getcwd()

'c:\\TheBritishCollege\\DataScience\\Wafer_sensor_project\\research'

In [3]:
os.chdir("../")

In [4]:
os.getcwd()

'c:\\TheBritishCollege\\DataScience\\Wafer_sensor_project'

In [41]:
from pathlib import Path
from dataclasses import dataclass

In [42]:
@dataclass(frozen=True)
class ModelTrainerConfig: 
    root_dir: Path
    model_path: Path
    model_yaml: Path
    train_arr_path: Path
    test_arr_path: Path


In [43]:
from wafer_project import logger
from wafer_project.utils.common import read_yaml, create_directories,save_object

from wafer_project.constant import *

In [44]:
class ConfigurationManager: 
    def __init__(self, config_file_path=CONFIG_FILE_PATH, 
                 parmas_file_path=PARAMS_FILE_PATH,
                 scheme_file_path=SCHEMA_FILE_PATH
                 ): 
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(parmas_file_path)
        self.schema=read_yaml(scheme_file_path)
        
        create_directories([self.config.artifacts_root])
        
    def get_model_trainer_config(self)-> ModelTrainerConfig: 
        config=self.config.model_trainer
        create_directories([config.root_dir])
        
        model_trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir, 
            model_path=config.model_path,
            model_yaml=config.model_yaml,
            train_arr_path=config.train_arr_path,
            test_arr_path=config.test_arr_path
            
        )
        return model_trainer_config
        
        

In [45]:
import sys
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV


In [46]:
class ModelTrainer: 
    def __init__(self, config=ModelTrainerConfig): 
        self.config=config
        
        self.model={
            'Grd': GradientBoostingClassifier(), 
            'Random_forest': RandomForestClassifier(), 
            'GRboost': GradientBoostingClassifier()
        }
        
        
        
    def evaluate_models(self, x_train, y_train, x_test, y_test, models): 
        try: 
            report={}
            y_train=np.array(y_train)
            y_test=np.array(y_test)
            
            for i in range(len(models)):
                model= list(models.values())[i]
                logger.info("model is created") 
                model.fit(x_train, y_train)
                
                y_train_pred=model.predict(x_test)
                
                test_model_score=accuracy_score(y_test, y_train_pred)
                
                report[list(models.keys())[i]]=test_model_score
            
            return report
                
                
                
                # logger.info(f"Model: {model}") 
                
        except Exception as e:
            raise e
    
    def fine_tune_best_model(self, best_model_object: object,
                             best_model: str, x_train, y_train,
                             params: dict
                             ):
        grid_search=GridSearchCV(best_model_object,
                                 param_grid=params,
                                 cv=5,
                                 verbose=3
                                 )
        logger.info("finetuning started\n\n")
        grid_search.fit(x_train,y_train)
        best_params=grid_search.best_params_
        
        logger.info(f"Best paramater for {best_model} is {best_params}")
        
        finetuned_model=best_model_object.set_params(**best_params)
        
        return finetuned_model
        
                
    def initate_model_trainer(self):
        try: 
            train_arr=self.config.train_arr_path
            test_arr=self.config.test_arr_path
            
            x=pd.read_csv(train_arr)
            y=pd.read_csv(test_arr)
            
            x_train=x.drop("Good/Bad", axis=1)
            y_train=x["Good/Bad"]
            x_test=y.drop("Good/Bad", axis=1)
            y_test=y["Good/Bad"]
                        
            # logger.info(f"x_train: {x_train.head(2)}\n\\n\n")
            # logger.info(f"y_train: {y_train.head(2)}\n\\n\n")
            # logger.info(f"x_test: {x_test.head(2)}\n\\n\n")
            # logger.info(f"y_test: {y_test.head(2)}\n\\n\n")
            model_report: dict=self.evaluate_models(x_train=x_train,
                                                    y_train=y_train,
                                                    x_test=x_test,
                                                    y_test=y_test,
                                                    models=self.model) 
               
            logger.info(f"Model Report : {model_report}")
            
            best_model_score=max(sorted(model_report.values()))
            
            Best_model_name=list(model_report.keys())[list(model_report.values()).index(best_model_score)]
            
            logger.info(f"Best model Name: {Best_model_name}")
            
            Best_model=self.model[Best_model_name]
            
            params_grid={
                'n_estimators': [100, 200, 300],
                'max_depth': ['None', 5, 10],
                'min_samples_split': [2, 5, 10],
                'min_samples_leaf': [1, 2, 4]}
                            
            best_model_tune=self.fine_tune_best_model(best_model_object=Best_model,
                                                 best_model=Best_model_name,
                                                 x_train=x_train, 
                                                 y_train=y_train,
                                                 params=params_grid
                                                 )
            best_model_tune.fit(x_train,y_train)
            y_pred=best_model_tune.predict(x_test)
            
            model_accuracy=accuracy_score(y_test, y_pred)
            
            logger.info(f"The best model name: {Best_model_name} with accuracy: {model_accuracy}")
            
            save_object(file_path=Path(self.config.model_path), obj=best_model_tune)
            logger.info(f"Model saved at {self.config.model_path}")
                 
        except Exception as e: 
            raise e
            
    
    

In [47]:
configmanager=ConfigurationManager()
config_model_config=configmanager.get_model_trainer_config()
model_trainer=ModelTrainer(config_model_config)
model_trainer.initate_model_trainer()


[2023-12-24 22:02:45,914 : INFO : common : Yaml file read sucessfully: config\config.yaml]
[2023-12-24 22:02:45,977 : INFO : common : Yaml file read sucessfully: params.yaml]
[2023-12-24 22:02:45,991 : INFO : common : Yaml file read sucessfully: schema.yaml]
[2023-12-24 22:02:45,997 : INFO : common :  Directories is created : artifacts]
[2023-12-24 22:02:46,033 : INFO : common :  Directories is created : artifacts/model_trainer]


[2023-12-24 22:02:46,822 : INFO : 3112322613 : model is created]
[2023-12-24 22:02:55,637 : INFO : 3112322613 : model is created]
[2023-12-24 22:02:56,145 : INFO : 3112322613 : model is created]
[2023-12-24 22:03:00,592 : INFO : 3112322613 : Model Report : {'Grd': 0.9, 'Random_forest': 1.0, 'GRboost': 0.9}]
[2023-12-24 22:03:00,594 : INFO : 3112322613 : Best model Name: Random_forest]
[2023-12-24 22:03:00,604 : INFO : 3112322613 : finetuning started

]
Fitting 5 folds for each of 81 candidates, totalling 405 fits
[CV 1/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   0.0s
[CV 2/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   0.0s
[CV 3/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   0.0s
[CV 4/5] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=nan total time=   0.0s
[CV 5/5] 

135 fits failed out of a total of 405.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
135 fits failed with the following error:
Traceback (most recent call last):
  File "c:\TheBritishCollege\DataScience\Wafer_sensor_project\wafer_enc\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\TheBritishCollege\DataScience\Wafer_sensor_project\wafer_enc\lib\site-packages\sklearn\base.py", line 1145, in wrapper
    estimator._validate_params()
  File "c:\TheBritishCollege\DataScience\Wafer_sensor_project\wafer_enc\lib\site-packages\sklearn\base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "c:\TheBritishCollege\DataScie

[2023-12-24 22:06:49,562 : INFO : 3112322613 : Best paramater for Random_forest is {'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}]
[2023-12-24 22:06:50,025 : INFO : 3112322613 : The best model name: Random_forest with accuracy: 1.0]
[2023-12-24 22:06:50,084 : INFO : 3112322613 : Model saved at artifacts/model_trainer/model.pkl]
