In [11]:
import os
import sys
from logger.logger import logging
from pathlib import Path
from dataclasses import dataclass
from exception.exception import customexception
from utils.common import create_directories,save_object,read_yaml,evaluate_model,get_transform_data
from sklearn.linear_model import LinearRegression,ElasticNet,Ridge,Lasso
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

In [12]:
#config_entity -> configuration_manager -> component -> pipeline -> main

In [13]:
@dataclass(frozen=True)
class ModelTrainingConfig:
    root_dir: Path
    trained_model_path: Path
    transform_data_path: Path

In [14]:
from constants import CONFIG_FILE_PATH

class ConfigurationManager:
    def __init__(self,config_file_path=CONFIG_FILE_PATH):
        self.config = read_yaml(config_file_path)
        create_directories([self.config.artifacts_root])
        
    def get_model_trainer_config(self)->ModelTrainingConfig:
        training_config = self.config.model_training
        transform_config = self.config.data_transformation
        
        create_directories([training_config.root_dir])
        model_trainer_config = ModelTrainingConfig(
            root_dir = training_config.root_dir,
            trained_model_path=training_config.model_path,
            transform_data_path=transform_config.transform_data_files
        )
        return model_trainer_config

In [18]:
from utils.common import evaluate_model
class ModelTraining:
    def __init__(self,config:ModelTrainingConfig):
        self.config = config
        
    def best_model(self):
        try:
            logging.info("Searching for best model with highest accuracy")
            X_train,y_train,X_test,y_test = get_transform_data(path_to_folder=self.config.transform_data_path)
            models = {
                "LinearRegression":LinearRegression(),
                "LassoRegression":Lasso(),
                "RidgeRegression":Ridge(),
                "ElasticNet":ElasticNet(),
                "XGBoost":XGBRegressor(),
                "RandomForest":RandomForestRegressor()
            }
            report = evaluate_model(X_train,y_train,X_test,y_test,models)
            # print(report)
            model_dict = sorted(report.items(),key=lambda item:item[1],reverse=True)
            print(model_dict)
            model_obj,accuracy = model_dict[0]
            logging.info(f"{model_obj} has highest accuracy of {accuracy}")
            return model_obj,X_train,y_train
        except Exception as e:
            raise customexception(e,sys)
        
    def trainer(self):
        model_obj,X_train,y_train = self.best_model()
        model_obj = model_obj.fit(X_train,y_train)
        save_object(self.config.trained_model_path,model_obj)
    
    

In [19]:
#cd ..

In [20]:
try:
    logging.info("Model training Started")
    config = ConfigurationManager()
    model_training_config = config.get_model_trainer_config()
    model_training = ModelTraining(model_training_config)
    model_training.trainer()
    logging.info("Model training Completed")
except Exception as e:
    raise customexception(e,sys)

{'LinearRegression': LinearRegression(), 'LassoRegression': Lasso(), 'RidgeRegression': Ridge(), 'ElasticNet': ElasticNet(), 'XGBoost': XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=None, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...), 'RandomForest': RandomForestRegressor()}
[(XGBRegressor(base_score=None, booster=None, callbacks=None,
             

In [None]:
models = {
            "LinearRegression":LinearRegression(),
            "LassoRegression":Lasso(),
            "RidgeRegression":Ridge(),
            "ElasticNet":ElasticNet(),
            "XGBoost":XGBRegressor(),
            "RandomForest":RandomForestRegressor()
        }
report = {}
for i in range(len(models)):
    model = list(models.values())[i]
    key_n = list(models.keys())[i]
    print(key_n)

LinearRegression
LassoRegression
RidgeRegression
ElasticNet
XGBoost
RandomForest
