Inside - src/components/model_trainer.py

Prepare the train and test datasets:

- Split each dataset into the X features and the y target column.
- Import the processor pickle file.
- Transform the X features only (don't process the target column).

Train the model:

- Give all the models.
- Give all the params for all the models.
- Train using grid search CV.
- Get the best metric:
  - Create the function required for model evaluation.
  - Get all the metrics.
  - Get all the params.

```Python

```

## Inside src/utils.py

```python
import os
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import pickle

from src.exception import CustomException
from src.logger import logging

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

## For RandomizedSearchCV
#from sklearn.model_selection import RandomizedSearchCV
#from scipy.stats import uniform


def save_object(file_path, obj):
    """
    Serialize ``obj`` to ``file_path`` as a pickle file.

    Missing parent directories of ``file_path`` are created first.

    Usage:
        from src.utils import save_object
        save_object(file_path=..., obj=...)

    Args:
        file_path: Destination of the pickle file. A bare file name (no
            directory component) is written to the current working
            directory.
        obj: The picklable object to store.

    Raises:
        CustomException: Wraps any error raised while creating the
            directory or writing the file.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # os.path.dirname() returns "" for a bare file name and
        # os.makedirs("") raises FileNotFoundError, so only create the
        # directory when there actually is one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        # Use the built-in open(), not os.open(): os.open() returns a raw
        # file descriptor and cannot be used in a ``with`` statement.
        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)
    except Exception as e:
        raise CustomException(e, sys) from e
    

def load_object(file_path):
    """
    Read a pickle file and return the object stored in it.

    Usage:
        from src.utils import load_object
        obj = load_object(file_path=...)

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Raises:
        CustomException: Wraps any error raised while opening or
            unpickling the file.
    """
    # NOTE(review): pickle.load() can execute arbitrary code; only load
    # files that this project produced itself.
    try:
        with open(file_path, "rb") as handle:
            restored = pickle.load(handle)
    except Exception as err:
        raise CustomException(err, sys)
    else:
        return restored



def evaluate_models(X_train, y_train, X_test, y_test, models, param):
    """
    Tune, fit and score every candidate model.

    For each entry in ``models`` a 3-fold ``GridSearchCV`` is run over the
    grid stored under the same key in ``param``. The best parameters found
    are set on the model object that was passed in, that object is fitted
    on the training data, and its R2 score on the test data is recorded.

    Usage:
        from src.utils import evaluate_models
        evaluate_models(X_train=, y_train=, X_test=, y_test=, models=, param=)

    Args:
        X_train: Training features.
        y_train: Training target.
        X_test: Test features.
        y_test: Test target.
        models: dict mapping a model name to an estimator instance. The
            estimators are modified in place (parameters set and fitted).
        param: dict mapping each model name to its parameter grid; an
            empty dict means "use the estimator as configured".

    Returns:
        dict: model name -> R2 score on the test data.

    Raises:
        CustomException: Wraps any error raised during the search, fit or
            scoring (including a model name that is missing from ``param``).
    """
    try:
        report = {}

        for name, model in models.items():
            grid = param[name]

            ## If using RandomizedSearchCV instead:
            #rs = RandomizedSearchCV(model, param_distributions=grid, cv=3)
            #rs.fit(X_train, y_train)
            #model.set_params(**rs.best_params_)

            ## If using GridSearchCV
            gs = GridSearchCV(model, grid, cv=3)
            gs.fit(X_train, y_train)

            # Apply the winning parameters to the caller's own estimator
            # and fit it, so the objects in ``models`` are ready to use
            # once this function returns.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)

            report[name] = r2_score(y_test, model.predict(X_test))

        return report

    except Exception as e:
        raise CustomException(e, sys) from e



# def evaluate_models(X_train, y_train, X_test, y_test, models, param):
#     """
#     Evaluates models, performs Grid Search, Selects the best parameters
#     Selects the best models trains the model and performs metric evaluation
#     from src.utils import evaluate_models
#     evaluate_models(X_train=, y_train=, X_test=, y_test=, models=, param=)

#     Args
#       X_train
#       y_train
#       X_test
#       y_test
#       models
#       param

#       Calculates R2_Score

#     Returns
#       Report: dict Report
    
#       from src.utils import evaluate_models
#       evaluate_models(X_train=, y_train=, X_test=, y_test=, models=, param=)
#     """
#     try:
#         #pass
#         report = {}

#         for i in range(len(list(models))):
#             model = list(models.values())[i]
#             para=param[list(models.keys())[i]]

#             gs = GridSearchCV(model, para, cv=3)
#             gs.fit(X_train, y_train)
#             #model.fit(X_train, y_train)
            
#             ## Getting best parameters and training the model with them
#             model.set_params(**gs.best_params_)
#             model.fit(X_train,y_train)

#             y_train_pred = model.predict(X_train)
#             y_test_pred = model.predict(X_test)

#             train_model_score = r2_score(y_train, 
#                                          y_train_pred)
            
#             test_model_score = r2_score(y_test,
#                                         y_test_pred)
            
#             report[list(models.keys())[i]] = test_model_score
#         logging.info(f"Returning Report from utils.evaluate_models function")
#         return report
    
#     except Exception as e:
#         raise CustomException(e, sys)
```

## Inside src/components/model_trainer.py

```python
import os
import sys
from dataclasses import dataclass
#import numpy as np
#import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor
)
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

from sklearn.metrics import r2_score

from src.exception import CustomException
from src.logger import logging

from src.utils import save_object, evaluate_models

@dataclass
class ModelTrainerConfig:
    """Configuration for the model training step."""

    # Path the best trained model is pickled to. The type annotation is
    # required: @dataclass only turns *annotated* class attributes into
    # fields, so without it the path could not be overridden through the
    # constructor.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

class ModelTrainer:
    """
    Trains a set of candidate regressors, selects the one with the best
    test R2 score and pickles it to the configured path.
    """

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initiate_model_trainer(self, train_array, test_array):
        """
        Tune and compare the candidate models, then save the best one.

        Args:
            train_array: 2-D array of training data; the last column is
                used as the target and all other columns as features.
            test_array: 2-D array of test data with the same layout.

        Returns:
            The R2 score of the best model on the test data.

        Raises:
            CustomException: Wraps any error raised during training,
                including the case where the best test R2 score is
                below 0.6.
        """
        try:
            logging.info("Model Trainer Started")
            logging.info("Splitting the Train and Test Arrays")
            X_train, y_train, X_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            models = {
                "Random Forest": RandomForestRegressor(),
                "Decision Tree": DecisionTreeRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "Linear Regression": LinearRegression(),
                "XGBRegressor": XGBRegressor(),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "AdaBoost Regressor": AdaBoostRegressor(),
            }
            # One parameter grid per model name; the keys must match the
            # keys of ``models``. Commented entries are further options
            # that are currently left out of the search.
            params = {
                "Decision Tree": {
                    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    # 'splitter':['best','random'],
                    # 'max_features':['sqrt','log2'],
                },
                "Random Forest": {
                    # 'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                    # 'max_features':['sqrt','log2',None],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Gradient Boosting": {
                    # 'loss':['squared_error', 'huber', 'absolute_error', 'quantile'],
                    'learning_rate': [.1, .01, .05, .001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    # 'criterion':['squared_error', 'friedman_mse'],
                    # 'max_features':['auto','sqrt','log2'],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "Linear Regression": {},
                "XGBRegressor": {
                    'learning_rate': [.1, .01, .05, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
                "CatBoosting Regressor": {
                    'depth': [6, 8, 10],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'iterations': [30, 50, 100],
                },
                "AdaBoost Regressor": {
                    'learning_rate': [.1, .01, 0.5, .001],
                    # 'loss':['linear','square','exponential'],
                    'n_estimators': [8, 16, 32, 64, 128, 256],
                },
            }

            # evaluate_models returns {model name: test R2 score}.
            model_report: dict = evaluate_models(X_train=X_train,
                                                 y_train=y_train,
                                                 X_test=X_test,
                                                 y_test=y_test,
                                                 models=models,
                                                 param=params)

            # Name of the highest-scoring model; on a tie the first entry
            # in the report wins.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]

            print(f"Best Model: {best_model_name}\n{best_model}")
            print(f"Best Model Score: {best_model_score}")

            if best_model_score < 0.6:
                # Raise a plain exception here; the handler below wraps it
                # in CustomException together with ``sys``, the same way
                # every other error in this method is reported.
                raise ValueError("Best Model Score is less than 0.6. Model is not good enough to be deployed.")

            ## Dumping the model in a pickle file
            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model
            )

            ## Create prediction with best model and perform evaluation
            predicted_test = best_model.predict(X_test)
            r2_square_test = r2_score(y_true=y_test, y_pred=predicted_test)

            return r2_square_test

        except Exception as e:
            raise CustomException(e, sys) from e
```