In [2]:
import os

In [3]:
%pwd

'g:\\Machine_Learning_Projects\\iNeuron internship\\Flight-Fare-Prediction-End-to-End-ML-Project\\research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'g:\\Machine_Learning_Projects\\iNeuron internship\\Flight-Fare-Prediction-End-to-End-ML-Project'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    alpha: float
    l1_ratio: float
    target_column: int

In [7]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params.ElasticNet
        schema =  self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            test_data_path = config.test_data_path,
            # New Line Added
            trained_model_file_path =  os.path.join('artifact', 'model'),
            # New Line Ended
            model_name = config.model_name,
            alpha = params.alpha,
            l1_ratio = params.l1_ratio,
            target_column = schema.name
            
        )

        return model_trainer_config

In [9]:
import pandas as pd
import os
from mlProject import logger
from sklearn.linear_model import ElasticNet
import joblib

In [None]:
# class ModelTrainer:
#     def __init__(self, config: ModelTrainerConfig):
#         self.config = config

    
#     def train(self):
#         train_data = pd.read_csv(self.config.train_data_path)
#         test_data = pd.read_csv(self.config.test_data_path)

#         X_train = train_data.drop([self.config.target_column], axis=1)
#         X_test = test_data.drop([self.config.target_column], axis=1)
#         y_train = train_data[[self.config.target_column]]
#         y_test = test_data[[self.config.target_column]]

#         # train_x = train_data.drop([self.config.target_column], axis=1)
#         # test_x = test_data.drop([self.config.target_column], axis=1)
#         # train_y = train_data[[self.config.target_column]]
#         # test_y = test_data[[self.config.target_column]]


#         lr = ElasticNet(alpha=self.config.alpha, l1_ratio=self.config.l1_ratio, random_state=42)
#         lr.fit(X_train, y_train)
#         # lr.fit(train_x, train_y)

#         joblib.dump(lr, os.path.join(self.config.root_dir, self.config.model_name))



In [10]:
import os
import sys

import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

from sklearn.metrics import r2_score

# from ..exception import CustomException
# from ..logger import logging

# from ..utils import save_obj
# from ..utils import evaluate_model   

# from dataclasses import dataclass

# @dataclass
# class ModelTrainerConfig:
#     trained_model_file_path =  os.path.join('artifact', 'model')


class ModelTrainer:

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()    


    def save_obj(file_path, obj):
        try:

            dir_path = os.path.dirname(file_path)
            os.makedirs(dir_path, exist_ok=True)

            with open(file_path, 'wb') as file_obj:
                joblib.dump(obj, file_obj, compress= ('gzip'))

        except Exception as e:
            logger.info('Error occured in utils save_obj')
            raise e
        

    def evaluate_model(X_train, y_train, X_test, y_test, models):

        try:
            report = {}
            for i in range(len(models)):

                model = list(models.values())[i]

                # Train model
                model.fit(X_train,y_train)

                # Predict Testing data
                y_test_pred = model.predict(X_test)

                # Get R2 scores for train and test data
                test_model_score = r2_score(y_test,y_test_pred)

                report[list(models.keys())[i]] =  test_model_score

            return report

        except Exception as e:
            logger.info('Exception occured during model training')
            raise e    



    def initiate_model_training(self, X_train, X_test, y_train, y_test):
        try:
            logger.info('Splitting ')

            models={
            'LinearRegression':LinearRegression(),
            'Lasso':Lasso(),
            'Ridge':Ridge(),
            'Elasticnet':ElasticNet(),
            'RandomForestRegressor': RandomForestRegressor(),
            'GradientBoostRegressor()' : GradientBoostingRegressor(),
            "AdaBoost" : AdaBoostRegressor(),
            'DecisionTreeRegressor' : DecisionTreeRegressor(),
            "SupportVectorRegressor" : SVR(),
            "KNN" : KNeighborsRegressor()
            }

            model_report:dict = ModelTrainer.evaluate_model(X_train,y_train, X_test, y_test, models)
            print(model_report)
            print("\n====================================================================================")
            logger.info(f'Model Report : {model_report}')

            # to get best model score from dictionary
            best_model_score = max(sorted(model_report.values()))

            best_model_name = list(model_report.keys())[
                list(model_report.values()).index(best_model_score)
            ]

            best_model = models[best_model_name]

            print(f"Best Model Found, Model Name :{best_model_name}, R2-score: {best_model_score}")
            print("\n====================================================================================")
            logger.info(f"Best Model Found, Model name: {best_model_name}, R2-score: {best_model_score}")
            logger.info(f"{best_model.feature_names_in_}")
            
            ModelTrainer.save_obj(
            file_path = self.model_trainer_config.trained_model_file_path,
            obj = best_model
            )

        except Exception as e:
            logger.info('Exception occured at model trianing')
            raise e
        

In [11]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer_config = ModelTrainer(config=model_trainer_config)
    model_trainer_config.train()
except Exception as e:
    raise e

[2023-12-05 08:05:47,529: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-12-05 08:05:47,544: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-05 08:05:47,571: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-12-05 08:05:47,573: INFO: common: created directory at: artifacts]
[2023-12-05 08:05:47,578: INFO: common: created directory at: artifacts/model_trainer]


TypeError: __init__() got an unexpected keyword argument 'trained_model_file_path'