In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge, LinearRegression, ElasticNet
from sklearn.svm import SVR
# Step one directory up so the relative config/params paths used below resolve
# (presumably the notebook lives in a sub-folder of the project root - confirm).
# NOTE: the path is relative, so re-running this cell in the same kernel moves
# up one more level each time; run it exactly once per kernel session.
os.chdir("../")


In [2]:
%pwd

'c:\\Users\\agost\\Data_Career\\End_to_end_projects\\Abalone_Age_Prediction'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelPreparationTrainingConfig:
    """Immutable settings for the model preparation and training stage.

    Instances are built by ``ConfigurationManager.get_model_preparation_training_config``
    from the ``model_preparation_training`` section of the config file and from
    the params file. ``frozen=True`` makes attributes read-only after creation.
    The annotations are not validated at runtime.
    """
    # Stage root directory (``root_dir`` key of the config section).
    root_dir: Path
    # CSV file loaded as the training data.
    local_data_file: Path
    # Location handed to ``save_object`` as the destination for the trained models.
    trained_model_path: Path
    # ``n_estimators`` for the RandomForestRegressor (params key N_ESTIMATORS_RFR).
    param_n_estim_rfr: int
    # ``random_state`` for the RandomForestRegressor (params key RANDOM_STATE).
    param_random_state: int
    # ``alpha`` for Lasso (params key ALPHA_LASSO).
    param_alpha_lasso: float
    # ``alpha`` for Ridge (params key ALPHA_RIDGE).
    param_alpha_ridge: float
    # ``alpha`` for ElasticNet (params key ALPHA_ELASTIC).
    param_alpha_elastic: float
    # ``l1_ratio`` for ElasticNet (params key L1_RATIO_ELASTIC).
    param_l1_elastic: float
    # ``C`` for SVR (params key C_SVR).
    param_c_svr: float
    # Name of the target column; it is removed from the features and used as y.
    param_target_col: str


In [4]:
from Abalone_Age_Prediction.utils.common import create_directories, read_yaml, save_object
from Abalone_Age_Prediction.constants import *
from Abalone_Age_Prediction import logger

In [5]:
class ConfigurationManager:
    """Load the YAML config and params files and build per-stage config objects.

    Args:
        config_filepath: Path of the YAML configuration file
            (defaults to ``CONFIG_FILE_PATH``).
        params_filepath: Path of the YAML parameters file
            (defaults to ``PARAMS_FILE_PATH``).

    Side effects:
        Creates the directory named by the ``artifacts_root`` key of the
        configuration file.
    """

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # `artifacts_root` is a top-level key of the config file. It can be read
        # with attribute access instead of ["artifacts_root"] because, per the
        # original author's note, read_yaml (common.py) returns a ConfigBox.
        create_directories([self.config.artifacts_root])

    def get_model_preparation_training_config(self) -> ModelPreparationTrainingConfig:
        """Build the configuration for the model preparation/training stage.

        Returns:
            ModelPreparationTrainingConfig: paths taken from the
            ``model_preparation_training`` section of the config file and
            hyper-parameters taken from the params file.
        """
        stage = self.config.model_preparation_training
        params = self.params

        return ModelPreparationTrainingConfig(
            # The dataclass declares these three fields as Path, so convert the
            # raw config values once here instead of leaving it to each consumer.
            root_dir=Path(stage.root_dir),
            local_data_file=Path(stage.local_data_file),
            trained_model_path=Path(stage.trained_model_path),
            param_n_estim_rfr=params.N_ESTIMATORS_RFR,
            param_random_state=params.RANDOM_STATE,
            param_alpha_lasso=params.ALPHA_LASSO,
            param_alpha_ridge=params.ALPHA_RIDGE,
            param_alpha_elastic=params.ALPHA_ELASTIC,
            param_l1_elastic=params.L1_RATIO_ELASTIC,
            param_c_svr=params.C_SVR,
            param_target_col=params.TARGET,
        )

In [6]:
class ModelPreparationTraining:
    """Train the project's regression models on the training CSV and save them.

    Args:
        config: Stage settings (data file, output location, hyper-parameters
            and target column name).
    """

    def __init__(self, config: "ModelPreparationTrainingConfig"):
        self.config = config

    def read_file(self):
        """Read the training CSV named by ``config.local_data_file``.

        Returns:
            pandas.DataFrame: the parsed file.

        Raises:
            Whatever ``pandas.read_csv`` raises (e.g. ``FileNotFoundError`` for
            a missing file). Errors are deliberately not swallowed: returning
            ``None`` here would only postpone the failure to the training step
            with a far less helpful message.
        """
        return pd.read_csv(self.config.local_data_file)

    def _build_models(self):
        """Return the untrained estimators keyed by the file name each is saved under."""
        cfg = self.config
        return {
            "rfr.pkl": RandomForestRegressor(
                n_estimators=cfg.param_n_estim_rfr,
                random_state=cfg.param_random_state,
            ),
            "svr.pkl": SVR(C=cfg.param_c_svr),
            "lr.pkl": LinearRegression(),
            "lasso.pkl": Lasso(alpha=cfg.param_alpha_lasso),
            "ridge.pkl": Ridge(alpha=cfg.param_alpha_ridge),
            "elastic.pkl": ElasticNet(
                alpha=cfg.param_alpha_elastic,
                l1_ratio=cfg.param_l1_elastic,
            ),
        }

    def model_preparation_training(self, training_data):
        """Fit every model on ``training_data`` and save each one.

        Args:
            training_data: DataFrame holding the feature columns plus the
                target column named by ``config.param_target_col``.

        Returns:
            None. The fitted models are handed to ``save_object`` together with
            ``config.trained_model_path`` and their file name.

        Raises:
            KeyError: if the target column is not present in ``training_data``.
        """
        target = self.config.param_target_col
        if target not in training_data.columns:
            raise KeyError(f"Target column {target!r} not found in training data")

        X_train = training_data.drop(columns=target)
        y_train = training_data[target]

        models = self._build_models()

        # Fit everything before saving anything, so a failing fit does not
        # leave a partial set of model files behind.
        for model in models.values():
            model.fit(X_train, y_train)

        output_path = Path(self.config.trained_model_path)
        for file_name, model in models.items():
            save_object(output_path, model, file_name)
    

In [7]:
# Run the stage end to end: load configuration, read the training data,
# then fit and save every model.
try:
    config = ConfigurationManager()
    model_preparation_training_config = config.get_model_preparation_training_config()
    model_preparation_training = ModelPreparationTraining(config=model_preparation_training_config)
    training_data = model_preparation_training.read_file()
    model_preparation_training.model_preparation_training(training_data)
except Exception as e:
    # Record the failure with its traceback in the project log, then re-raise
    # the same exception unchanged so the cell still fails visibly.
    logger.exception(e)
    raise


[2024-06-13 23:33:17,320: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-13 23:33:17,322: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-13 23:33:17,323: INFO: common: created directory at: artifacts]
