In [1]:
from dataclasses import dataclass
from pathlib import Path

In [2]:
@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    Field types are annotations only; the dataclass does not validate or
    coerce the values it is given.
    """

    # Directory that holds the artifacts of the model stage.
    root_dir: Path
    # Location the fitted model is serialized to.
    model_file_path: Path
    # CSV containing the processed dataset that gets split.
    data_path: Path
    # Split-quality criterion handed to the decision tree.
    criterion: str
    # Maximum depth handed to the decision tree.
    max_depth: int
    # Destination CSV for the test partition.
    test_data_filepath: Path
    # Destination CSV for the training partition.
    train_data_filepath: Path


In [3]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [4]:
from regression.constants import constant
from regression.utils.common_func import read_yaml, create_dir
from regression import logger
import joblib

In [5]:
class ModelConfigurationManager:
    """Loads the params and config YAML files and builds training configs."""

    def __init__(
            self,
            param_filepath = constant.PARAMS_FILE_PATH,
            config_filepath = constant.CONFIG_FILE_PATH
            ):
        """Read both YAML files up front.

        Args:
            param_filepath: Path of the hyper-parameter YAML file.
            config_filepath: Path of the pipeline configuration YAML file.
        """
        # Params are read first, then the pipeline configuration.
        self.params = read_yaml(param_filepath)
        self.config = read_yaml(config_filepath)

    def get_model_config(self) -> ModelTrainingConfig:
        """Assemble a ``ModelTrainingConfig`` from the loaded YAML content.

        The model root directory is passed to ``create_dir`` before the
        config object is built.

        Returns:
            ModelTrainingConfig populated from the ``prepare_model``,
            ``data_paths`` and ``data_split`` config sections plus the
            ``criterion`` and ``max_depth`` params.
        """
        # Resolve every config section before touching the filesystem.
        model_section = self.config.prepare_model
        paths_section = self.config.data_paths
        split_section = self.config.data_split

        create_dir([model_section.root_dir])

        return ModelTrainingConfig(
            root_dir=model_section.root_dir,
            model_file_path=model_section.model_file_path,
            data_path=paths_section.processed_data,
            criterion=self.params.criterion,
            max_depth=self.params.max_depth,
            test_data_filepath=split_section.test_data_filepath,
            train_data_filepath=split_section.train_data_filepath,
        )

In [6]:
class ModelTraining:
    """Splits the processed dataset and fits a decision-tree classifier.

    The two steps are meant to be run in order: ``split_data`` writes the
    train/test CSV files, and ``train_model`` fits on the training CSV and
    serializes the fitted classifier.

    NOTE(review): no ``random_state`` is passed to the classifier, so the
    fitted tree may differ between runs -- consider adding one to the params.
    """

    def __init__(self, config:ModelTrainingConfig):
        """Keep the training configuration for later use.

        Args:
            config: Paths and hyper-parameters for this stage.
        """
        self.config = config

    def split_data(self, train_rows: int = 1450) -> None:
        """Split the processed CSV into train and test CSV files.

        The split is positional and unshuffled: the first ``train_rows``
        rows become the training set, the remainder the test set.

        Args:
            train_rows: Number of leading rows assigned to the training set.
                Defaults to 1450, the value that was previously hard-coded.

        Raises:
            Exception: Any error raised while reading or writing is logged
                and re-raised unchanged.
        """
        try:
            logger.info(f'Loading Dataset from {self.config.data_path} into Datafram')
            df = pd.read_csv(self.config.data_path)

            logger.info("Splitting the dataset into train and test datasets")
            train_dataset = df.iloc[:train_rows]
            test_dataset = df.iloc[train_rows:]

            logger.info("Saving train and test data into artifact folder")
            # index=False keeps the row index out of the files; otherwise it
            # is read back as an unnamed column and used as a feature.
            train_dataset.to_csv(self.config.train_data_filepath, index=False)
            test_dataset.to_csv(self.config.test_data_filepath, index=False)
        except Exception as e:
            logger.error(f"Error Occurred during data splitting {e}")
            raise

    def train_model(self) -> None:
        """Fit the decision tree on the training CSV and save it.

        The ``quality`` column is the target; every other column is used as
        a feature. The fitted classifier is written to
        ``config.model_file_path`` with ``joblib.dump``.

        Raises:
            Exception: Any error raised while loading, fitting or saving is
                logged and re-raised unchanged.
        """
        try:
            logger.info("Loading Training Data into dataframe")
            # Fit on the training partition; the test partition stays held out.
            df = pd.read_csv(self.config.train_data_filepath)

            logger.info("splitting the data into X and y")
            y = df['quality']
            x = df.drop('quality', axis=1)

            logger.info("defining the model")
            classifier = DecisionTreeClassifier(
                criterion=self.config.criterion,
                max_depth=self.config.max_depth,
            )
            classifier.fit(x, y)

            logger.info("Model is successfully trained")

            joblib.dump(classifier, self.config.model_file_path)

            logger.info(f"Model is successfully saved at {self.config.model_file_path}")

        except Exception as e:
            logger.error(f"Error Occurred during model training {e}")
            raise
    
        

In [7]:
# Run the model stage end to end: load configuration, split the processed
# data, then fit and save the model. Exceptions propagate as-is; the former
# `try/except Exception as e: raise e` wrapper re-raised unchanged, so it
# added nothing but an extra traceback frame.
modelconfigmanager = ModelConfigurationManager()
model_config = modelconfigmanager.get_model_config()
model_training = ModelTraining(model_config)
model_training.split_data()
model_training.train_model()

[2024-09-13 22:56:30,028:INFO:common_func:yaml file: ..\params.yaml loaded successfully]
[2024-09-13 22:56:30,034:INFO:common_func:yaml file: ..\config\config.yaml loaded successfully]
[2024-09-13 22:56:30,034:INFO:common_func:Created directory at : ../artifacts/model]
[2024-09-13 22:56:30,034:INFO:2770646490:Loading Dataset from ../artifacts/datasets/processed_redwineq_data.csv into Datafram]
[2024-09-13 22:56:30,049:INFO:2770646490:Splitting the dataset into train and test datasets]
[2024-09-13 22:56:30,049:INFO:2770646490:Saving train and test data into artifact folder]
[2024-09-13 22:56:30,068:INFO:2770646490:Loading Training Data into dataframe]
[2024-09-13 22:56:30,080:INFO:2770646490:splitting the data into X and y]
[2024-09-13 22:56:30,081:INFO:2770646490:defining the model]
[2024-09-13 22:56:30,195:INFO:2770646490:Model is successfully trained]
[2024-09-13 22:56:30,209:INFO:2770646490:Model is successfully saved at {self.config.model_file_path}]
