In [1]:
!pwd

/home/techie/Desktop/general/end-to-end-liver-project/research


In [2]:
import os

os.chdir('../')
!pwd

/home/techie/Desktop/general/end-to-end-liver-project


In [44]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainConfig:
    root_dir: Path
    train_path: Path
    test_path: Path
    model_name: str
    target: str
    n_estimators: float
    min_samples_split: float
    min_samples_leaf: float
    max_depth: float

In [45]:
from liver.constants import *
from liver.utils.common import read_yaml, create_directories

In [47]:
class ConfigurationManager:
    def __init__(self,
                 config=CONFIG_FILE_PATH,
                 schema=SCHEMA_FILE_PATH,
                 params=PARAMS_FILE_PATH):
        self.config_path = read_yaml(config)
        self.schema = read_yaml(schema)
        self.params_path = read_yaml(params)
        
    def get_model_train_config(self) -> ModelTrainConfig:
        config = self.config_path.model_training
        schema = self.schema
        params = self.params_path
        
        create_directories([config.root_dir])
        
        model_train_config = ModelTrainConfig(
            root_dir=config.root_dir,
            train_path=config.train_path,
            test_path=config.test_path,
            model_name=config.model_name,
            target=schema.TARGET.name,
            n_estimators=params.Random_forest.n_estimators,
            min_samples_split=params.Random_forest.min_samples_split,
            min_samples_leaf=params.Random_forest.min_samples_leaf,
            max_depth=params.Random_forest.max_depth
        )
        
        return model_train_config

In [54]:
from liver import logger
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import joblib
import os

In [59]:
class ModelTraining:
    def __init__(self,
                 config: ModelTrainConfig) -> None:
        self.config = config
        
    def train(self):
        train_data = pd.read_csv(self.config.train_path)
        test_data = pd.read_csv(self.config.test_path)
        
        X_train = train_data.drop([self.config.target], axis=1)
        y_train = train_data[[self.config.target]]
        
        model = RandomForestClassifier(
            n_estimators=self.config.n_estimators,
            min_samples_split=self.config.min_samples_split,
            min_samples_leaf=self.config.min_samples_leaf,
            max_depth=self.config.max_depth
        )
        
        model.fit(X_train, y_train)
        
        logger.info(f'Train model score: {model.score(X_train, y_train)}')
        
        joblib.dump(model, os.path.join(self.config.root_dir, self.config.model_name))


In [60]:
try:
    config = ConfigurationManager()
    model_train_config = config.get_model_train_config()
    model = ModelTraining(model_train_config)
    model.train()
except Exception as e:
    raise e

[2023-10-20 20:51:42,918: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-10-20 20:51:42,929: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-10-20 20:51:42,936: INFO: common: yaml file: params.yaml loaded successfully]
[2023-10-20 20:51:42,939: INFO: common: created directory at: artifacts/model_training]


  return fit_method(estimator, *args, **kwargs)


[2023-10-20 20:51:44,498: INFO: 1105581700: Train model score: 0.9710144927536232]
