In [1]:
import os

In [2]:
# Switch the working directory to the project checkout.
# The location can be overridden through the LOAN_DEFAULT_PROJECT_ROOT
# environment variable so the notebook is not tied to one machine; when the
# variable is unset the original hard-coded checkout path is used.
os.chdir(
    os.environ.get(
        "LOAN_DEFAULT_PROJECT_ROOT",
        "E:/dibimbing/Portfolio/Bank-Loan-Default-Prediction",
    )
)

In [3]:
%pwd

'E:\\dibimbing\\Portfolio\\Bank-Loan-Default-Prediction'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``. The dataclass does not validate or
    convert values; the annotations are documentation for callers.
    """

    # Directory that receives the serialized model.
    root_dir: Path
    # CSV files holding the feature (x) and label (y) splits.
    x_train_data_path: Path
    x_test_data_path: Path
    y_train_data_path: Path
    y_test_data_path: Path
    # File name of the serialized model, joined onto root_dir when saving.
    model_name: str
    # Hyper-parameters handed to XGBClassifier.
    learning_rate: float
    n_estimators: int
    max_depth: int
    # XGBoost documents min_child_weight, gamma and scale_pos_weight as
    # floating-point values, so they are annotated float (ints remain valid).
    min_child_weight: float
    gamma: float
    subsample: float
    colsample_bytree: float
    objective: str
    nthread: int
    scale_pos_weight: float
    seed: int
    # Target-column entry taken from the schema file.
    target_column: str

In [5]:
from src.loanDefault.constants import *
from src.loanDefault.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds typed config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training step.

        Creates the trainer's output directory as a side effect.

        Returns:
            A ModelTrainerConfig combining the ``model_trainer`` section of the
            config file, the ``XGBoost`` section of the params file and the
            ``TARGET_COLUMN`` section of the schema file.
        """
        trainer_cfg = self.config.model_trainer
        xgb_params = self.params.XGBoost
        target_schema = self.schema.TARGET_COLUMN

        create_directories([trainer_cfg.root_dir])

        return ModelTrainerConfig(
            # Path fields are coerced so the values match the Path
            # annotations declared on ModelTrainerConfig.
            root_dir=Path(trainer_cfg.root_dir),
            x_train_data_path=Path(trainer_cfg.x_train_data_path),
            x_test_data_path=Path(trainer_cfg.x_test_data_path),
            y_train_data_path=Path(trainer_cfg.y_train_data_path),
            y_test_data_path=Path(trainer_cfg.y_test_data_path),
            model_name=trainer_cfg.model_name,
            learning_rate=xgb_params.learning_rate,
            n_estimators=xgb_params.n_estimators,
            max_depth=xgb_params.max_depth,
            min_child_weight=xgb_params.min_child_weight,
            gamma=xgb_params.gamma,
            subsample=xgb_params.subsample,
            colsample_bytree=xgb_params.colsample_bytree,
            objective=xgb_params.objective,
            nthread=xgb_params.nthread,
            scale_pos_weight=xgb_params.scale_pos_weight,
            seed=xgb_params.seed,
            # NOTE(review): this is the value stored under
            # TARGET_COLUMN.Default in the schema; confirm it holds the
            # column name rather than, e.g., a dtype.
            target_column=target_schema.Default,
        )

In [7]:
import pandas as pd
import os
from src.loanDefault import logger
from xgboost import XGBClassifier
import joblib

In [8]:
class ModelTrainer:
    """Fits an XGBoost classifier on the training split and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the training settings.

        Args:
            config: Paths, hyper-parameters and output location for training.
        """
        self.config = config


    def train(self):
        """Train the classifier and dump it with joblib.

        Reads the training features and labels from the CSV paths in the
        config, fits an ``XGBClassifier`` with the configured
        hyper-parameters and writes the fitted model to
        ``<root_dir>/<model_name>``. Only the training split is loaded; the
        test split is not needed to fit the model.
        """
        cfg = self.config

        train_x = pd.read_csv(cfg.x_train_data_path)
        train_y = pd.read_csv(cfg.y_train_data_path)

        model = XGBClassifier(
            learning_rate=cfg.learning_rate,
            n_estimators=cfg.n_estimators,
            max_depth=cfg.max_depth,
            min_child_weight=cfg.min_child_weight,
            gamma=cfg.gamma,
            subsample=cfg.subsample,
            colsample_bytree=cfg.colsample_bytree,
            objective=cfg.objective,
            # n_jobs / random_state are the scikit-learn wrapper's documented
            # names for the thread count and the random seed.
            n_jobs=cfg.nthread,
            scale_pos_weight=cfg.scale_pos_weight,
            random_state=cfg.seed,
        )

        model.fit(train_x, train_y)

        joblib.dump(model, os.path.join(cfg.root_dir, cfg.model_name))



In [9]:
# Build the configuration, then train and persist the model.
# Any exception propagates unchanged, so no try/except wrapper is needed.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
# Keep the trainer under its own name instead of rebinding the config variable.
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-10-07 20:53:45,956: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-10-07 20:53:45,956: INFO: common: yaml file: params.yaml loaded successfully]
[2024-10-07 20:53:45,972: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-10-07 20:53:45,974: INFO: common: created directory at: artifacts]
[2024-10-07 20:53:45,975: INFO: common: created directory at: artifacts/model_trainer]
