In [2]:
import os
# Move the process working directory up one level.
# NOTE(review): presumably this lands on the project root so that relative
# config/artifact paths resolve — confirm against where the notebook lives.
# Caution: the path is relative to the *current* directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
os.chdir('../')
# Last expression of the cell: displays the resulting working directory.
os.getcwd()

'd:\\Code Workspace\\Personal Project\\Personal Project - End to End Loan Status Classification'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle for the model-training step.

    Fields are declared in the order accepted by the generated ``__init__``;
    instances cannot be modified after construction (``frozen=True``).
    """

    # --- file locations ---
    root_dir: Path           # directory the trained model is saved under
    train_data_path: Path    # CSV holding the training split
    test_data_path: Path     # CSV holding the test split
    model_name: str          # file name of the saved model inside root_dir

    # --- SVC hyper-parameters ---
    C: float                 # regularization parameter
    degree: int              # degree of the polynomial kernel function
    kernel: str              # kernel function: 'linear', 'poly', 'rbf', 'sigmoid' or 'precomputed'

    # --- data schema ---
    target_column: str       # name of the label column in the CSV files

In [4]:
from MLProject.constants import *
from MLProject.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Loads the project's YAML files and builds stage-specific config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: File handed to ``read_yaml``; its ``artifacts_root``
                and ``model_trainer`` entries are used by this class.
            params_filepath: File handed to ``read_yaml``; its ``SVC`` entry
                supplies the hyper-parameters.
            schema_filepath: File handed to ``read_yaml``; its ``TARGET_COLUMN``
                entry supplies the label column name.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` from the loaded YAML content.

        All three sections are looked up first, then the trainer's ``root_dir``
        is created, and finally the config object is assembled.

        Returns:
            A ``ModelTrainerConfig`` populated from the ``model_trainer``,
            ``SVC`` and ``TARGET_COLUMN`` entries.
        """
        trainer_section = self.config.model_trainer
        svc_params = self.params.SVC
        target_schema = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            C=svc_params.C,
            degree=svc_params.degree,
            kernel=svc_params.kernel,
            target_column=target_schema.name,
        )

In [8]:
import pandas as pd
import os
from MLProject import logger
from sklearn.svm import SVC
from MLProject.utils.common import save_bin

class ModelTrainer:
    """Fits a support-vector classifier on the training CSV and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the configuration that drives :meth:`train`."""
        self.config = config

    def train(self):
        """Fit an ``SVC`` on the training data and persist it via ``save_bin``.

        Reads ``config.train_data_path``, separates ``config.target_column``
        from the remaining columns, fits the classifier with the configured
        ``C``, ``kernel`` and ``degree`` (``random_state`` fixed at 42) and
        saves the model to ``<root_dir>/<model_name>``.

        The test split is intentionally not read here: nothing in the training
        step uses it, so loading it would only cost I/O and memory.

        Raises:
            KeyError: If the target column is missing from the training data.
        """
        cfg = self.config

        logger.info("Loading training data.")
        train_df = pd.read_csv(cfg.train_data_path)

        logger.info("Splitting input and target column from training data")
        X_train = train_df.drop(columns=[cfg.target_column])
        # 1-D label array, the shape scikit-learn estimators expect for ``y``.
        y_train = train_df[cfg.target_column].to_numpy()

        logger.info("Training Started")
        model = SVC(
            C=cfg.C,
            kernel=cfg.kernel,
            degree=cfg.degree,
            random_state=42,
        )
        model.fit(X_train, y_train)

        save_bin(model, path=os.path.join(cfg.root_dir, cfg.model_name))

In [7]:
# Run the model-training stage end to end: load configuration, build the
# trainer, fit and save the model. No try/except wrapper is used, so any
# failure surfaces directly with its original traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

| 2024-01-23 18:46:57,699 | INFO | common | yaml file: config\config.yaml loaded successfully |
| 2024-01-23 18:46:57,703 | INFO | common | yaml file: params.yaml loaded successfully |
| 2024-01-23 18:46:57,706 | INFO | common | yaml file: schema.yaml loaded successfully |
| 2024-01-23 18:46:57,709 | INFO | common | created directory at: artifacts |
| 2024-01-23 18:46:57,710 | INFO | common | created directory at: artifacts\model_trainer |
| 2024-01-23 18:46:57,711 | INFO | 3052496624 | Loading training and test data. |
| 2024-01-23 18:46:57,981 | INFO | 3052496624 | Splitting input and target column from training and test data |
| 2024-01-23 18:46:57,991 | INFO | 3052496624 | Training Started |
[LibSVM]| 2024-01-23 18:49:38,153 | INFO | common | binary file saved at: artifacts\model_trainer\model.joblib |
