In [2]:
%pwd

'c:\\Users\\44787\\Desktop\\projects\\end-to-end-SMS-Spam-classifier\\research'

In [3]:
import os

os.chdir('../')
%pwd

'c:\\Users\\44787\\Desktop\\projects\\end-to-end-SMS-Spam-classifier'

In [4]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class ModelTrainingConfig:
    root_dir: Path
    train_path: Path

In [5]:
from src.SMSClassifier.constants import CONFIG_PATH
from src.SMSClassifier.utils.common import read_yaml, create_directories

In [6]:
class ConfiguratoinManager:
    def __init__(self, config=CONFIG_PATH):
        self.config = read_yaml(config)

    def get_model_training_config(self) -> ModelTrainingConfig:
        config = self.config.model_training
        create_directories([config.root_dir])

        model_training_config = ModelTrainingConfig(
            root_dir=config.root_dir,
            train_path=config.train_path,
        )
        return model_training_config

In [7]:
from src.SMSClassifier.logging import logger
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import RandomizedSearchCV
import joblib
import pandas as pd

In [12]:
class ModelTraining:
    def __init__(self, config: ModelTrainingConfig) -> None:
        self.config = config

    def training_pipeline(self):

        train_path = pd.read_csv(self.config.train_path)

        X_train = train_path.drop('target', axis=1)
        y_train = train_path['target']
        
        grid = {
            "alpha": [0.00001, 0.0001, 0.001, 0.1, 1, 10, 100,1000],
            "force_alpha": [True, False]
            }
            
        model_rs = RandomizedSearchCV(estimator=MultinomialNB(),
                                          param_distributions=grid,
                                          n_iter=5,
                                          verbose=False)
        model_rs.fit(X_train, y_train)
        
        best_params = model_rs.best_params_
        
        logger.info(best_params)
        
        # train the model
        model = MultinomialNB(**best_params)
        model.fit(X_train, y_train)

        logger.info(f"Model train score: {model.score(X_train, y_train)}")

        # save the model
        joblib.dump(model, os.path.join(self.config.root_dir, 'model.joblib'))

In [13]:
try:
    config = ConfiguratoinManager()
    model_training_config = config.get_model_training_config()
    model = ModelTraining(model_training_config)
    model.training_pipeline()
except Exception as e:
    raise e

[2024-02-09 17:12:18,527: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-02-09 17:12:18,531: INFO: common: created directory at: artifacts/model_training]
[2024-02-09 17:12:22,467: INFO: 1817662677: {'force_alpha': True, 'alpha': 0.0001}]
[2024-02-09 17:12:22,546: INFO: 1817662677: Model train score: 0.9901960784313726]
