In [3]:
import os

In [4]:
%pwd

'c:\\Users\\ajay\\Desktop\\myPortfolio\\CommentAnalysis\\research'

In [5]:
# Work from the project root so that `src.CommentAnalysis` imports and the
# relative config/artifact paths resolve. The guard makes the cell idempotent:
# once the working directory already contains `src/`, re-running it does not
# climb another level.
if not os.path.isdir('src'):
    os.chdir('../')

In [6]:
%pwd

'c:\\Users\\ajay\\Desktop\\myPortfolio\\CommentAnalysis'

In [7]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle for the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    The annotations are informational only: the dataclass performs no type
    coercion or validation on the values it is given.
    """

    # Destination the fitted model is saved to.
    trained_model_path: Path
    # Location of the saved training feature matrix.
    x_train_file_path: Path
    # Location of the saved training labels.
    y_train_file_path: Path
    # Output directory of this stage.
    root_dir: Path
    # Location of the fitted transformer object from the transformation stage.
    transformer_obj: Path
    # Hyper-parameters forwarded to the LightGBM classifier.
    learning_rate: float
    max_depth: int
    n_estimators: int

In [8]:
from src.CommentAnalysis.constants import *
from src.CommentAnalysis.utils.common import read_yaml, create_directories

In [15]:
class ConfigurationManager:
    """Reads the project's YAML files and assembles per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """
        Load the config, params and schema YAML files.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.

        The schema is always read from ``SCHEMA_FILE_PATH``. After loading,
        ``config.artifacts_root`` is handed to ``create_directories``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(SCHEMA_FILE_PATH)
        create_directories([self.config.artifacts_root])

    def get_model_config(self) -> ModelTrainerConfig:
        """
        Build the configuration for the model-training stage.

        Paths for the model output come from the ``prepare_model`` section,
        training-data and transformer paths from the ``data_Transformation``
        section, and hyper-parameters from ``params['model_building']``.
        The stage's ``root_dir`` is passed to ``create_directories`` before
        the config object is constructed.

        Returns:
            ModelTrainerConfig: Populated configuration for the trainer.
        """
        model_section = self.config.prepare_model
        transformation_section = self.config.data_Transformation

        hyperparams = self.params['model_building']
        learning_rate = hyperparams['learning_rate']
        max_depth = hyperparams['max_depth']
        n_estimators = hyperparams['n_estimators']

        create_directories([model_section.root_dir])

        return ModelTrainerConfig(
            trained_model_path=model_section.trained_model_path,
            x_train_file_path=transformation_section.x_train_file_path,
            y_train_file_path=transformation_section.y_train_file_path,
            root_dir=model_section.root_dir,
            transformer_obj=transformation_section.transformer,
            learning_rate=learning_rate,
            max_depth=max_depth,
            n_estimators=n_estimators,
        )
      

In [16]:
import sys
import numpy as np
import pandas as pd
from src.CommentAnalysis import logger
import lightgbm as lgb
from scipy import sparse
from src.CommentAnalysis.utils.common import read_data, save_bin, read_yaml
from src.CommentAnalysis.constants import SCHEMA_FILE_PATH


class ModelTrainer:
    """Trains a LightGBM classifier on the saved training data and persists it."""

    def __init__(self, ModelTrainerConfig):
        """
        Store the stage configuration and load the schema YAML.

        Args:
            ModelTrainerConfig: Configuration object for this stage. The
                parameter name shadows the config class of the same name; it
                is kept unchanged because callers pass it by keyword.

        Raises:
            Exception: Any error raised while loading the schema is logged
                and re-raised unchanged.
        """
        try:
            self.model_trainer_config = ModelTrainerConfig
            self._schema_config = read_yaml(SCHEMA_FILE_PATH)
            logger.info("ModelTrainer initialized successfully.")
        except Exception:
            logger.exception("Failed to initialize ModelTrainer.")
            # Bare re-raise keeps the original exception type and traceback,
            # consistent with the other methods of this class.
            raise

    def train_lgbm(
        self,
        X_train: np.ndarray,
        y_train: np.ndarray,
        learning_rate: float,
        max_depth: int,
        n_estimators: int
    ) -> lgb.LGBMClassifier:
        """
        Train a LightGBM model with specified parameters.

        Args:
            X_train: Training feature matrix. Annotated as ``np.ndarray``, but
                ``initiate_model_training`` passes the SciPy sparse matrix
                returned by ``sparse.load_npz``.
            y_train: Training labels.
            learning_rate: Boosting learning rate.
            max_depth: Maximum tree depth.
            n_estimators: Number of boosting rounds.

        Returns:
            lgb.LGBMClassifier: The fitted classifier.

        Raises:
            Exception: Any error from model construction or fitting is logged
                and re-raised unchanged.
        """
        try:
            logger.info("Starting LightGBM model training...")
            best_model = lgb.LGBMClassifier(
                objective='multiclass',
                num_class=3,  # assumes three target classes -- TODO confirm against the label set
                metric="multi_logloss",
                # NOTE(review): LightGBM documents `is_unbalance` for binary /
                # multiclassova objectives; with objective='multiclass' the
                # `class_weight` setting below is the documented mechanism.
                # Confirm whether this flag can be dropped.
                is_unbalance=True,
                class_weight="balanced",
                reg_alpha=0.1,  # L1 regularization
                reg_lambda=0.1,  # L2 regularization
                learning_rate=learning_rate,
                max_depth=max_depth,
                n_estimators=n_estimators
            )
            best_model.fit(X_train, y_train)
            logger.info("LightGBM model training completed successfully.")
            return best_model
        except Exception:
            logger.exception("Error during LightGBM model training.")
            raise

    def initiate_model_training(self):
        """
        Orchestrate the model training pipeline.

        Loads the feature matrix with ``sparse.load_npz`` and the labels with
        ``read_data``, reduces the labels to a 1-D array when they arrive as a
        pandas object, trains the classifier with the configured
        hyper-parameters, and saves it with ``save_bin`` to
        ``trained_model_path``.

        Raises:
            Exception: Any error from loading, training or saving is logged
                and re-raised unchanged.
        """
        try:
            logger.info("Loading training data...")
            X_train_tfidf = sparse.load_npz(self.model_trainer_config.x_train_file_path)
            y_train = read_data(self.model_trainer_config.y_train_file_path)

            # Ensure y_train is 1D np.ndarray: a DataFrame contributes its
            # first column, a Series its underlying values.
            if isinstance(y_train, pd.DataFrame):
                y_train = y_train.iloc[:, 0].values
            elif isinstance(y_train, pd.Series):
                y_train = y_train.values

            logger.info(f"Training data loaded. Shape: X={X_train_tfidf.shape}, y={y_train.shape}")

            model = self.train_lgbm(
                X_train=X_train_tfidf,
                y_train=y_train,
                learning_rate=self.model_trainer_config.learning_rate,
                max_depth=self.model_trainer_config.max_depth,
                n_estimators=self.model_trainer_config.n_estimators
            )

            save_bin(model, self.model_trainer_config.trained_model_path)
            logger.info(f"Trained model saved at: {self.model_trainer_config.trained_model_path}")
        except Exception:
            logger.exception("Failed during model training pipeline.")
            raise


In [None]:
# Run the training stage end to end: load the configuration, build the
# trainer, then train and save the model. Exceptions propagate to the notebook
# unchanged; a wrapper that only re-raises would add nothing but an extra
# traceback frame.
config = ConfigurationManager()
modelconfig = config.get_model_config()
modeltrainer = ModelTrainer(ModelTrainerConfig=modelconfig)
modeltrainer.initiate_model_training()

[2025-07-08 01:51:45,513: INFO: common: YAML file loaded successfully: config\config.yaml]
[2025-07-08 01:51:45,521: INFO: common: YAML file loaded successfully: params.yaml]
[2025-07-08 01:51:45,528: INFO: common: YAML file loaded successfully: config\schema.yaml]
[2025-07-08 01:51:45,542: INFO: common: Created directory: artifacts]
[2025-07-08 01:51:45,549: INFO: common: Created directory: artifacts/prepare_base_model]
[2025-07-08 01:51:45,556: INFO: common: YAML file loaded successfully: config\schema.yaml]
[2025-07-08 01:51:45,559: INFO: 134846345: ModelTrainer initialized successfully.]
[2025-07-08 01:51:45,562: INFO: 134846345: Loading training data...]


[2025-07-08 01:51:45,903: INFO: 134846345: Training data loaded. Shape: X=(29717, 10000), y=(29717,)]
[2025-07-08 01:51:45,904: INFO: 134846345: Starting LightGBM model training...]




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.828019 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 130560
[LightGBM] [Info] Number of data points in the train set: 29717, number of used features: 4396
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[2025-07-08 01:54:09,569: INFO: 134846345: LightGBM model training completed successfully.]
[2025-07-08 01:54:09,858: INFO: common: Binary file saved: artifacts/prepare_base_model/trained_model.pkl]
[2025-07-08 01:54:09,861: INFO: 134846345: Trained model saved at: artifacts/prepare_base_model/trained_model.pkl]
