In [1]:
import os
os.chdir("../")

In [69]:
from dataclasses import dataclass
from pathlib import Path

In [70]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data: Path
    test_data: Path
    best_model_path: Path  
    train_not_scaled: Path
    test_not_scaled: Path  
    dim_red_model: Path

In [64]:
from src.Customer_segementation.constant import *
from src.Customer_segementation.utils.common import read_yaml, create_directories

In [71]:
class configurationManager: 
    def __init__(self,config_file_path=CONFIG_FILE_PATH,
                 schema_file_path=SCHEMA_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        self.config=read_yaml(config_file_path)
        self.schema=read_yaml(schema_file_path)
        self.params=read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self)->ModelTrainerConfig:
        config=self.config.model_trainer
        create_directories([config.root_dir])
        
        model_Trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data=config.train_data_scaled_path,
            test_data=config.test_data_scaled_path,
            best_model_path=config.best_model_path,
            train_not_scaled=config.train_data_not_scaled,
            test_not_scaled=config.test_data_not_scaled,
            dim_red_model=config.dim_red_model_path
            )
        return model_Trainer_config

In [72]:
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import pandas as pd
from src.Customer_segementation.logger import logger
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, classification_report
from src.Customer_segementation.utils.common import save_object

In [73]:
class ModelTainer:
    def __init__(self, config=ModelTrainerConfig) -> None:
        self.config=config

        self.model={
            'kmean': KMeans(n_clusters=4, init='k-means++')
        }

    def model_evaluation(self, x_train, x_test, y_train, y_test, Model):
        report={}
        dim_red=PCA(n_components=2)
        x_train_scaled=dim_red.fit_transform(x_train)
        x_test_scaled=dim_red.transform(x_test)

        for mod in range(len(Model)): 
            model= list(Model.values())[mod]

            model= model.fit(x_train_scaled)

            y_pred=model.predict(x_test_scaled)

            logger.info(f"Y perdication: {y_pred}")

            score=confusion_matrix(y_test, y_pred)
            # precision_sc=precision_score(y_test, y_pred, average='weighted')

            acc_score=accuracy_score(y_test, y_pred)
            # logger.info(f"Accuracy score: {acc_score}")

            class_report=classification_report(y_test, y_pred)
            # logger.info(f"Classification report\n\n: {class_report}")
            
            # mae=mean_absolute_error(x_test, y_pred)

            logger.info(f"Accuracy Score\n\n: {score}")
            # logger.info(f"precision_score: {precision_score}")

            report[list(Model.keys())[mod]]=acc_score
            # report[list(Model.keys())[mod]]=score
        return (report, model, dim_red)





    def inititate_model_trainer(self):
        train_data=self.config.train_data
        train_not_scaled=self.config.train_not_scaled
        test_not_scaled=self.config.test_not_scaled
        test_data=self.config.test_data

        x= pd.read_csv(train_data)
        x1=pd.read_csv(train_not_scaled)
        x1['Segmentation']=x1['Segmentation'].map({'A':1, 'B':2, 'C':3, 'D':4})

        y1=pd.read_csv(test_not_scaled)
        y1['Segmentation']=y1['Segmentation'].map({'A':1, 'B':2, 'C':3, 'D':4})

        y=pd.read_csv(test_data)

        x_train=x.iloc[:,:-1]
        y_train=x1.iloc[:,-1]

        x_test=y.iloc[:,:-1]
        y_test=y1.iloc[:,-1]

        # logger.info(f"x_: {x_test}")
        # logger.info(f"{y_test}")

        logger.info(f"Model list :{self.model}")

        report, model, dim_red=self.model_evaluation(x_train, x_test, y_train, y_test, self.model)
        logger.info(f"Report of model: {report}")

        save_object(file_path=Path(self.config.best_model_path), obj=model)
        save_object(file_path=Path(self.config.dim_red_model), obj=dim_red)
        


In [74]:
try: 
    config_manager=configurationManager()
    model_trainer_config=config_manager.get_model_trainer_config()
    model_trainer=ModelTainer(model_trainer_config)
    model_trainer.inititate_model_trainer()
except Exception as e:
    raise e

[2024-06-28 06:45:44,683 : INFO : common : Yaml file read config/config.yaml successfully]


[2024-06-28 06:45:44,697 : INFO : common : Yaml file read schema.yaml successfully]
[2024-06-28 06:45:44,709 : INFO : common : Yaml file read params.yaml successfully]
[2024-06-28 06:45:44,716 : INFO : common : Directories created ['artifacts']]
[2024-06-28 06:45:44,717 : INFO : common : Directories created ['artifacts/model_trainer']]
[2024-06-28 06:45:44,744 : INFO : 1976508948 : Model list :{'kmean': KMeans(n_clusters=4)}]


  super()._check_params_vs_input(X, default_n_init=10)


[2024-06-28 06:45:45,322 : INFO : 1976508948 : Y perdication: [3 1 3 ... 2 1 2]]
[2024-06-28 06:45:45,332 : INFO : 1976508948 : Accuracy Score

: [[  0   0   0   0   0]
 [134 108 364 120   0]
 [ 87  86 367  87   0]
 [ 59  48 397  76   0]
 [176 316 186  63   0]]]
[2024-06-28 06:45:45,332 : INFO : 1976508948 : Report of model: {'kmean': 0.20605833956619296}]
[2024-06-28 06:45:45,334 : INFO : common : Object save at: <_io.BufferedWriter name='artifacts/model_trainer/best_Model.h5'>]
[2024-06-28 06:45:45,335 : INFO : common : Object save at: <_io.BufferedWriter name='artifacts/model_trainer/dim_red.h5'>]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
