In [2]:
import os
os.chdir("../")

In [3]:
from dataclasses import dataclass
from pathlib import Path

In [4]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data: Path
    test_data: Path
    best_model_path: Path  
    train_not_scaled: Path
    test_not_scaled: Path  

In [5]:
from src.Customer_segementation.constant import *
from src.Customer_segementation.utils.common import read_yaml, create_directories

In [6]:
class configurationManager: 
    def __init__(self,config_file_path=CONFIG_FILE_PATH,
                 schema_file_path=SCHEMA_FILE_PATH,
                 params_file_path=PARAMS_FILE_PATH):
        self.config=read_yaml(config_file_path)
        self.schema=read_yaml(schema_file_path)
        self.params=read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self)->ModelTrainerConfig:
        config=self.config.model_trainer
        
        model_Trainer_config=ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data=config.train_data_scaled_path,
            test_data=config.test_data_scaled_path,
            best_model_path=config.best_model_path,
            train_not_scaled=config.train_data_not_scaled,
            test_not_scaled=config.test_data_not_scaled
            )
        return model_Trainer_config

In [34]:
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import pandas as pd
from src.Customer_segementation.logger import logger
from sklearn.metrics import confusion_matrix, precision_score

In [37]:
class ModelTainer:
    def __init__(self, config=ModelTrainerConfig) -> None:
        self.config=config

        self.model={
            'kmean': KMeans(n_clusters=4, init='k-means++')
        }

    def model_evaluation(self, x_train, x_test, y_train, y_test, Model):
        
        dim_red=PCA(n_components=2)
        x_train_scaled=dim_red.fit_transform(x_train)
        x_test_scaled=dim_red.transform(x_test)

        for mod in range(len(Model)): 
            model= list(Model.values())[mod]

            model.fit(x_train_scaled)

            y_pred=model.predict(x_test_scaled)

            logger.info(f"Y perdication: {y_pred}")

            score=confusion_matrix(y_test, y_pred)
            precision_sc=precision_score(y_test, y_pred, average='weighted')

            # acc_score=accuracy_score(x_test, y_pred)
            # mae=mean_absolute_error(x_test, y_pred)

            logger.info(f"Accuracy Score: {score}")
            logger.info(f"precision_score: {precision_score}")



    def inititate_model_trainer(self):
        train_data=self.config.train_data
        train_not_scaled=self.config.train_not_scaled
        test_not_scaled=self.config.test_not_scaled
        test_data=self.config.test_data

        x= pd.read_csv(train_data)
        x1=pd.read_csv(train_not_scaled)
        x1['Segmentation']=x1['Segmentation'].map({'A':1, 'B':2, 'C':3, 'D':4})

        y1=pd.read_csv(test_not_scaled)
        y1['Segmentation']=y1['Segmentation'].map({'A':1, 'B':2, 'C':3, 'D':4})

        y=pd.read_csv(test_data)

        x_train=x.iloc[:,:-1]
        y_train=x1.iloc[:,-1]

        x_test=y.iloc[:,:-1]
        y_test=y1.iloc[:,-1]

        # logger.info(f"x_: {x_test}")
        # logger.info(f"{y_test}")

        logger.info(f"Model list :{self.model}")

        self.model_evaluation(x_train, x_test, y_train, y_test, self.model)
        
        






In [38]:
try: 
    config_manager=configurationManager()
    model_trainer_config=config_manager.get_model_trainer_config()
    model_trainer=ModelTainer(model_trainer_config)
    model_trainer.inititate_model_trainer()
except Exception as e:
    raise e

[2024-06-27 20:12:13,283 : INFO : common : Yaml file read config/config.yaml successfully]
[2024-06-27 20:12:13,286 : INFO : common : Yaml file read schema.yaml successfully]
[2024-06-27 20:12:13,287 : INFO : common : Yaml file read params.yaml successfully]
[2024-06-27 20:12:13,288 : INFO : common : Directories created ['artifacts']]
[2024-06-27 20:12:13,310 : INFO : 1544243658 : Model list :{'kmean': KMeans(n_clusters=4)}]


  super()._check_params_vs_input(X, default_n_init=10)


[2024-06-27 20:12:13,920 : INFO : 1544243658 : Y perdication: [0 3 0 ... 1 3 1]]
[2024-06-27 20:12:13,924 : INFO : 1544243658 : Accuracy Score: [[  0   0   0   0   0]
 [121 363 134 108   0]
 [ 87 367  87  86   0]
 [ 77 396  59  48   0]
 [ 63 186 176 316   0]]]
[2024-06-27 20:12:13,925 : INFO : 1544243658 : precision_score: <function precision_score at 0x11b90e310>]


  _warn_prf(average, modifier, msg_start, len(result))
