In [83]:
import os
# Run the notebook from the project root so that relative paths used later
# (e.g. 'artifacts/raw_data/...') resolve.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
project_dir = 'd:\\vscode_machineLearning\\internship\\Customer-Churn-Prediction'
print(os.getcwd())  # working directory before the switch
os.chdir(project_dir)
print(os.getcwd())  # working directory after the switch

d:\vscode_machineLearning\internship\Customer-Churn-Prediction
d:\vscode_machineLearning\internship\Customer-Churn-Prediction


In [84]:
import pandas as pd

In [85]:
# Load the raw churn CSV; the relative path is resolved against the current working directory.
data = pd.read_csv(r'artifacts/raw_data/customer_churn_removed_col.csv')

In [86]:
# Bare last expression: the notebook renders the loaded DataFrame as the cell output.
data 

Unnamed: 0,Age,Gender,Location,Subscription_Length_Months,Monthly_Bill,Total_Usage_GB,Churn
0,63,Male,Los Angeles,17,73.36,236,0
1,62,Female,New York,1,48.76,172,0
2,24,Female,Los Angeles,5,85.47,460,0
3,36,Female,Miami,3,97.94,297,1
4,46,Female,Miami,19,58.14,266,0
...,...,...,...,...,...,...,...
99995,33,Male,Houston,23,55.13,226,1
99996,62,Female,New York,19,61.65,351,0
99997,64,Male,Chicago,17,96.11,251,1
99998,51,Female,New York,20,49.25,434,1


## Entity

In [87]:
from dataclasses import dataclass
from pathlib import Path

In [88]:
@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings consumed by the data-transformation stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Directory belonging to this stage (created before the stage runs).
    root_dir: Path
    # CSV files holding the train / test splits that get transformed.
    train_data: Path
    test_data: Path
    # Destination CSV files for the transformed train / test frames.
    transform_train_df_path: Path
    transform_test_df_path: Path
    # Location where the fitted preprocessor is dumped with joblib.
    preprocessor_obj: str
    # Model directory taken from the `model_dir` config entry.
    model: Path

## Configuration

In [89]:
from churnPredictor.utils import *
from churnPredictor import CustomException , logger
from churnPredictor.constants import *

In [90]:
class ConfigurationManager:
    """Reads the project YAML files and hands out per-stage configuration objects."""

    def __init__(
        self,
        config_file_path = CONFIG_FILE_PATH,
        schema_file_path = SCHEMA_FILE_PATH):
        """Load the config and schema YAML files and prepare the artifacts root.

        Args:
            config_file_path: Path of the main configuration YAML.
            schema_file_path: Path of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)

        # The artifacts root is passed to create_dirs before any stage runs.
        artifacts_root = self.config.artifacts_root
        create_dirs([artifacts_root])

    def get_data_transformation_config(self):
        """Build the configuration for the data-transformation stage.

        Passes the stage's root and model directories to create_dirs, then
        copies the relevant entries of the `data_transformation` config
        section into a DataTransformationConfig.

        Returns:
            DataTransformationConfig populated from the loaded config.
        """
        section = self.config.data_transformation

        # Output directories are handed to create_dirs before the stage writes to them.
        create_dirs([section.root_dir, section.model_dir])

        settings = dict(
            root_dir=section.root_dir,
            train_data=section.train_data_path,
            test_data=section.test_data_path,
            transform_test_df_path=section.transformed_test_df_path,
            transform_train_df_path=section.transformed_train_df_path,
            preprocessor_obj=section.preprocessor_obj,
            model=section.model_dir,
        )
        return DataTransformationConfig(**settings)


## Components

In [91]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (OneHotEncoder,
                                   MinMaxScaler)
import numpy as np
import joblib

In [92]:
class TransformData:
    """Encode and scale the train/test splits and persist the fitted preprocessor.

    The preprocessor is fitted on the training split only and then applied
    unchanged to the test split, so both splits share the same encoding and
    scaling and the saved preprocessor reflects training data.
    """

    # Integer codes substituted for the Gender labels before preprocessing.
    _GENDER_CODES = {'Male': 0, 'Female': 1}

    # Short names for the ColumnTransformer output columns.
    # NOTE(review): the passthrough target comes out as 'remainder__Churn' and is
    # intentionally left as-is here, because downstream code may already read
    # that name -- confirm before adding it to this mapping.
    _COLUMN_RENAMES = {
        'OHE__Location_Houston': 'Houston',
        'OHE__Location_Los Angeles': 'LosAngeles',
        'OHE__Location_Miami': 'Miami',
        'OHE__Location_New York': 'NewYork',
        'scaling__Age': 'Age',
        'scaling__Subscription_Length_Months': 'Subscription_Length_Months',
        'scaling__Monthly_Bill': 'Monthly_Bill',
        'scaling__Total_Usage_GB': 'Total_Usage_GB',
        'remainder__Gender': 'Gender',
    }

    def __init__(self,config:DataTransformationConfig):
        """Store the stage configuration (input/output paths)."""
        self.config = config

    def initiate_data_transformation(self):
        """Transform both splits and write the results to disk.

        Steps:
            1. Read the train and test CSV files named in the config.
            2. Replace the Gender labels with integer codes.
            3. Fit a ColumnTransformer (one-hot Location, min-max scaled
               numeric columns, everything else passed through) on the
               training split and apply it to both splits.
            4. Write the transformed frames as CSV and dump the fitted
               preprocessor with joblib.

        Returns:
            None. Results are written to the paths held in ``self.config``.
        """
        train_df = pd.read_csv(self.config.train_data)
        test_df = pd.read_csv(self.config.test_data)

        train_df['Gender'] = train_df['Gender'].replace(self._GENDER_CODES)
        test_df['Gender'] = test_df['Gender'].replace(self._GENDER_CODES)

        preprocessing = ColumnTransformer(transformers=[
                        ('OHE',OneHotEncoder(drop='first',sparse_output=False,dtype=np.int64),['Location']),
                        ('scaling',MinMaxScaler(),['Age', 'Subscription_Length_Months', 'Monthly_Bill', 'Total_Usage_GB'])
                    ],remainder='passthrough')

        # Fit on the training split only. The test split must be transformed with
        # the statistics learned from training data; re-fitting on it would leak
        # test information and would make the dumped preprocessor test-fitted.
        transformed_train = preprocessing.fit_transform(train_df)
        transformed_test = preprocessing.transform(test_df)

        feature_names = preprocessing.get_feature_names_out()
        transformed_train_df = pd.DataFrame(
            data=transformed_train, columns=feature_names
        ).rename(columns=self._COLUMN_RENAMES)
        transformed_test_df = pd.DataFrame(
            data=transformed_test, columns=feature_names
        ).rename(columns=self._COLUMN_RENAMES)

        transformed_train_df.to_csv(self.config.transform_train_df_path,index=False)
        transformed_test_df.to_csv(self.config.transform_test_df_path,index=False)
        joblib.dump(preprocessing,self.config.preprocessor_obj)

        logger.info("data transformation done!")
        logger.info(f'Columns : {transformed_train_df.columns}')
        logger.info(f'Columns : {transformed_test_df.columns}')
        logger.info(transformed_train_df.shape)
        # Report the test frame's shape here (the train shape is logged just above).
        logger.info(transformed_test_df.shape)


## Pipeline

In [93]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage object from it, then execute the transformation.
try:
    entity = ConfigurationManager()
    get_entity = entity.get_data_transformation_config()
    trans_data = TransformData(config=get_entity)
    trans_data.initiate_data_transformation()
except Exception as e:
    # Re-raise any failure wrapped in the project's CustomException.
    raise CustomException(e)


[2023-09-15 17:22:54,945: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2023-09-15 17:22:54,947: INFO: utils: yaml file: schema.yaml loaded successfully]
[2023-09-15 17:22:54,948: INFO: utils: Created artifacts]
[2023-09-15 17:22:54,949: INFO: utils: Created artifacts\transformed_data]
[2023-09-15 17:22:54,949: INFO: utils: Created artifacts\model]
[2023-09-15 17:22:56,061: INFO: 371145532: data transformation done!]
[2023-09-15 17:22:56,062: INFO: 371145532: Columns : Index(['Houston', 'LosAngeles', 'Miami', 'NewYork', 'Age',
       'Subscription_Length_Months', 'Monthly_Bill', 'Total_Usage_GB',
       'Gender', 'remainder__Churn'],
      dtype='object')]
[2023-09-15 17:22:56,064: INFO: 371145532: Columns : Index(['Houston', 'LosAngeles', 'Miami', 'NewYork', 'Age',
       'Subscription_Length_Months', 'Monthly_Bill', 'Total_Usage_GB',
       'Gender', 'remainder__Churn'],
      dtype='object')]
[2023-09-15 17:22:56,064: INFO: 371145532: (80000, 10)]
[2023-09-15 17:2