In [1]:
import os

In [2]:
pwd

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation\\research'

In [3]:
# Step up from the notebook folder to its parent (the project root, per the `pwd`
# output above) -- presumably so project-relative paths resolve; confirm.
# Guarded on the current folder name so that re-running this cell, or starting the
# kernel at the project root, does not climb one directory too far.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
pwd

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation'

## Entity

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable bundle of filesystem locations used by the data-transformation stage.

    Instances are built by ``ConfigManager.get_data_transformation_config`` from the
    ``data_transformation`` section of the project configuration and consumed by
    ``DataTransformation``. ``frozen=True`` makes the fields read-only after creation.
    """

    # Directory for this stage; created by ConfigManager before the config is returned.
    root_dir: Path
    # CSV file the train split is written to (and later read back from).
    train_data: Path
    # CSV file the test split is written to.
    test_data: Path
    # Destination of the pickled preprocessing object (passed to save_pickle).
    trans_obj: Path
    # Source CSV that is read and split into train/test.
    data_file: Path



## Config Manager

In [6]:
from heart_disease_pred.utils.commom import read_yaml, create_directories, save_csv
from heart_disease_pred.constants import  *
import pandas as pd

In [7]:
from heart_disease_pred.utils.commom import read_yaml, create_directories, save_csv
from heart_disease_pred.constants import  *
import pandas as pd

class ConfigManager:
    """Load the project's YAML settings and hand out per-stage config objects.

    On construction the config and schema files are parsed with ``read_yaml`` and the
    directory named by ``artifacts_root`` is created via ``create_directories``.
    """

    def __init__(
        self,
        config_file_path = CONFIG_FILE_PATH,
        schema_file_path = SCHEMA_FILE_PATH):
        """Parse both YAML files and make sure the artifacts root exists.

        Args:
            config_file_path: Location of the main configuration YAML.
            schema_file_path: Location of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the config object for the data-transformation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            DataTransformationConfig populated from the ``data_transformation``
            section of the loaded configuration.
        """
        section = self.config.data_transformation

        create_directories([section.root_dir])

        # The dataclass declares every field as Path, while values parsed from YAML
        # are presumably plain strings; convert so the declared types actually hold.
        return DataTransformationConfig(
            root_dir=Path(section.root_dir),
            train_data=Path(section.train_data),
            test_data=Path(section.test_data),
            trans_obj=Path(section.trans_obj),
            data_file=Path(section.data_file),
        )




## Components

In [8]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer

from sklearn.compose import ColumnTransformer

from heart_disease_pred.utils.commom import  save_pickle

import os, sys
from pathlib import Path
from heart_disease_pred.logger import logging
from heart_disease_pred.exception import CustomException

import pickle

In [9]:
class DataTransformation:
    """Split the source dataset and build the feature-preprocessing object.

    All file locations come from the ``DataTransformationConfig`` passed in at
    construction. Any failure inside a stage method is re-raised as
    ``CustomException`` (chained to the original error).
    """

    # Label column; it must never be routed into either feature pipeline.
    _TARGET_COLUMN = 'HeartDisease'

    def __init__(
        self,
        config: DataTransformationConfig):
        """Store the stage configuration.

        Args:
            config: Paths for the input data file, the train/test CSVs and the
                pickled preprocessing object.
        """
        self.config = config

    def split_data(self, test_size=0.22, random_state=42):
        """Read the source CSV, split it, and write the train and test CSVs.

        Args:
            test_size: Fraction (or absolute count) of rows placed in the test
                split, forwarded to ``train_test_split``. Defaults to 0.22.
            random_state: Seed forwarded to ``train_test_split`` so the split is
                reproducible. Defaults to 42.

        Raises:
            CustomException: If reading, splitting or writing fails.
        """
        try:
            data = pd.read_csv(self.config.data_file)

            # 'Unnamed: 0' is presumably a stray index column from an earlier
            # to_csv(); drop it when present, but do not fail on a file without it.
            data = data.drop(columns=['Unnamed: 0'], errors='ignore')

            train_data, test_data = train_test_split(
                data, test_size=test_size, random_state=random_state
            )

            # index=False keeps the CSVs free of a new unnamed index column.
            train_data.to_csv(self.config.train_data, index=False)
            test_data.to_csv(self.config.test_data, index=False)

        except Exception as e:
            raise CustomException(e, sys) from e

    def transform_data(self):
        """Build the preprocessing ``ColumnTransformer`` and pickle it.

        Feature columns are taken from the train CSV: numeric columns get median
        imputation plus standard scaling; all other columns get most-frequent
        imputation plus ordinal encoding with the fixed category orders below.
        The target column is excluded from both groups.

        NOTE(review): the transformer is pickled *unfitted*; whichever stage loads
        it has to fit it on training data before calling ``transform`` -- confirm
        this is intended.

        Raises:
            CustomException: If the train CSV cannot be read, the target column is
                missing, the number of categorical columns does not match the
                category table, or pickling fails.
        """
        try:
            train_data = pd.read_csv(self.config.train_data)

            target = self._TARGET_COLUMN
            if target not in train_data.columns:
                raise ValueError(f"target column {target!r} not found in training data")

            # Classify every non-target column. is_numeric_dtype is used instead of
            # comparing against 'object' so that text columns stored with pandas'
            # dedicated string dtype are still treated as categorical, and so the
            # target is excluded regardless of how it happens to be encoded.
            num_features = []
            cat_features = []
            for col in train_data.columns:
                if col == target:
                    continue
                if pd.api.types.is_numeric_dtype(train_data[col]):
                    num_features.append(col)
                else:
                    cat_features.append(col)

            # One ordered category list per categorical column, matched BY POSITION
            # to cat_features (i.e. to the column order of the train CSV). The
            # position of a value inside its list is the integer code it receives.
            cat_categories = [
                ['No', 'Yes'],
                ['No', 'Yes'],
                ['No', 'Yes'],
                ['No', 'Yes'],
                ['Female', 'Male'],
                ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'],
                ['White', 'Black', 'Asian', 'American Indian/Alaskan Native', 'Hispanic', 'Other'],
                ['No', 'No, borderline diabetes', 'Yes (during pregnancy)', 'Yes'],
                ['No', 'Yes'],
                ['Poor', 'Fair', 'Good', 'Very good', 'Excellent'],
                ['No', 'Yes'],
                ['No', 'Yes'],
                ['No', 'Yes'],
            ]

            # Fail here rather than at fit time in a later stage if the data no
            # longer lines up with the positional category table.
            if len(cat_features) != len(cat_categories):
                raise ValueError(
                    f"expected {len(cat_categories)} categorical feature columns, "
                    f"found {len(cat_features)}: {cat_features}"
                )

            num_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='median')),
                    ('scaler', StandardScaler()),
                ]
            )

            cat_pipeline = Pipeline(
                steps=[
                    ('imputer', SimpleImputer(strategy='most_frequent')),
                    ('ordinal_encoder', OrdinalEncoder(categories=cat_categories)),
                ]
            )

            # Columns not listed in either group (the target) are dropped by the
            # ColumnTransformer's default remainder handling.
            preprocessor = ColumnTransformer([
                ('num_pipeline', num_pipeline, num_features),
                ('cat_pipeline', cat_pipeline, cat_features),
            ])

            save_pickle(self.config.trans_obj, preprocessor)

        except Exception as e:
            raise CustomException(e, sys) from e


## Pipeline

In [10]:
# Run the data-transformation stage end to end.
# ConfigManager() parses the YAML config/schema at their default paths and creates
# the artifacts root directory.
config = ConfigManager()
# Also creates the stage's own root_dir before returning the config object.
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(data_transformation_config)

# Order matters: split_data() writes the train/test CSVs ...
data_transformation.split_data()

# ... and transform_data() reads the train CSV back to build and pickle the preprocessor.
data_transformation.transform_data()
