In [9]:
import os,sys
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('heart.csv')

In [3]:
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass
class Data_Transformation_Config:
    root_dir: Path
    data_path: Path

In [5]:
os.chdir('../')

In [28]:
from src.Project4.constants import *
from src.Project4.utils.common import read_yaml,create_directories
class Configuration_Manager:
    def __init__(self,config_file_path=CONFIG_FILE_PATH,schema_file_path=SCHEMA_FILE_PATH,params_file_path=PARAMS_FILE_PATH):
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)
        self.params = read_yaml(params_file_path)
        
        create_directories([self.config.artifacts_root])
        
    def get_data_transformation_config(self) -> Data_Transformation_Config:
        config = self.config.data_transformation
        create_directories([config.root_dir])
        data_transformation_config = Data_Transformation_Config(root_dir=config.root_dir,data_path=config.data_path)

        return data_transformation_config

In [35]:
from sklearn.model_selection import train_test_split
from src.Project4.utils.common import logger
class Data_Transformation:
    def __init__(self,config: Data_Transformation_Config):
        self.config = config

    def transform_data(self):
        try:
            data = pd.read_csv(self.config.data_path)
            features = ['cp','thalachh','slp','restecg','exng','oldpeak','caa','thall','sex','age']
            X = data[features]
            y = data.iloc[:,-1]
            train_data,test_data = train_test_split(X,y,test_size=0.2,random_state=42)
            train_data.to_csv(os.path.join(self.config.root_dir,"train.csv"),index=False)
            test_data.to_csv(os.path.join(self.config.root_dir,"test.csv"),index=False)
            logger.info('Splitted the data into train and test')
            logger.info(f'Train data shape: {train_data.shape}')
            logger.info(f'Test data shape: {test_data.shape}')
        except Exception as e:
            raise e
        

In [36]:
try:
    config = Configuration_Manager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = Data_Transformation(config=data_transformation_config)
    data_transformation.transform_data()
    
except Exception as e:
    raise e

2024-02-25 14:00:01,421 - src_logger - INFO - Yaml file:config\config.yaml loaded successfully
2024-02-25 14:00:01,425 - src_logger - INFO - Yaml file:schema.yaml loaded successfully
2024-02-25 14:00:01,433 - src_logger - INFO - Yaml file:params.yaml loaded successfully
2024-02-25 14:00:01,436 - src_logger - INFO - Created directory at: artifacts
2024-02-25 14:00:01,439 - src_logger - INFO - Created directory at: artifacts/data_transformation
2024-02-25 14:00:01,451 - src_logger - INFO - Splitted the data into train and test
2024-02-25 14:00:01,453 - src_logger - INFO - Train data shape: (227, 14)
2024-02-25 14:00:01,454 - src_logger - INFO - Test data shape: (76, 14)
