In [1]:
import os
# Switch the working directory to the parent folder so the relative paths
# used further down (e.g. 'artifacts/...') resolve from there.
# NOTE(review): this is not idempotent -- every re-run of this cell climbs one
# more directory level, so run it exactly once per kernel session.
os.chdir("../")
import pandas as pd


In [2]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    Attributes:
        root_dir: Directory that receives the ``train.csv`` and ``test.csv``
            files produced by the split.
        data_path: CSV file that is read and split.
    """
    root_dir: Path
    data_path: Path

In [3]:
from HeartDisease.constants import *
from HeartDisease.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects.

    Constructing an instance loads the config, params and schema YAML files
    and creates the top-level artifacts directory named in the config.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main config YAML.
            params_filepath: Location of the params YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the config for the data-transformation stage.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataTransformationConfig`` whose fields are ``Path`` objects.
        """
        stage_cfg = self.config.data_transformation

        create_directories([stage_cfg.root_dir])

        # DataTransformationConfig annotates both fields as Path, while the
        # values here come straight from the parsed YAML (presumably plain
        # strings); convert so the declared types actually hold.
        return DataTransformationConfig(
            root_dir=Path(stage_cfg.root_dir),
            data_path=Path(stage_cfg.data_path),
        )

In [5]:
import os
from HeartDisease import logger
from sklearn.model_selection import train_test_split
import pandas as pd

In [6]:
class DataTransformation:
    """Splits the configured CSV dataset into train and test CSV files."""

    def __init__(self, config: DataTransformationConfig):
        """Store the stage configuration (``data_path`` and ``root_dir`` are read later)."""
        self.config = config


    def train_test_spliting(self, test_size=0.25, random_state=42):
        """Read the dataset, split it, and write ``train.csv`` / ``test.csv``.

        Args:
            test_size: Fraction of rows placed in the test set. The default
                0.25 matches scikit-learn's own default, i.e. the
                (0.75, 0.25) split used before this parameter existed.
            random_state: Seed for the shuffle so the split is reproducible
                across runs. Pass ``None`` to get a different random split on
                every call.

        Both files are written into ``self.config.root_dir`` without the
        DataFrame index. The resulting shapes are logged and printed.
        """
        data = pd.read_csv(self.config.data_path)

        # Seeded split: without a fixed random_state every run would produce
        # different train/test files, making downstream results unrepeatable.
        train, test = train_test_split(
            data, test_size=test_size, random_state=random_state
        )

        train.to_csv(os.path.join(self.config.root_dir, "train.csv"),index = False)
        test.to_csv(os.path.join(self.config.root_dir, "test.csv"),index = False)

        logger.info("Splited data into training and test sets")
        logger.info(train.shape)
        logger.info(test.shape)

        print(train.shape)
        print(test.shape)
        

In [7]:
# Run the data-transformation stage end to end: load the configuration,
# build the stage config, then split the dataset into train/test CSVs.
# There is deliberately no try/except here: catching an exception only to
# re-raise it adds nothing, so errors surface with their original traceback.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
data_transformation.train_test_spliting()

[2023-11-21 09:43:04,777: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-11-21 09:43:04,777: INFO: common: yaml file: params.yaml loaded successfully]
[2023-11-21 09:43:04,780: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-11-21 09:43:04,780: INFO: common: created directory at: artifacts]
[2023-11-21 09:43:04,786: INFO: common: created directory at: artifacts/data_transformation]
[2023-11-21 09:43:04,813: INFO: 741865018: Splited data into training and test sets]
[2023-11-21 09:43:04,813: INFO: 741865018: (1199, 12)]
[2023-11-21 09:43:04,816: INFO: 741865018: (400, 12)]
(1199, 12)
(400, 12)


In [4]:
# Load the test split CSV from the data-transformation artifacts directory
# (path is relative to the current working directory).
test = pd.read_csv('artifacts/data_transformation/test.csv')

In [6]:
# Preview the first rows; as the cell's last expression it is rendered as output.
test.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,0,23.44,0.060469,0.088185,0.074638,10.0,20.0,0.225722,0.066507,0.227609,0.091848,0.065306,0.137116,0.102523,6.0,0.081118,0.077655,0.077082
1,0,32.49,0.060469,0.088185,0.074638,0.0,4.0,0.063047,0.106702,0.023013,0.091848,0.065306,0.070789,0.022644,8.0,0.081118,0.077655,0.077082
2,0,21.93,0.060469,0.088185,0.074638,0.0,0.0,0.063047,0.106702,0.098706,0.091848,0.065306,0.070789,0.022644,7.0,0.081118,0.077655,0.077082
3,0,26.58,0.060469,0.088185,0.074638,0.0,2.0,0.063047,0.106702,0.033436,0.091848,0.065306,0.070789,0.04744,7.0,0.081118,0.077655,0.077082
4,0,19.02,0.121428,0.088185,0.074638,2.0,2.0,0.063047,0.066507,0.227609,0.091848,0.065306,0.070789,0.102523,6.0,0.081118,0.077655,0.077082


In [7]:
# Names of every column in `test` whose dtype is not numeric.
catcols = test.select_dtypes(exclude='number').columns.tolist()

In [25]:
# Feature columns, in the order used to build the single-row input frame.
cols = [
    'BMI',
    'Smoking',
    'AlcoholDrinking',
    'Stroke',
    'PhysicalHealth',
    'MentalHealth',
    'DiffWalking',
    'Sex',
    'AgeCategory',
    'Race',
    'Diabetic',
    'PhysicalActivity',
    'GenHealth',
    'SleepTime',
    'Asthma',
    'KidneyDisease',
    'SkinCancer',
]
# Categorical features: everything in `cols` except the four numeric ones,
# kept in the same relative order.
catcols = [
    name
    for name in cols
    if name not in ('BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime')
]

In [34]:
import numpy as np

# One hand-written sample shaped as a single row (1 x number-of-values).
# dtype=object keeps each value's own Python type. Without it NumPy coerces
# the mixed list to one string dtype, silently turning 23.44 into '23.44'.
# SleepTime (index 13) is given as the number 7 rather than the string '7'
# so that every numeric feature is actually numeric.
data = np.array(
    [23.44, 'Yes', 'No', 'No', 4, 4, 'No', 'Female', '40-44', 'White',
     'No', 'Yes', 'Good', 7, 'Yes', 'No', 'No'],
    dtype=object,
).reshape(1, -1)  # -1 infers the column count instead of hard-coding 17

In [35]:
# Wrap the single-row array in a DataFrame labelled with the feature names.
data = pd.DataFrame(data=data, columns=cols)

In [40]:
import pickle

# Load the encoder explicitly so this cell works on a fresh kernel instead of
# relying on an `encoder` variable left over from an earlier session.
# NOTE(review): assumes the artifact was written with pickle -- confirm
# against the code that saves it.
# pickle can execute arbitrary code while loading; only open artifacts that
# were produced by this project.
with open('artifacts/data_transformation/encoder.obj', 'rb') as encoder_file:
    encoder = pickle.load(encoder_file)

# Replace the raw category labels with the encoder's output, in place.
data[catcols] = encoder.transform(data[catcols])





In [36]:
import joblib


# Load the trained model artifact from disk (path is relative to the current
# working directory).
# NOTE(review): `Path` comes from the `from pathlib import Path` import in an
# earlier cell, so that cell must have been run first.
model = joblib.load(Path('artifacts/model_trainer/model.joblib'))

In [63]:
from torch import Tensor
import torch

# Convert the feature frame to a float32 tensor under its own name. Keeping
# `data` untouched makes this cell safe to re-run; converting in place (or
# leaving the conversion commented out) only works with leftover kernel state.
features = torch.tensor(data.values.astype(np.float32))

# Inference only, so skip building the autograd graph.
with torch.no_grad():
    prediction = model(features)

# Hand the result back as a NumPy array for plain indexing below.
prediction = prediction.detach().numpy()

In [68]:
# Map the first output value to a label using a 0.5 cut-off.
# NOTE(review): presumably the model emits a probability of heart disease in
# [0, 1] -- confirm against the training code before relying on this threshold.
'Yes' if prediction[0][0]>0.5 else 'No'

'No'