In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [7]:
# Load the raw heart-attack prediction dataset from the working directory.
heart_data = pd.read_csv(
    'heart_attack_prediction_dataset.csv',
)

In [8]:
# Remove the income and geography columns; they are not used further on.
heart_data = heart_data.drop(
    columns=['Income', 'Country', 'Continent', 'Hemisphere'],
)

In [9]:
# Remove the 'Patient ID' column from the frame.
heart_data = heart_data.drop(columns=['Patient ID'])

In [10]:
# Print the column summary (names, non-null counts, dtypes) after the drops above.
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 21 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              8763 non-null   int64  
 1   Sex                              8763 non-null   object 
 2   Cholesterol                      8763 non-null   int64  
 3   Blood Pressure                   8763 non-null   object 
 4   Heart Rate                       8763 non-null   int64  
 5   Diabetes                         8763 non-null   int64  
 6   Family History                   8763 non-null   int64  
 7   Smoking                          8763 non-null   int64  
 8   Obesity                          8763 non-null   int64  
 9   Alcohol Consumption              8763 non-null   int64  
 10  Exercise Hours Per Week          8763 non-null   float64
 11  Diet                             8763 non-null   object 
 12  Previous Heart Probl

In [11]:
# Indicator (dummy) columns for 'Sex'; drop_first=True omits the first level
# so the remaining columns are not redundant with each other.
sex_column = heart_data['Sex']
gen = pd.get_dummies(sex_column, drop_first=True)


In [12]:
# Indicator (dummy) columns for 'Diet'; the first level is omitted via drop_first.
diet_column = heart_data['Diet']
diet = pd.get_dummies(diet_column, drop_first=True)

In [13]:
# 'Blood Pressure' is stored as text with a '/' separator; split it into two
# integer columns: the part before the slash and the part after it.
blood_pressure_split = heart_data['Blood Pressure'].str.split('/', expand=True)
systolic_text, diastolic_text = blood_pressure_split[0], blood_pressure_split[1]
heart_data = heart_data.assign(
    Systolic=systolic_text.astype(int),
    Diastolic=diastolic_text.astype(int),
)

In [15]:
# Append the 'Sex' and 'Diet' indicator columns to the right of the frame.
frames_to_join = [heart_data, gen, diet]
heart_data = pd.concat(frames_to_join, axis=1)

In [16]:
# The original text columns are replaced by their indicator columns; remove them.
heart_data = heart_data.drop(columns=['Sex', 'Diet'])

In [17]:
# Print the column summary again to check the frame after encoding and the blood-pressure split.
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 24 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              8763 non-null   int64  
 1   Cholesterol                      8763 non-null   int64  
 2   Blood Pressure                   8763 non-null   object 
 3   Heart Rate                       8763 non-null   int64  
 4   Diabetes                         8763 non-null   int64  
 5   Family History                   8763 non-null   int64  
 6   Smoking                          8763 non-null   int64  
 7   Obesity                          8763 non-null   int64  
 8   Alcohol Consumption              8763 non-null   int64  
 9   Exercise Hours Per Week          8763 non-null   float64
 10  Previous Heart Problems          8763 non-null   int64  
 11  Medication Use                   8763 non-null   int64  
 12  Stress Level        

In [18]:
# The text 'Blood Pressure' column is superseded by 'Systolic'/'Diastolic'; remove it.
heart_data = heart_data.drop(columns=['Blood Pressure'])

In [19]:
# Print the column summary of the frame after 'Blood Pressure' has been dropped.
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8763 entries, 0 to 8762
Data columns (total 23 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Age                              8763 non-null   int64  
 1   Cholesterol                      8763 non-null   int64  
 2   Heart Rate                       8763 non-null   int64  
 3   Diabetes                         8763 non-null   int64  
 4   Family History                   8763 non-null   int64  
 5   Smoking                          8763 non-null   int64  
 6   Obesity                          8763 non-null   int64  
 7   Alcohol Consumption              8763 non-null   int64  
 8   Exercise Hours Per Week          8763 non-null   float64
 9   Previous Heart Problems          8763 non-null   int64  
 10  Medication Use                   8763 non-null   int64  
 11  Stress Level                     8763 non-null   int64  
 12  Sedentary Hours Per 

In [32]:
# Target vector: the 'Heart Attack Risk' column.
y = heart_data['Heart Attack Risk']
# Feature matrix: every remaining column (heart_data itself is left unchanged).
X = heart_data.drop(columns=['Heart Attack Risk'])

In [33]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [34]:
# Create an unfitted StandardScaler with default settings; it is fitted in a later cell.
scaler = StandardScaler()

In [35]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so the test set does not influence the fit.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [36]:
# Decision tree with a fixed seed so repeated runs build the same tree.
dt_model = DecisionTreeClassifier(
    random_state=2,
)
# Left as the final expression so the fitted estimator is displayed by the cell.
dt_model.fit(X_train, y_train)


DecisionTreeClassifier(random_state=2)

In [37]:

# Predict class labels for the held-out test features with the fitted tree.
y_pred = dt_model.predict(X_test)

In [38]:
# Confusion matrix first (rows: true labels, columns: predicted labels) ...
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
# ... followed by the per-class precision / recall / F1 table.
print(classification_report(y_true=y_test, y_pred=y_pred))

[[703 428]
 [393 229]]
              precision    recall  f1-score   support

           0       0.64      0.62      0.63      1131
           1       0.35      0.37      0.36       622

    accuracy                           0.53      1753
   macro avg       0.49      0.49      0.49      1753
weighted avg       0.54      0.53      0.53      1753



In [None]:


# Self-contained train/evaluate run for a Decision Tree classifier.
# It starts again from the unscaled feature frame `X` and the target `y`, so the
# outcome does not depend on whatever X_train / X_test an earlier cell left in
# the kernel (re-using already-scaled arrays would be hidden state).

# Train-test split: hold out 20% of the rows, fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features (optional for Decision Trees, but good practice).
# The scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize and train the Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Make predictions
y_pred = dt_model.predict(X_test)

# Evaluate the model
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
