In [20]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [21]:
df = pd.read_csv('/content/Heart Attack.csv')

In [22]:
df.head()

Unnamed: 0,age,gender,impluse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64,1,66,160,83,160.0,1.8,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.06,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative


In [23]:
df.isnull().sum()

age              0
gender           0
impluse          0
pressurehight    0
pressurelow      0
glucose          0
kcm              0
troponin         0
class            0
dtype: int64

In [24]:
le = LabelEncoder()
df['class'] = le.fit_transform(df['class'])

In [25]:
df.head()

Unnamed: 0,age,gender,impluse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64,1,66,160,83,160.0,1.8,0.012,0
1,21,1,94,98,46,296.0,6.75,1.06,1
2,55,1,64,160,77,270.0,1.99,0.003,0
3,64,1,70,120,55,270.0,13.87,0.122,1
4,55,1,64,112,65,300.0,1.08,0.003,0


**Data Splitting**

In [26]:
X = df.iloc[:,:-1]
Y = df.loc[:,'class']

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2, random_state =42)

**Before Applying PCA, Decision Tree**

In [44]:
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)

y_train_pred_dtree = dtree.predict(X_train)
y_test_pred_dtree = dtree.predict(X_test)

train_accuracy = accuracy_score(y_train, y_train_pred_dtree)
test_accuracy = accuracy_score(y_test, y_test_pred_dtree)

print("Decision Tree Training Accuracy:", train_accuracy)
print("Decision Tree Testing Accuracy:", test_accuracy)

print("\nDecision Tree Classification Report (Testing):")
print(classification_report(y_test, y_test_pred_dtree))

print("Confusion Matrix (Decision Tree):")
print(confusion_matrix(y_test, y_test_pred_dtree))


Decision Tree Training Accuracy: 1.0
Decision Tree Testing Accuracy: 0.5606060606060606

Decision Tree Classification Report (Testing):
              precision    recall  f1-score   support

           0       0.43      0.45      0.44       101
           1       0.65      0.63      0.64       163

    accuracy                           0.56       264
   macro avg       0.54      0.54      0.54       264
weighted avg       0.56      0.56      0.56       264

Confusion Matrix (Decision Tree):
[[ 45  56]
 [ 60 103]]


**After Applying PCA, Decision Tree**

In [29]:
X = df.iloc[:,:-1]
Y = df.loc[:,'class']

In [30]:
ss = StandardScaler()
X_scaled = ss.fit_transform(X)
X_scaled

array([[ 5.72357956e-01,  7.18395402e-01, -2.39032215e-01, ...,
         1.78459449e-01, -2.90961900e-01, -3.02342376e-01],
       [-2.57963993e+00,  7.18395402e-01,  3.03491001e-01, ...,
         1.99434379e+00, -1.84072428e-01,  6.05700979e-01],
       [-8.73625310e-02,  7.18395402e-01, -2.77783874e-01, ...,
         1.64718943e+00, -2.86859072e-01, -3.10140458e-01],
       ...,
       [-8.20385295e-01,  7.18395402e-01,  1.29108539e-01, ...,
        -6.76074358e-01, -3.03054447e-01,  3.36968791e+00],
       [-1.60664807e-01,  7.18395402e-01, -3.94038849e-01, ...,
         3.95710113e+00, -2.04586569e-01, -1.68298262e-03],
       [-3.80571637e-01,  7.18395402e-01,  3.03491001e-01, ...,
        -1.68694910e-01,  7.69079350e-01,  1.22088302e+00]])

In [31]:
from sklearn.decomposition import PCA

In [32]:
pca = PCA(n_components=2)
princicpal_component = pca.fit_transform(X_scaled)
PrincipalDF = pd.DataFrame(data = princicpal_component, columns = ['PC1', 'PC2'])

In [33]:
PrincipalDF.head()

Unnamed: 0,PC1,PC2
0,1.367477,-0.025233
1,-2.066362,-1.912998
2,1.024736,-0.411556
3,-1.093835,0.263509
4,-0.843894,-0.302465


In [34]:
PrincipalDF = pd.concat([PrincipalDF,Y], axis = 1)

In [35]:
PrincipalDF.head()

Unnamed: 0,PC1,PC2,class
0,1.367477,-0.025233,0
1,-2.066362,-1.912998,1
2,1.024736,-0.411556,0
3,-1.093835,0.263509,1
4,-0.843894,-0.302465,0


In [39]:
X = PrincipalDF.drop('class',axis=1)
Y = PrincipalDF['class']

In [40]:
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2, random_state =42)

In [41]:
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)

y_train_pred_dtree = dtree.predict(X_train)
y_test_pred_dtree = dtree.predict(X_test)

print("Decision Tree Training Accuracy:", accuracy_score(y_train, y_train_pred_dtree))
print("Decision Tree Testing Accuracy:", accuracy_score(y_test, y_test_pred_dtree))

print("\nDecision Tree Classification Report (Testing):")
print(classification_report(y_test, y_test_pred_dtree))

print("Confusion Matrix (Decision Tree):")
print(confusion_matrix(y_test, y_test_pred_dtree))

Decision Tree Training Accuracy: 1.0
Decision Tree Testing Accuracy: 0.5416666666666666

Decision Tree Classification Report (Testing):
              precision    recall  f1-score   support

           0       0.41      0.43      0.42       101
           1       0.63      0.61      0.62       163

    accuracy                           0.54       264
   macro avg       0.52      0.52      0.52       264
weighted avg       0.55      0.54      0.54       264

Confusion Matrix (Decision Tree):
[[ 43  58]
 [ 63 100]]
