In [4]:
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [5]:
# Loading data
file_path = Path("Project_Data_Folder/cleveland_heart_data.csv")
df_heart_disease = pd.read_csv(file_path)
df_heart_disease.head()


Unnamed: 0,index,age,sex,chest_pain,rest_bp,chol,fbs,restecg,max_heart,exang,stdepress,peakex,maj_vess,thal,target
0,0,67,1,4,160,286,0,2,108,1,1.5,2,3,3,2
1,1,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
2,2,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
3,3,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0
4,4,56,1,2,120,236,0,0,178,0,0.8,1,0,3,0


In [6]:
# Define the features set.
X = df_heart_disease.copy()
# Drop "target" Column and keep only the features
X = X.drop("target", axis=1)
X.head()

Unnamed: 0,index,age,sex,chest_pain,rest_bp,chol,fbs,restecg,max_heart,exang,stdepress,peakex,maj_vess,thal
0,0,67,1,4,160,286,0,2,108,1,1.5,2,3,3
1,1,67,1,4,120,229,0,2,129,1,2.6,2,2,7
2,2,37,1,3,130,250,0,0,187,0,3.5,3,0,3
3,3,41,0,2,130,204,0,2,172,0,1.4,1,0,3
4,4,56,1,2,120,236,0,0,178,0,0.8,1,0,3


In [7]:
# Define the target set.
y = df_heart_disease["target"].values
y[:5]

array([2, 1, 0, 0, 0])

In [8]:
# Split Data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=41)


In [9]:
# Determine the shape of our training and testing sets.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(18, 14)
(7, 14)
(18,)
(7,)


In [10]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Creating the decision tree classifier instance.
model = tree.DecisionTreeClassifier()
# Fitting the model.
model = model.fit(X_train_scaled, y_train)

In [12]:
# Making predictions using the testing data.
predictions = model.predict(X_test_scaled)

In [13]:
predictions

array([3, 2, 0, 1, 0, 0, 2])

In [14]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1","Actual 3","Actual 4"], columns=["Predicted 0", "Predicted 1", "Predicted 3", "Predicted 4"])

cm_df

Unnamed: 0,Predicted 0,Predicted 1,Predicted 3,Predicted 4
Actual 0,1,1,2,0
Actual 1,1,0,0,0
Actual 3,1,0,0,0
Actual 4,0,0,0,1
