## Importing the libraries



In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load the dataset

In [8]:
# Read the heart-disease CSV into a DataFrame.
heart_csv_path = "../input/heart-disease-dataset/heart.csv"
dataset = pd.read_csv(heart_csv_path)

In [36]:
# Preview the first five rows (the default for head()).
dataset.head(n=5)

In [10]:
# Print pandas' per-column summary of the frame (dtypes and non-null counts).
dataset.info()

In [14]:
# Descriptive statistics, transposed so that each feature is a row.
dataset.describe().T

In [16]:
# Show the column labels; the next cell iterates over them to build num_cols.
dataset.columns

In [20]:
# Keep every column that takes more than two distinct values; binary columns are skipped.
# NOTE(review): integer-coded categorical columns with 3+ levels also pass this
# test - confirm they should be treated as numeric downstream.
num_cols = [name for name in dataset.columns if dataset[name].nunique() > 2]


### `num_cols` lists the columns with more than two distinct values (treated here as numeric)

In [21]:
# Display the selected columns (those with more than two distinct values).
num_cols

### Visualize boxplots to spot the outliers present in the numeric columns

In [27]:
# Draw one horizontal boxplot per column in num_cols, each on its own figure.
for feature in num_cols:
    axis = sns.boxplot(data=dataset[feature], orient='h')
    axis.set_xlabel(feature)
    plt.show()

### Find the boxplot whisker limits (max and min) and remove the records outside that range to drop the outliers

In [29]:
# Remove the rows that fall outside the boxplot whiskers of any column in num_cols.
#
# Whisker limits (Tukey fences) per column:
#     lower = Q1 - 1.5 * IQR        upper = Q3 + 1.5 * IQR
#
# All fences are computed first, on the frame as it is when this cell starts, so
# they correspond to the boxplots of the unfiltered data and the result does not
# depend on the order of num_cols.
# The comparison is inclusive: a value lying exactly on a fence is not an outlier,
# and a column whose IQR is 0 keeps its rows instead of emptying the frame.
#
# This cell overwrites `dataset`; re-running it recomputes the fences on the
# already-trimmed frame, so re-load the data before running it again.
whisker_factor = 1.5

fences = {}
for columns in num_cols:
    q1, q3 = np.percentile(dataset[columns], [25, 75])
    IQR = q3 - q1
    fences[columns] = (q1 - whisker_factor * IQR, q3 + whisker_factor * IQR)

# Positional boolean mask: True while a row is inside every column's fences.
inside_whiskers = np.ones(len(dataset), dtype=bool)
for columns, (min_boxplot, max_boxplot) in fences.items():
    column_values = dataset[columns]
    inside_whiskers &= ((column_values >= min_boxplot) & (column_values <= max_boxplot)).values

dataset = dataset[inside_whiskers]
    
    

In [30]:
# Rows and columns left after the IQR-based filtering in the previous cell.
dataset.shape

In [31]:
# Redraw the per-column boxplots on the filtered frame to see what remains.
for feature in num_cols:
    axis = sns.boxplot(data=dataset[feature], orient='h')
    axis.set_xlabel(feature)
    plt.show()

### Number of missing values in each feature

In [34]:
# Count the missing entries per column (isna is the canonical name of isnull).
dataset.isna().sum()

In [39]:
# Class balance: number of rows for each value of the target column.
target_column = dataset['target']
target_column.value_counts()

In [42]:
# Annotated heatmap of the pairwise correlations between all columns.
correlations = dataset.corr()
plt.figure(figsize=(12, 8), dpi=200)
sns.heatmap(data=correlations, annot=True)

### X contains the input values; Y contains the target values

In [43]:
# Features are every column except the label; the label is the 'target' column.
Y = dataset['target']
X = dataset.drop(columns=['target'])

### Splitting the data into train and test sets

In [54]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=44,
)

In [55]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both splits so no test-set information leaks into the fit.
scaler = StandardScaler()
scaler.fit(x_train)
scaled_x_train = scaler.transform(x_train)
scaled_x_test = scaler.transform(x_test)

In [56]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the scaled training data.
# random_state is fixed (same seed as the train/test split) because the tree
# permutes the features at every split; without a seed the fitted tree, and
# every score and plot derived from it, can change between runs.
dtreeModel = DecisionTreeClassifier(random_state=44).fit(scaled_x_train, y_train)

In [57]:
# Predicted class for every row of the scaled test set.
y_pred = dtreeModel.predict(X=scaled_x_test)
y_pred

### Model Evaluation

In [63]:
# Side-by-side table of true labels and predictions, indexed like y_test.
prediction_columns = {
    'actual valve': y_test,
    'predicted value': y_pred,
}
modelprediction = pd.DataFrame(prediction_columns)
modelprediction.head(n=10)

In [67]:
from sklearn.metrics import classification_report

# classification_report takes the ground truth first and the predictions second.
# With the arguments swapped, precision and recall are reported interchanged for
# each class and the support column counts predictions instead of true labels.
print(classification_report(y_test, y_pred))

In [68]:
# Mean accuracy on each split; a large gap between the two points to overfitting.
train_accuracy = dtreeModel.score(scaled_x_train, y_train)
test_accuracy = dtreeModel.score(scaled_x_test, y_test)
print('Training set score: {:.4f}'.format(train_accuracy))
print('Test set score: {:.4f}'.format(test_accuracy))

In [72]:
from sklearn import metrics

# Plot the confusion matrix of the fitted tree on the test set.
# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2;
# ConfusionMatrixDisplay.from_estimator is its replacement. The old helper is
# only used as a fallback on versions that predate from_estimator.
if hasattr(metrics.ConfusionMatrixDisplay, 'from_estimator'):
    metrics.ConfusionMatrixDisplay.from_estimator(dtreeModel, scaled_x_test, y_test)
else:
    metrics.plot_confusion_matrix(dtreeModel, scaled_x_test, y_test)

In [75]:
from sklearn import tree

# Render the fitted tree on a large canvas; nodes are colour-filled by plot_tree.
tree_canvas_size = (20, 20)
plt.figure(figsize=tree_canvas_size)
tree.plot_tree(decision_tree=dtreeModel, filled=True)