In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy
import seaborn as sns

In [None]:
# Read the heart-disease records from the working directory and preview the first rows.
data = pd.read_csv(filepath_or_buffer="heart.csv")
data.head(n=5)

In [None]:
# Print a structural summary of the frame (columns, non-null counts, dtypes).
data.info()

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Histogram of every numeric column on one large canvas.
data.hist(
    figsize=(20, 20),
)
plt.show()

In [None]:
# Class balance: number of rows for each value of the target column.
sns.countplot(data=data, x='target')

In [None]:
# Same class balance as a table of counts.
data.target.value_counts()

In [None]:
# Rows per sex, split by target value.
sns.countplot(data=data, x='sex', hue='target')

In [None]:
# Target counts within each sex group, as a table.
data.groupby(by='sex')['target'].value_counts()

In [None]:
# Quick look at the first rows before the age analysis.
data.head(n=5)

In [None]:
# Number of rows recorded at each distinct age.
plt.figure(figsize=(10, 10))
sns.countplot(data=data, x='age')
plt.show()

In [None]:
# Fine-grained (80-bin) histogram of the age column.
data['age'].hist(bins=80)

In [None]:
# Target counts within each age group, as a table.
data.groupby(by='age')['target'].value_counts()

In [None]:
# Rows per age, split by target value; wide canvas so every age fits on the x axis.
plt.figure(figsize=(20, 10))
sns.countplot(data=data, x='age', hue='target')
plt.show()

In [None]:
# Report the arithmetic mean of the age column.
print(f"The mean age is: {data.age.mean()}")

In [None]:
# Independent (deep) copy used by the plotly charts, so `data` itself is never touched by them.
d_plot = data.copy(deep=True)

In [None]:
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
from plotly.offline import init_notebook_mode,plot,iplot

In [None]:
# Two subsets of the plotting frame, split on the outcome column:
#   D - rows whose target is non-zero
#   H - rows whose target is zero
# (presumably "Disease" / "Healthy", matching the labels used in the charts of this cell)
D = d_plot.loc[d_plot['target'].ne(0)]
H = d_plot.loc[d_plot['target'].eq(0)]

def target_countplot():
    """Render a horizontal bar chart with the number of rows in each outcome class.

    Reads the module-level ``d_plot`` frame and displays the figure inline via
    plotly's ``iplot``. Takes no arguments and returns nothing.

    The two counts are computed explicitly per class (non-zero target ->
    'Heart Disease', zero target -> 'Healthy') instead of relying on
    ``value_counts()``: that method orders its result by frequency, so pairing
    it with a fixed label list attaches the labels to the wrong bars whenever
    the healthy class happens to be the larger one.
    """
    target = d_plot['target']
    # Order of the counts matches the order of the labels and colours below.
    counts = [int((target != 0).sum()), int((target == 0).sum())]

    grph = go.Bar(x=counts,
                  y=['Heart Disease','Healthy'],
                  orientation='h',
                  text=counts,
                  textposition='inside',
                  marker=dict(color=['red','green'],
                              line=dict(color='black',width=1.5)))
    layout = dict(title="count of target column")
    fig = dict(data=[grph],layout=layout)
    iplot(fig)
                  
def target_pie():
    """Render a pie chart of the share of rows in each outcome class.

    Reads the module-level ``d_plot`` frame and displays the figure inline via
    plotly's ``iplot``. Takes no arguments and returns nothing.

    The slice values are computed explicitly per class (non-zero target ->
    'Heart Disease', zero target -> 'Healthy'). Feeding ``value_counts()``
    straight in would order the values by frequency, which mislabels the
    slices whenever the healthy class is the larger one.
    """
    target = d_plot['target']
    # Order of the counts matches the order of the labels and colours below.
    counts = [int((target != 0).sum()), int((target == 0).sum())]

    grph = go.Pie(labels=['Heart Disease','Healthy'],
                  values=counts,
                  marker=dict(colors=['red','green'],
                              line=dict(color="black",width=1.5)))
    layout = dict(title="Heart disease VS Healthy")
    fig = dict(data=[grph],layout=layout)
    iplot(fig)
                  
                  

In [None]:
# Draw both class-balance charts, bar chart first and pie chart second.
for render_chart in (target_countplot, target_pie):
    render_chart()

In [None]:
# Categorical feature columns, one bar chart each in the grid below.
# Each column is listed once ('slope' used to appear twice, which drew the same chart twice).
categorical_values = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

In [None]:
# One bar chart per categorical feature, laid out on a 3x3 grid:
# x is the feature's category, y is the target column aggregated by seaborn's barplot.
plt.figure(figsize=(15, 10))
for position, feature in enumerate(categorical_values, start=1):
    plt.subplot(3, 3, position)
    sns.barplot(data=data, x=feature, y="target")
    plt.xlabel(feature)
    plt.ylabel("Possibility of Heart Disease")

MODEL BUILDING

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree

In [None]:
# Reminder of the column layout before building the feature matrix.
data.head(n=5)

In [None]:
# Feature matrix: every column except the label. Label vector: the target column.
X = data.drop(columns='target')
Y = data.loc[:, 'target']

In [None]:
# Seed the tree so repeated runs build the same tree and report the same accuracy
# (the train/test split in this notebook is seeded with 0 as well).
d_tree = DecisionTreeClassifier(random_state=0)
# The features are fed in unscaled, so give the solver more than the default
# 100 iterations to converge instead of stopping early with a ConvergenceWarning.
lr = LogisticRegression(max_iter=1000)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Train the decision tree (fit works in place and returns the same estimator),
# predict the held-out rows and report accuracy as a percentage.
d_tree.fit(x_train, y_train)
y_pred = d_tree.predict(x_test)
acc = 100 * accuracy_score(y_true=y_test, y_pred=y_pred)
print("The accuracy of the Decision Tree model is: {}".format(acc))

In [None]:
# Train the logistic regression, predict the held-out rows and report accuracy as a percentage.
lr.fit(X=x_train, y=y_train)
y_lr_pred = lr.predict(X=x_test)
acc_lr = 100 * accuracy_score(y_true=y_test, y_pred=y_lr_pred)
print(f"The accuracy of the logistic regression model is: {acc_lr}")

In [None]:
# Confusion matrix of the decision-tree predictions (y_pred), drawn as an annotated heatmap.
sns.set(font_scale=1.5)
conf = confusion_matrix(y_true=y_test, y_pred=y_pred)
sns.heatmap(data=conf, annot=True, fmt='g')
plt.show()

In [None]:
# Confusion matrix of the logistic-regression predictions (y_lr_pred), drawn as an annotated heatmap.
sns.set(font_scale=1.5)
conf = confusion_matrix(y_true=y_test, y_pred=y_lr_pred)
sns.heatmap(data=conf, annot=True, fmt='g')
plt.show()

In [None]:
# Text dump of the fitted tree's decision rules. Passing the training columns as
# feature names makes each rule show the real column name rather than a generic
# "feature_<index>" placeholder.
text_repr = tree.export_text(d_tree, feature_names=list(X.columns))
print(text_repr)

In [None]:
# Take the feature names from the frame the tree was trained on, so each node is
# labelled with the column it really splits on (a hand-typed list can silently
# drift from the actual column order).
feature_names = list(X.columns)
# One name per class, in ascending order of the class value. Assumes a binary
# target where 0 is healthy and non-zero is heart disease, as in the plotly charts.
# A bare string here would be indexed character by character, not used as a label.
class_names = ['Healthy', 'Heart Disease']

fig = plt.figure(figsize=(25,20))
# Stored under its own name so `plot` imported from plotly.offline is not overwritten.
tree_artists = tree.plot_tree(d_tree,
                              feature_names=feature_names,
                              class_names=class_names,
                              filled=True)
# NOTE(review): the file name's spelling is kept as-is so existing references to it keep working.
fig.savefig("decistion_tree.png")

In [None]:

# Final look at the first rows; the frame has not been modified by the modelling cells.
data.head(n=5)