In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [3]:
df=pd.read_csv("heart1.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,77,1,0,125,304,0,0,162,1,0.0,2,3,2,0
1,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
2,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
3,76,0,2,140,197,0,2,116,0,1.1,1,0,2,1
4,74,0,1,120,269,0,0,121,1,0.2,2,1,2,1


In [4]:
cp=pd.get_dummies(df['cp'],prefix='cp',drop_first=True)
thal=pd.get_dummies(df['ca'],prefix='ca',drop_first=True)
slope=pd.get_dummies(df['slope'],prefix='slope',drop_first=True)

In [5]:
new_df=pd.concat([df,cp,thal,slope],axis=1)
new_df=new_df.drop(['cp','ca','slope'],axis=1)
new_df.head()

Unnamed: 0,age,sex,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,thal,target,cp_1,cp_2,cp_3,ca_1,ca_2,ca_3,ca_4,slope_1,slope_2
0,77,1,125,304,0,0,162,1,0.0,2,0,0,0,0,0,0,1,0,0,1
1,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
2,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
3,76,0,140,197,0,2,116,0,1.1,2,1,0,1,0,0,0,0,0,1,0
4,74,0,120,269,0,0,121,1,0.2,2,1,1,0,0,1,0,0,0,0,1


In [6]:
X = new_df.drop(["target","fbs","restecg","thal"],axis=1)
y = new_df['target']

In [7]:
#Split data into training and test data
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=2)

# Decision Tree

In [13]:
decitrr=DecisionTreeClassifier()
decitrr.fit(X_train,y_train)

DecisionTreeClassifier()

In [14]:
pre_deci=decitrr.predict(X_test)

In [15]:
decitrr.score(X_test,y_test)

0.8974358974358975

In [42]:
acc=accuracy_score(pre_deci,y_test)

In [43]:
cf=confusion_matrix(pre_deci,y_test)

In [44]:
print("\nDecision Tree")
print("Confusion Matrix")
print(cf)
print("Accuracy: "+"{:.2%}".format(acc))


Decision Tree
Confusion Matrix
[[ 26   5]
 [ 11 114]]
Accuracy: 89.74%


# Random Forest

In [35]:
rf=RandomForestClassifier(criterion='gini',bootstrap=False, max_depth=50, max_features=5,
                       n_estimators=10)
rf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=False, max_depth=50, max_features=5,
                       n_estimators=10)

In [36]:
rf.predict(X_test)

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=int64)

In [37]:
pre_rf=rf.predict(X_test)

In [38]:
rf.score(X_test,y_test)

0.9230769230769231

In [47]:
acc1=accuracy_score(pre_rf, y_test)

In [48]:
cf1=confusion_matrix(pre_rf,y_test)

In [49]:
print("\nRandom Forest")
print("Confusion Matrix")
print(cf1)
print("Accuracy: "+"{:.2%}".format(acc1))


Random Forest
Confusion Matrix
[[ 28   3]
 [  9 116]]
Accuracy: 92.31%


# Adaboost

In [50]:
abc=AdaBoostClassifier(learning_rate=1)
abc.fit(X_train, y_train)

AdaBoostClassifier(learning_rate=1)

In [51]:
abc_pre=abc.predict(X_test)

In [52]:
abc.score(X_test,y_test)

0.9102564102564102

In [53]:
acc2=accuracy_score(abc_pre, y_test)

In [54]:
cf2=confusion_matrix(abc_pre,y_test)

In [55]:
print("\nAdaBoost")
print("Confusion Matrix")
print(cf2)
print("Accuracy: "+"{:.2%}".format(acc2))


AdaBoost
Confusion Matrix
[[ 28   5]
 [  9 114]]
Accuracy: 91.03%


# Logistic Regression

In [56]:
lr=LogisticRegression(C=2)
lr.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression(C=2)

In [57]:
predictionlr=lr.predict(X_test)

In [58]:
lr.score(X_test,y_test)

0.9102564102564102

In [62]:
acc3=accuracy_score(predictionlr, y_test)

In [63]:
cf3=confusion_matrix(predictionlr,y_test)

In [65]:
print("\nLogisticRegression")
print("Confusion Matrix")
print(cf3)
print("Accuracy: "+"{:.2%}".format(acc3))


LogisticRegression
Confusion Matrix
[[ 25   2]
 [ 12 117]]
Accuracy: 91.03%
