In [13]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer


In [14]:
df = pd.read_csv('/content/heart_disease_uci.csv')


In [15]:
label_encoder = LabelEncoder()

In [16]:
for column in df.columns:
    if df[column].dtype == 'object':
        label_encoder.fit(df[column].unique())
        df[column] = label_encoder.transform(df[column])

In [17]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 0:-1], df.iloc[:, -1], test_size=0.2, random_state=2)


In [18]:
imputer = SimpleImputer(strategy='mean')

In [19]:
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [20]:
clf1 = LogisticRegression()
clf2 = DecisionTreeClassifier()
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
y_pred1 = clf1.predict(X_test)
y_pred2 = clf2.predict(X_test)

In [22]:
from sklearn.metrics import accuracy_score,confusion_matrix
print("Accuracy of Logistic Regression",accuracy_score(y_test,y_pred1))
print("Accuracy of Decision Trees",accuracy_score(y_test,y_pred2))

Accuracy of Logistic Regression 0.532608695652174
Accuracy of Decision Trees 0.5760869565217391


In [23]:
confusion_matrix(y_test,y_pred1)

array([[61, 15,  1,  2,  0],
       [21, 32,  4,  3,  0],
       [ 8, 10,  1,  3,  0],
       [ 3,  8,  3,  4,  0],
       [ 1,  2,  2,  0,  0]])

In [25]:
print("Logistic Regression Confusion Matrix\n")
num_classes = len(np.unique(y_test))
pd.DataFrame(confusion_matrix(y_test, y_pred1), columns=list(range(num_classes)))

Logistic Regression Confusion Matrix



Unnamed: 0,0,1,2,3,4
0,61,15,1,2,0
1,21,32,4,3,0
2,8,10,1,3,0
3,3,8,3,4,0
4,1,2,2,0,0


In [27]:
print("Decision Tree Confusion Matrix\n")
num_classes = len(np.unique(y_test))
pd.DataFrame(confusion_matrix(y_test, y_pred2), columns=list(range(num_classes)))

Decision Tree Confusion Matrix



Unnamed: 0,0,1,2,3,4
0,63,8,3,4,1
1,10,32,7,8,3
2,1,7,5,7,2
3,0,6,6,5,1
4,0,0,1,3,1


In [28]:
result = pd.DataFrame()
result['Actual Label'] = y_test
result['Logistic Regression Prediction'] = y_pred1
result['Decision Tree Prediction'] = y_pred2

In [29]:
result.sample(10)

Unnamed: 0,Actual Label,Logistic Regression Prediction,Decision Tree Prediction
343,0,0,0
525,1,0,1
418,0,0,0
226,0,0,0
440,0,1,3
699,3,1,2
62,1,0,1
295,0,0,1
879,1,1,3
7,0,0,0


In [30]:
from sklearn.metrics import recall_score,precision_score,f1_score

In [33]:
print("For Logistic regression Model")
print("-"*50)
num_classes = len(np.unique(y_test))
cdf = pd.DataFrame(confusion_matrix(y_test,y_pred1),columns=list(range(0,num_classes)), index=list(range(0,num_classes)))
print(cdf)
print("-"*50)
print("Precision - ",precision_score(y_test,y_pred1, average='weighted'))
print("Recall - ",recall_score(y_test,y_pred1, average='weighted'))
print("F1 score - ",f1_score(y_test,y_pred1, average='weighted'))

For Logistic regression Model
--------------------------------------------------
    0   1  2  3  4
0  61  15  1  2  0
1  21  32  4  3  0
2   8  10  1  3  0
3   3   8  3  4  0
4   1   2  2  0  0
--------------------------------------------------
Precision -  0.47784061882999435
Recall -  0.532608695652174
F1 score -  0.5004376981779012


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [35]:
print("For DT Model")
print("-"*50)
num_classes = len(np.unique(y_test))
cdf = pd.DataFrame(confusion_matrix(y_test,y_pred2),
                   columns=list(range(0,num_classes)),
                   index=list(range(0,num_classes)))
print(cdf)
print("-"*50)
print("Precision - ",precision_score(y_test,y_pred2, average='weighted'))
print("Recall - ",recall_score(y_test,y_pred2, average='weighted'))
print("F1 score - ",f1_score(y_test,y_pred2, average='weighted'))

For DT Model
--------------------------------------------------
    0   1  2  3  4
0  63   8  3  4  1
1  10  32  7  8  3
2   1   7  5  7  2
3   0   6  6  5  1
4   0   0  1  3  1
--------------------------------------------------
Precision -  0.6110951368719006
Recall -  0.5760869565217391
F1 score -  0.5913606259978168


In [36]:
precision_score(y_test,y_pred1,average=None)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([0.64893617, 0.47761194, 0.09090909, 0.33333333, 0.        ])

In [37]:
precision_score(y_test,y_pred2,average=None)

array([0.85135135, 0.60377358, 0.22727273, 0.18518519, 0.125     ])

In [38]:
recall_score(y_test,y_pred2,average=None)

array([0.79746835, 0.53333333, 0.22727273, 0.27777778, 0.2       ])