# Imports

In [2]:
import pandas as pd
import pylab as pl
import numpy as np
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline 

# The Dataset

### Read

In [12]:
hrt_df = pd.read_csv("hearth.csv")
hrt_df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


### Select X , Y

In [13]:
hrt_df_x = np.asanyarray(hrt_df[['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh', 'exng', 'oldpeak', 'slp', 'caa', 'thall']])
hrt_df_y = np.asanyarray(hrt_df['output'])

#hrt_df_x[0:5]
hrt_df_y[0:5]

array([1, 1, 1, 1, 1], dtype=int64)

# LR (LogisticRegression)

In [14]:
x_lr = hrt_df_x_norm
y_lr = hrt_df_y

### Normalize

In [15]:
scaler = preprocessing.StandardScaler().fit(x_lr)
x_lr_norm = scaler.transform(x_lr)
x_lr_norm[0:5]

array([[ 0.9521966 ,  0.68100522,  1.97312292,  0.76395577, -0.25633371,
         2.394438  , -1.00583187,  0.01544279, -0.69663055,  1.08733806,
        -2.27457861, -0.71442887, -2.14887271],
       [-1.91531289,  0.68100522,  1.00257707, -0.09273778,  0.07219949,
        -0.41763453,  0.89896224,  1.63347147, -0.69663055,  2.12257273,
        -2.27457861, -0.71442887, -0.51292188],
       [-1.47415758, -1.46841752,  0.03203122, -0.09273778, -0.81677269,
        -0.41763453, -1.00583187,  0.97751389, -0.69663055,  0.31091206,
         0.97635214, -0.71442887, -0.51292188],
       [ 0.18017482,  0.68100522,  0.03203122, -0.66386682, -0.19835726,
        -0.41763453,  0.89896224,  1.23989692, -0.69663055, -0.20670527,
         0.97635214, -0.71442887, -0.51292188],
       [ 0.29046364, -1.46841752, -0.93851463, -0.66386682,  2.08204965,
        -0.41763453,  0.89896224,  0.58393935,  1.43548113, -0.37924438,
         0.97635214, -0.71442887, -0.51292188]])

### 80-20 Data Separation

In [16]:
x_lr_norm_train, x_lr_norm_test, y_lr_train, y_lr_test = train_test_split(x_lr_norm, y_lr, test_size=0.2, random_state=4)
print ('Train set:', x_lr_norm_train.shape,  y_lr_train.shape)
print ('Test set:', x_lr_norm_test.shape,  y_lr_test.shape)

Train set: (242, 13) (242,)
Test set: (61, 13) (61,)


### Classification

In [34]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(C=0.01, solver='saga').fit(x_lr_norm_train, y_lr_train)
y_ = lr.predict(x_lr_norm_test)
y_prob = lr.predict_proba(x_lr_norm_test)

print(y_)
print(y_lr_test)
print(y_prob)

[1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1 1 1 1
 1 1 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 0 1]
[1 0 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1 1
 1 0 0 0 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 0 0 1]
[[0.35294566 0.64705434]
 [0.70004992 0.29995008]
 [0.37869372 0.62130628]
 [0.1761531  0.8238469 ]
 [0.37722859 0.62277141]
 [0.47625734 0.52374266]
 [0.60768358 0.39231642]
 [0.21594784 0.78405216]
 [0.46826675 0.53173325]
 [0.34078284 0.65921716]
 [0.40345714 0.59654286]
 [0.18647407 0.81352593]
 [0.37064897 0.62935103]
 [0.54471694 0.45528306]
 [0.31055455 0.68944545]
 [0.84958133 0.15041867]
 [0.27351639 0.72648361]
 [0.2716512  0.7283488 ]
 [0.65826879 0.34173121]
 [0.43338158 0.56661842]
 [0.22383012 0.77616988]
 [0.36599003 0.63400997]
 [0.78899046 0.21100954]
 [0.43717181 0.56282819]
 [0.4262527  0.5737473 ]
 [0.73333576 0.26666424]
 [0.56590302 0.43409698]
 [0.210903   0.789097  ]
 [0.41266239 0.58733761]
 [0.78986911 0.21013089]


### Evaluation

In [18]:
from sklearn.metrics import confusion_matrix, jaccard_score, classification_report

In [35]:
jaccard_score(y_lr_test, y_, pos_label=0)

0.7692307692307693

In [36]:
print(confusion_matrix(y_lr_test, y_, labels=[1,0]))

[[35  1]
 [ 5 20]]


In [37]:
print (classification_report(y_lr_test, y_))

              precision    recall  f1-score   support

           0       0.95      0.80      0.87        25
           1       0.88      0.97      0.92        36

    accuracy                           0.90        61
   macro avg       0.91      0.89      0.90        61
weighted avg       0.91      0.90      0.90        61



# RC (RidgeClassifier)

In [48]:
from sklearn.linear_model import RidgeClassifier

rc = RidgeClassifier(alpha=1, solver='sparse_cg').fit(x_lr_norm_train, y_lr_train)
y_rc = lr.predict(x_lr_norm_test)
y_prob_rc = lr.predict_proba(x_lr_norm_test)

print(y_rc)
print(y_lr_test)
print(y_prob_rc)

[1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1 1 1 1
 1 1 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 0 1]
[1 0 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1 1
 1 0 0 0 0 0 1 1 1 0 0 0 1 0 1 1 1 1 1 1 1 0 0 1]
[[0.35294566 0.64705434]
 [0.70004992 0.29995008]
 [0.37869372 0.62130628]
 [0.1761531  0.8238469 ]
 [0.37722859 0.62277141]
 [0.47625734 0.52374266]
 [0.60768358 0.39231642]
 [0.21594784 0.78405216]
 [0.46826675 0.53173325]
 [0.34078284 0.65921716]
 [0.40345714 0.59654286]
 [0.18647407 0.81352593]
 [0.37064897 0.62935103]
 [0.54471694 0.45528306]
 [0.31055455 0.68944545]
 [0.84958133 0.15041867]
 [0.27351639 0.72648361]
 [0.2716512  0.7283488 ]
 [0.65826879 0.34173121]
 [0.43338158 0.56661842]
 [0.22383012 0.77616988]
 [0.36599003 0.63400997]
 [0.78899046 0.21100954]
 [0.43717181 0.56282819]
 [0.4262527  0.5737473 ]
 [0.73333576 0.26666424]
 [0.56590302 0.43409698]
 [0.210903   0.789097  ]
 [0.41266239 0.58733761]
 [0.78986911 0.21013089]


### Evaluation

In [49]:
print(jaccard_score(y_lr_test, y_rc, pos_label=0))
print(confusion_matrix(y_lr_test, y_rc, labels=[1,0]))
print (classification_report(y_lr_test, y_rc))

0.7692307692307693
[[35  1]
 [ 5 20]]
              precision    recall  f1-score   support

           0       0.95      0.80      0.87        25
           1       0.88      0.97      0.92        36

    accuracy                           0.90        61
   macro avg       0.91      0.89      0.90        61
weighted avg       0.91      0.90      0.90        61

