In [39]:
# https://archive.ics.uci.edu/dataset/45/heart+disease

# !pip install ucimlrepo
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from ucimlrepo import fetch_ucirepo
heart_disease = fetch_ucirepo(id=45)

In [14]:
df = pd.DataFrame(data=heart_disease.data.features)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0
299,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0
300,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0
301,57,0,2,130,236,0,2,174,0,0.0,2,1.0,3.0


In [20]:
heart_disease.variables

Unnamed: 0,name,role,type,demographic,description,units,missing_values
0,age,Feature,Integer,Age,,years,no
1,sex,Feature,Categorical,Sex,,,no
2,cp,Feature,Categorical,,,,no
3,trestbps,Feature,Integer,,resting blood pressure (on admission to the ho...,mm Hg,no
4,chol,Feature,Integer,,serum cholestoral,mg/dl,no
5,fbs,Feature,Categorical,,fasting blood sugar > 120 mg/dl,,no
6,restecg,Feature,Categorical,,,,no
7,thalach,Feature,Integer,,maximum heart rate achieved,,no
8,exang,Feature,Categorical,,exercise induced angina,,no
9,oldpeak,Feature,Integer,,ST depression induced by exercise relative to ...,,no


In [23]:
df["target"] = heart_disease.data.targets
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0


In [None]:
# drop na
if df.isna().any().any():
    df.dropna(inplace=True)

In [32]:
df.isna().any().any(), df.isna().sum()

(False,
 age         0
 sex         0
 cp          0
 trestbps    0
 chol        0
 fbs         0
 restecg     0
 thalach     0
 exang       0
 oldpeak     0
 slope       0
 ca          0
 thal        0
 target      0
 dtype: int64)

In [44]:
X = df.drop("target", axis=1).values
y = df["target"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(penalty="l2", C=1.0, solver="lbfgs", max_iter=200) # L2 regularization is Lasso 
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(y_pred)

[0 0 0 2 0 0 0 0 3 0 0 0 0 0 0 0 0 0 2 0 1 2 1 0 3 0 3 1 0 0 0 0 3 0 0 0 1
 0 3 1 0 2 3 0 0 0 0 1 0 0 0 2 0 0 2 1 0 0 0 3]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=200).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [45]:
model.score(X_test, y_test)

0.7

In [51]:
model.predict_proba(X_test)[:5]

array([[9.09848998e-01, 7.61839643e-02, 7.22515977e-03, 2.86342561e-03,
        3.87845190e-03],
       [8.50845231e-01, 1.22980779e-01, 2.00763823e-02, 2.37607873e-03,
        3.72152855e-03],
       [9.54189687e-01, 4.18011788e-02, 2.75855362e-03, 4.51354963e-04,
        7.99225466e-04],
       [4.67308774e-02, 2.37101193e-01, 4.71187552e-01, 8.36245066e-02,
        1.61355871e-01],
       [9.65783104e-01, 3.16756814e-02, 1.98043708e-03, 1.71538971e-04,
        3.89238700e-04]])

In [None]:
import numpy as np
np.argmax(model.predict_proba(X_test), axis=1) # With softmax activation function

array([0, 0, 0, 2, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 2,
       1, 0, 3, 0, 3, 1, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 3, 1, 0, 2, 3, 0,
       0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 2, 1, 0, 0, 0, 3], dtype=int64)