In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix, classification_report


In [16]:
complete_data = pd.read_csv("cardio-complete.csv")

In [21]:
X_train, X_test, Y_train, Y_test = train_test_split(complete_data.iloc[:, 1:-1], complete_data.iloc[:, -1], train_size=0.70, test_size=0.30)
model = LogisticRegression(max_iter=1000)
# Map training data from string to numerical
X_train.cholesterol = X_train.cholesterol.map({'Normal':1, 'Above Normal':2, "High":3})
X_train.gluc = X_train.gluc.map({'Normal':1, 'Above Normal':2, "High":3})
X_train.gender = X_train.gender.map({'Men':0, 'Women':1})
# Map testing data from string to numerical
X_test.cholesterol = X_test.cholesterol.map({'Normal':1, 'Above Normal':2, "High":3})
X_test.gluc = X_test.gluc.map({'Normal':1, 'Above Normal':2, "High":3})
X_test.gender = X_test.gender.map({'Men':0, 'Women':1})
# Apply same standardization to this data as was done preivously
X_test_stand = X_test.copy()
X_train_stand = X_train.copy()
cols = ['age', 'weight', 'ap_lo', 'ap_hi']

for col in cols:
    scale = StandardScaler().fit(X_train[[col]])
    X_train[col] = scale.transform(X_train[[col]])
    X_test[col] = scale.transform(X_test[[col]])

model.fit(X_train_stand, Y_train)
predictions_sk = model.predict(X_test_stand)

print("Accuracy: {}% ".format(accuracy_score(Y_test, predictions_sk)*100),
      "RMSE: {}".format(mean_squared_error(Y_test, predictions_sk, squared=False)),
      "Confusion Matrix: \n" + str(confusion_matrix(Y_test, predictions_sk)),
      classification_report(Y_test, predictions_sk),
      sep='\n')


Accuracy: 69.0% 
RMSE: 0.5567764362830022
Confusion Matrix: 
[[113  38]
 [ 55  94]]
              precision    recall  f1-score   support

           0       0.67      0.75      0.71       151
           1       0.71      0.63      0.67       149

    accuracy                           0.69       300
   macro avg       0.69      0.69      0.69       300
weighted avg       0.69      0.69      0.69       300

