In [11]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, accuracy_score , recall_score , precision_score , classification_report , confusion_matrix
import joblib

In [12]:
data = pd.read_csv("heart.csv")

In [13]:
X = data.iloc[:,:-1]
X.head()
Y = data.iloc[:,-1]
Y.head()
target = data.groupby("target").age.agg([len])

In [14]:
target.head()
#This dataset is approximately balanced So i will use the accuracy as a metric for evaluating my models

Unnamed: 0_level_0,len
target,Unnamed: 1_level_1
0,499
1,526


In [15]:
# split my dataset
X_train , X_test , Y_train , Y_test = train_test_split(X,Y,test_size=0.25)

In [16]:
#Use the SVC model
model = SVC(kernel="linear")
model.fit(X_train,Y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(Y_test,predictions)
classification_rep = classification_report(Y_test,predictions)
print(accuracy)
print(classification_rep)

0.867704280155642
              precision    recall  f1-score   support

           0       0.92      0.79      0.85       124
           1       0.83      0.94      0.88       133

    accuracy                           0.87       257
   macro avg       0.88      0.87      0.87       257
weighted avg       0.87      0.87      0.87       257



In [17]:
#Use the logistic regression model
model = LogisticRegression()
model.fit(X_train,Y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(Y_test,predictions)
classification_rep = classification_report(Y_test,predictions)
print(accuracy)
print(classification_rep)

0.8365758754863813
              precision    recall  f1-score   support

           0       0.87      0.78      0.82       124
           1       0.81      0.89      0.85       133

    accuracy                           0.84       257
   macro avg       0.84      0.83      0.84       257
weighted avg       0.84      0.84      0.84       257



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
#Use the decision tree classifier
model = DecisionTreeClassifier()
model.fit(X_train,Y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(Y_test,predictions)
classification_rep = classification_report(Y_test,predictions)
print(accuracy)
print(classification_rep)


0.9883268482490273
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       124
           1       1.00      0.98      0.99       133

    accuracy                           0.99       257
   macro avg       0.99      0.99      0.99       257
weighted avg       0.99      0.99      0.99       257



In [19]:
# The best model is the decision tree classifier with 0.98 accuracy
# Now we are going to save the decision tree model in a file using joblib library
joblib.dump(model , "model.sav")

['model.sav']