### **1. Import Libraries and Dataset**

In [1]:
import pandas as pd
import numpy as np

In [2]:
df0 = pd.read_csv("heart.csv")
df = df0.copy()
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [18]:
df.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [56]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [57]:
df["thal"] = df["thal"].replace(0, np.nan)
df["thal"].fillna(2, inplace = True)
df["thal"] = df["thal"].astype("int")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["thal"].fillna(2, inplace = True)


### **Logistic Regression**

In [58]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [59]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

In [60]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', LogisticRegression())
])

In [61]:
from sklearn.model_selection import train_test_split

In [62]:
X = df.drop("target", axis=1)
y = df["target"]

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [64]:
pipeline.fit(X_train, y_train)

In [65]:
y_pred = pipeline.predict(X_test)

In [66]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

log_reg_df = pd.DataFrame({"Model": "Logistic Regression Classifier",
              "Accuracy": accuracy,
              "Precision": precision,
              "Recall": recall,
              "F1-Score": f1},
            index = [0])
log_reg_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression Classifier,0.817121,0.774648,0.88,0.82397


### **Support Vector Machines**

In [67]:
from sklearn.svm import SVC

In [68]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', SVC())
])

In [69]:
pipeline.fit(X_train, y_train)

In [70]:
y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

svc_df = pd.DataFrame({"Model": "Support Vector Classifier",
              "Accuracy": accuracy,
              "Precision": precision,
              "Recall": recall,
              "F1-Score": f1},
            index = [0])
svc_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Support Vector Classifier,0.848249,0.835938,0.856,0.84585


### **Random Forest**

In [71]:
from sklearn.ensemble import RandomForestClassifier

In [72]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', RandomForestClassifier())
])

In [73]:
pipeline.fit(X_train, y_train)

In [74]:
y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

rf_df = pd.DataFrame({"Model": "Random Forest Classifier",
              "Accuracy": accuracy,
              "Precision": precision,
              "Recall": recall,
              "F1-Score": f1},
            index = [0])
rf_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Random Forest Classifier,0.988327,1.0,0.976,0.987854


### **Gradient Boosting**

In [75]:
from sklearn.ensemble import GradientBoostingClassifier

In [76]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', GradientBoostingClassifier())
])

In [77]:
pipeline.fit(X_train, y_train)

In [78]:
y_pred = pipeline.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

gb_df = pd.DataFrame({"Model": "Gradient Boosting Classifier",
              "Accuracy": accuracy,
              "Precision": precision,
              "Recall": recall,
              "F1-Score": f1},
            index = [0])
gb_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Gradient Boosting Classifier,0.949416,0.951613,0.944,0.947791


In [80]:
eval_df = pd.concat([log_reg_df, svc_df, rf_df, gb_df])
eval_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression Classifier,0.817121,0.774648,0.88,0.82397
0,Support Vector Classifier,0.848249,0.835938,0.856,0.84585
0,Random Forest Classifier,0.988327,1.0,0.976,0.987854
0,Gradient Boosting Classifier,0.949416,0.951613,0.944,0.947791


### **Create a Pipeline**

In [85]:
import joblib

In [83]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', LogisticRegression())
])

In [84]:
pipeline.fit(X, y)

In [86]:
with open('logistic_regression.pkl', 'wb') as f:
    joblib.dump(pipeline, f)

In [87]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', SVC())
])

In [88]:
pipeline.fit(X, y)

In [89]:
with open('support_vector.pkl', 'wb') as f:
    joblib.dump(pipeline, f)

In [90]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', RandomForestClassifier())
])

In [91]:
pipeline.fit(X, y)

In [92]:
with open('random_forest.pkl', 'wb') as f:
    joblib.dump(pipeline, f)

In [93]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('model', GradientBoostingClassifier())
])

In [94]:
pipeline.fit(X, y)

In [95]:
with open('gradient_boosting.pkl', 'wb') as f:
    joblib.dump(pipeline, f)

In [96]:
X

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2
