## Machine Learning

Using Logistic Regression as a classifier

In [94]:
import pandas as pd
# Read the cleaned heart dataset from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="./database/clean_heart.csv")
df.head(n=5)

Unnamed: 0,age,sex,chest_pain,blood_pressure,cholesterol,blood_sugar,resting_electro,max_heartrate,angina,st_oldpeak,st_slope,major_vessels,cardiopathy_type,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [95]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split the frame into the feature matrix (every column except "target")
# and the label vector.
X = df.drop(columns=["target"])
y = df["target"]

# Hold out a test set. test_size=0.25 is scikit-learn's default, spelled out
# here so the split ratio is visible in the notebook. stratify=y keeps the
# class proportions of `target` the same in both partitions, and random_state
# pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
    stratify=y,
)
print(X_train.shape, X_test.shape)

(768, 13) (257, 13)


In [42]:
# Standardize the features. The scaler is fitted on the training split only,
# and that same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
(X_train_scaled.shape, X_test_scaled.shape)

((768, 13), (257, 13))

In [96]:
# Build the logistic-regression model and train it on the scaled training
# split in one step (fit returns the fitted estimator itself).
# max_iter caps the number of iterations the lbfgs solver may run.
classifier = LogisticRegression(
    solver="lbfgs", max_iter=200, random_state=1
).fit(X_train_scaled, y_train)
print(classifier)

LogisticRegression(max_iter=200, random_state=1)


In [100]:
# Predict labels for the held-out split and place them next to the true
# labels, row by row, under a fresh 0..n-1 index.
y_pred = classifier.predict(X_test_scaled)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test.to_numpy()})

# Score the fitted model on both the training split and the test split.
train_score = classifier.score(X_train_scaled, y_train)
test_score = classifier.score(X_test_scaled, y_test)
print("train score:", train_score)
print("test score:", test_score)
results.head()

train score: 0.8411458333333334
test score: 0.8171206225680934


Unnamed: 0,Prediction,Actual
0,1,0
1,1,1
2,1,1
3,0,0
4,1,1


In [107]:
# Persist the true and predicted test labels as CSV files (row index omitted).
y_test.to_frame().to_csv("./database/y_test.csv", index=False)
pd.DataFrame(data=y_pred).to_csv("./database/y_pred.csv", index=False)