In [None]:
from files import get_onehot_dataset
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
import io
import sys

In [None]:
class DisplayLossCurve(object):
    """Context manager that captures printed training output and plots the loss.

    While the ``with`` body runs, ``sys.stdout`` is replaced by an in-memory
    buffer. On exit the previous stream is restored, every captured line that
    contains ``"loss: "`` is parsed for the number following it, and the values
    are plotted in the order they were printed.

    Make sure the model verbose is set to 1; otherwise nothing is printed and
    there is nothing to plot.

    Args:
        print_loss: When true, also print the parsed losses as a NumPy array
            after the plot is shown.

    Attributes:
        print_loss: The flag passed to the constructor.
        loss_list: Losses parsed from the most recently finished ``with``
            block (an empty list before the first exit).
    """

    def __init__(self, print_loss=False):
        self.print_loss = print_loss
        self.loss_list = []

    @staticmethod
    def _parse_losses(captured):
        """Return the float that follows the last ``"loss: "`` on each line of ``captured``."""
        marker = "loss: "
        losses = []
        for line in captured.splitlines():
            if marker not in line:
                continue
            fields = line.rsplit(marker, 1)[-1].split()
            if not fields:
                continue
            try:
                losses.append(float(fields[0].rstrip(",;")))
            except ValueError:
                # Some other message that merely mentions "loss: "; skip it
                # rather than raising from inside __exit__.
                continue
        return losses

    def __enter__(self):
        """Start redirecting ``sys.stdout`` into a buffer and return ``self``."""
        self.old_stdout = sys.stdout
        sys.stdout = self.mystdout = io.StringIO()
        return self

    def __exit__(self, *args, **kwargs):
        """Restore ``sys.stdout``, parse the captured losses and plot them.

        Returns ``None``, so an exception raised inside the ``with`` body
        still propagates to the caller.
        """
        # Restore the stream first so everything printed below is visible.
        sys.stdout = self.old_stdout
        self.loss_list = loss_list = self._parse_losses(self.mystdout.getvalue())

        if not loss_list:
            # An empty figure would hide the real problem. Stay quiet when an
            # exception is in flight: its traceback is the useful message.
            body_raised = bool(args) and args[0] is not None
            if not body_raised:
                print("No loss values were captured; make sure the model verbose is set to 1.")
            return

        plt.figure()
        plt.plot(np.arange(len(loss_list)), loss_list)
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.show()
        if self.print_loss:
            print("=============== Loss Array ===============")
            print(np.array(loss_list))


In [None]:
# Load the dataset (presumably one-hot encoded, going by the helper's name --
# confirm in `files`). "cardio" is the label column; every other column is
# used as a feature.
df = get_onehot_dataset()
is_feature = df.columns != 'cardio'
X = df.loc[:, is_feature].to_numpy()
# Double brackets select a one-column frame, so y stays two-dimensional here.
y = df[["cardio"]].to_numpy()

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)
# Flatten both label splits to 1-D.
y_train, y_test = (np.ravel(part) for part in (y_train, y_test))

In [None]:
# Displaying loss
# Baseline run: log-loss SGD classifier with a constant learning rate.
# verbose=1 is required because DisplayLossCurve builds its plot from the
# lines the estimator prints while fitting. random_state pins the estimator's
# data shuffling so the curve is identical on every Restart & Run All.
with DisplayLossCurve():
    baseline_clf = SGDClassifier(
        loss="log_loss",
        learning_rate="constant",
        eta0=0.001,
        max_iter=5000,
        verbose=1,
        random_state=0,
    )
    baseline_clf.fit(X_train, y_train)

In [None]:
# Changing learning rate
# Refit the same classifier with several constant learning rates and draw one
# loss curve per value. Every fit uses the same random_state, so differences
# between the curves come from eta0 rather than from a different shuffle.
for eta0 in (0.0005, 0.2, 0.3):
    # Printed outside the `with` block: anything printed inside it is captured
    # by DisplayLossCurve and never reaches the cell output.
    print(f"eta0 = {eta0}")
    with DisplayLossCurve():
        SGDClassifier(
            loss="log_loss",
            learning_rate="constant",
            eta0=eta0,
            max_iter=5000,
            verbose=1,
            random_state=0,
        ).fit(X_train, y_train)

In [None]:
# Stratified cross validation
# 20-fold stratified CV over the full dataset; `acc` collects one accuracy
# score per fold. random_state makes the SGD shuffling, and therefore the
# per-fold scores, reproducible.
model = SGDClassifier(loss="log_loss", learning_rate="constant", eta0=0.001, max_iter=50000, random_state=0)
# Alternative estimator to compare against:
# model = LogisticRegression()
skf = StratifiedKFold(n_splits=20)
# Flatten the labels once instead of re-flattening both splits on every fold.
y_flat = np.ravel(y)
acc = []
# NOTE: this loop rebinds X_train / X_test / y_train / y_test, so after this
# cell they hold the last fold rather than the earlier hold-out split.
for train, test in skf.split(X, y_flat):
    X_train, X_test = X[train], X[test]
    y_train, y_test = y_flat[train], y_flat[test]
    model.fit(X_train, y_train)
    acc.append(model.score(X_test, y_test))

In [None]:
# Report the per-fold accuracies and plot them. The x axis is the position of
# each score in `acc` (the fold index) and the y axis is the score itself.
print("Accuracy: ", acc)
plt.plot(acc)
plt.xlabel("Fold")
plt.ylabel("Accuracy")
plt.show()