In [46]:
%load_ext autoreload
%autoreload 2

import sys

# Make the project modules under ../src importable. The membership guard keeps
# this cell idempotent: re-running it does not stack duplicate sys.path entries.
if "../src" not in sys.path:
    sys.path.append("../src")

import numpy as np
from make_data import generate_data
from logistic_regression import LogisticRegression
from ftrl import FtrlProximal
from evaluation import eval_accuracy

# --- Shared by both models ---
# Passed as `epoch` to LogisticRegression and as the last argument of
# FtrlProximal.fit_generator.
EPOCH_SIZE = 100

# --- Logistic regression ---
LEARNING_RATE = 0.1  # passed as `lr`

# --- FTRL-Proximal ---
ALPHA = 0.005  # passed as `alpha`
BETA = 1.0     # passed as `beta`
L1 = 0.0       # passed as `l1`
L2 = 1.0       # passed as `l2`

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data

In [7]:
# CSV locations (relative paths) for the train and test splits.
# X_* files hold the features, y_* files hold the labels.
TRAIN_X_PATH = "../data/X_train.csv"
TRAIN_Y_PATH = "../data/y_train.csv"
TEST_X_PATH = "../data/X_test.csv"
TEST_Y_PATH = "../data/y_test.csv"

In [8]:
# Hand the four CSV paths to generate_data; the following cell reads these
# same paths back with np.loadtxt.
# NOTE(review): presumably this (re)writes the files on every run -- confirm
# whether generate_data is seeded so the results below are reproducible.
generate_data(TRAIN_X_PATH, TRAIN_Y_PATH, TEST_X_PATH, TEST_Y_PATH)

In [9]:
# Read the four comma-delimited CSVs into NumPy arrays, in the same order as
# the paths are listed: train features, train labels, test features, test labels.
X_train, y_train, X_test, y_test = (
    np.loadtxt(csv_path, delimiter=",")
    for csv_path in (TRAIN_X_PATH, TRAIN_Y_PATH, TEST_X_PATH, TEST_Y_PATH)
)

## Shape check

In [10]:
# Shapes of the training feature matrix and label vector.
(X_train.shape, y_train.shape)

((100, 3), (100,))

In [11]:
# Shapes of the test feature matrix and label vector.
(X_test.shape, y_test.shape)

((50, 3), (50,))

# Modeling

## Logistic Regression

In [12]:
# Build the logistic regression model from the config constants, then fit it
# on the in-memory training arrays.
model_logit = LogisticRegression(
    lr=LEARNING_RATE,
    epoch=EPOCH_SIZE,
)
model_logit.fit(X_train, y_train)

In [13]:
# Display the fitted parameter vector.
# NOTE(review): the recorded output has 4 values for 3 input features --
# presumably one of them is a bias term; confirm against LogisticRegression.
model_logit.theta

array([ 0.17620337,  0.24883926,  0.94089415, -1.64400328])

## FTRL

In [29]:
# Take the feature count from the loaded training matrix instead of hard-coding
# it. X_train was read from TRAIN_X_PATH, the same file fit_generator is given
# below, so the model width stays in sync with the data.
feature_size = X_train.shape[1]
model_ftrl = FtrlProximal(
    feature_size=feature_size,
    bias=True,
    alpha=ALPHA,
    beta=BETA,
    l1=L1,
    l2=L2,
)
# Unlike the logistic regression above, FTRL is fitted from the CSV paths
# rather than from the in-memory arrays.
model_ftrl.fit_generator(TRAIN_X_PATH, TRAIN_Y_PATH, EPOCH_SIZE)

2020-04-05 23:48:40.763518	epoch: 0	count: 100	logloss: 0.004860795812874137
2020-04-05 23:48:40.769939	epoch: 1	count: 100	logloss: 0.003897305647395401
2020-04-05 23:48:40.774167	epoch: 2	count: 100	logloss: 0.0032016139754847462
2020-04-05 23:48:40.777975	epoch: 3	count: 100	logloss: 0.0026705581604224244
2020-04-05 23:48:40.780526	epoch: 4	count: 100	logloss: 0.0022538840031804132
2020-04-05 23:48:40.783177	epoch: 5	count: 100	logloss: 0.0019208278278785163
2020-04-05 23:48:40.789379	epoch: 6	count: 100	logloss: 0.0016507257787798859
2020-04-05 23:48:40.792266	epoch: 7	count: 100	logloss: 0.0014290147808408325
2020-04-05 23:48:40.794706	epoch: 8	count: 100	logloss: 0.0012451157861349273
2020-04-05 23:48:40.796940	epoch: 9	count: 100	logloss: 0.0010911724815570135
2020-04-05 23:48:40.799074	epoch: 10	count: 100	logloss: 0.0009612467380742129
2020-04-05 23:48:40.802343	epoch: 11	count: 100	logloss: 0.0008507828597890443
2020-04-05 23:48:40.804827	epoch: 12	count: 100	logloss: 0.00075

In [30]:
# Print the learned FTRL weights. In the recorded output this is a dict keyed
# by integer index with 4 entries (the model was built with 3 features and
# bias=True).
print(model_ftrl.w)

{0: -0.7530029645172854, 1: -0.7947096106174495, 2: -0.7562993519174802, 3: -0.8243614392743873}


# Evaluation

In [49]:
# Logistic regression: predict on the in-memory test matrix
# (0.5 is presumably the decision threshold -- confirm against predict()).
preds_logit = model_logit.predict(X_test, 0.5)

# FTRL: score from the test CSV paths, then turn the scores into 0/1 labels
# by thresholding at 0.5.
scores_ftrl = model_ftrl.predict_proba_generator(TEST_X_PATH, TEST_Y_PATH)
preds_ftrl = (np.array(scores_ftrl) > 0.5).astype(int)

{0: -0.7530029645172854, 1: -0.7947096106174495, 2: -0.7562993519174802, 3: -0.8243614392743873}
{0: -0.7530029645172854, 1: -0.7947096106174495, 2: -0.7562993519174802, 3: -0.8243614392743873}


In [48]:
# Score both models' hard predictions against the test labels.
# eval_accuracy is the name imported from the evaluation module at the top of
# the notebook; using it keeps this cell runnable on a fresh kernel.
accuracy_logit = eval_accuracy(preds_logit, y_test)
accuracy_ftrl = eval_accuracy(preds_ftrl, y_test)
# NOTE(review): the recorded run shows FTRL at 0.34 versus 1.0 for logistic
# regression even though the FTRL training logloss is near zero -- worth
# checking how the FTRL generator methods parse features and labels.
print("Losistic Regression: {}".format(accuracy_logit))
print("Ftrl: {}".format(accuracy_ftrl))

Losistic Regression: 1.0
Ftrl: 0.34
