In [10]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
import numpy as np
import tensorflow as tf

random_seed = 42

In [11]:
X_raw, y_raw = make_moons(1000, random_state=random_seed)
m, n = X_raw.shape
X_train, X_test, y_train, y_test = train_test_split(np.c_[np.ones([m, 1]), X_raw], y_raw.reshape(-1, 1), random_state=random_seed, test_size=0.2)

In [12]:
X_train_enhanced = np.c_[X_train, np.power(X_train[:, 1], 2), np.power(X_train[:, 2], 2), np.power(X_train[:, 1], 3), np.power(X_train[:, 2], 3)]
X_test_enhanced = np.c_[X_test, np.power(X_test[:, 1], 2), np.power(X_test[:, 2], 2), np.power(X_test[:, 1], 3), np.power(X_test[:, 2], 3)]

In [13]:
tf.reset_default_graph()
enhanced_feature_count = int(X_train_enhanced.shape[1])
def logistic_regression(X, y, learning_rate=0.01, initializer=None, seed=42):
    with tf.name_scope('LogisticRegression'):
        with tf.name_scope('Model'):
            if initializer is None:
                initializer = tf.random_uniform([enhanced_feature_count, 1], 1, -1, seed=seed)
            theta = tf.Variable(initializer, dtype=tf.float32, name="theta")
            logit = tf.matmul(X, theta)
            y_prob = tf.sigmoid(logit)
        with tf.name_scope('Train'):
            log_loss = tf.losses.log_loss(y, y_prob)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            train_op = optimizer.minimize(log_loss)
            loss_summary = tf.summary.scalar('log_loss', log_loss)
        with tf.name_scope('init'):
            init = tf.global_variables_initializer()
        with tf.name_scope('save'):
            saver = tf.train.Saver()
        return y_prob, log_loss, train_op, loss_summary, init, saver
        

In [14]:
from datetime import datetime

def get_log_dir(prefix=""):
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    root_logdir = "tf_logs"
    if prefix:
        prefix += "-"
    name = prefix + "run-" + now
    return "{}/{}/".format(root_logdir, name)

In [15]:
log_dir = get_log_dir('logreg')
X = tf.placeholder(tf.float32, shape=(None, enhanced_feature_count), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
y_prob, log_loss, train_op, loss_summary, init, saver = logistic_regression(X, y)
file_writer = tf.summary.FileWriter(log_dir, tf.get_default_graph())

In [18]:
import os

n_epochs = 10000
batch_size = 100
n_batches = int(np.ceil(m / batch_size))

checkpoint_path = "./my_logreg_model.ckpt"
checkpoint_epoch_path = checkpoint_path + ".epoch"
final_model_path = "./my_logreg_model"

def get_batches(epoch, batch_index, batch_size, X_input, y_input):
    np.random.seed(epoch * batch_index + batch_size)
    indices = np.random.randint(int(X_input.shape[0]), size=batch_size)
    X_batch = X_input[indices]
    y_batch = y_input[indices]
    return X_batch, y_batch

with tf.Session() as sess:
    if os.path.isfile(checkpoint_epoch_path):
        with open(checkpoint_epoch_path, 'rb') as f:
            start_epoch = int(f.read())
        print("Training was interrupted. Continuing at epoch", start_epoch)
        saver.restore(sess, checkpoint_path)
    else:
        start_epoch = 0
        init.run()
    
    for epoch in range(n_epochs):
        for batch_index in range(start_epoch, n_batches):
            X_batch, y_batch = get_batches(epoch, batch_index, batch_size, X_train_enhanced, y_train)
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
            loss_val, summary_str = sess.run([log_loss, loss_summary], feed_dict={X: X_test_enhanced, y: y_test})
            file_writer.add_summary(summary_str, epoch)
        if epoch % 500 == 0:
            print("Epoch:", epoch, "\tLoss:", loss_val)
            saver.save(sess, checkpoint_path)
            with open(checkpoint_epoch_path, "wb") as f:
                f.write(b"%d" % (epoch + 1))
                    
    saver.save(sess, final_model_path)
    y_test_prob = y_prob.eval(feed_dict={X: X_test_enhanced, y: y_test})
    os.remove(checkpoint_epoch_path)

Epoch: 0 	Loss: 1.2377924
Epoch: 500 	Loss: 0.20205694
Epoch: 1000 	Loss: 0.14478992
Epoch: 1500 	Loss: 0.113825604
Epoch: 2000 	Loss: 0.09400337
Epoch: 2500 	Loss: 0.08019906
Epoch: 3000 	Loss: 0.07004305
Epoch: 3500 	Loss: 0.06226339
Epoch: 4000 	Loss: 0.056091126
Epoch: 4500 	Loss: 0.051066063
Epoch: 5000 	Loss: 0.046946354
Epoch: 5500 	Loss: 0.04340781
Epoch: 6000 	Loss: 0.04045478
Epoch: 6500 	Loss: 0.03786284
Epoch: 7000 	Loss: 0.035573967
Epoch: 7500 	Loss: 0.033580724
Epoch: 8000 	Loss: 0.031809647
Epoch: 8500 	Loss: 0.030212544
Epoch: 9000 	Loss: 0.028801717
Epoch: 9500 	Loss: 0.0274964


In [19]:
y_test_pred = y_test_prob > 0.5
print('Accuracy score: ', accuracy_score(y_test, y_test_pred))
print('Precision score: ', precision_score(y_test, y_test_pred))
print('Recall score: ', recall_score(y_test, y_test_pred))

Accuracy score:  1.0
Precision score:  1.0
Recall score:  1.0
