In [1]:
# Dataset
import numpy as np

from tensorflow.examples.tutorials.mnist import input_data
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

mnist = input_data.read_data_sets("MNIST_data/")

mnist_images = mnist.train.images
mnist_labels = mnist.train.labels

n_three, n_five = sum(mnist_labels==3), sum(mnist_labels==5)

X_all = np.vstack([
    mnist_images[mnist_labels==3,:],
    mnist_images[mnist_labels==5,:]
])

y_all = np.array([1]*n_three + [0]*n_five)
# make it more sparse
X_all = X_all * (np.random.uniform(0, 1, X_all.shape) > 0.8)

print('Dataset shape: {}'.format(X_all.shape))
print('Non-zeros rate: {:.05f}'.format(np.mean(X_all != 0)))
print('Classes balance: {:.03f} / {:.03f}'.format(np.mean(y_all==0), np.mean(y_all==1)))

X_tr, X_te, y_tr, y_te = train_test_split(X_all, y_all, random_state=42, test_size=0.3)

  from ._conv import register_converters as _register_converters


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Dataset shape: (10625, 784)
Non-zeros rate: 0.04039
Classes balance: 0.469 / 0.531


In [2]:
# Baseline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
for model in [
    LogisticRegression(), 
    RandomForestClassifier(n_jobs=-1, n_estimators=200)]:
    model.fit(X_tr, y_tr)
    predictions = model.predict(X_te)
    acc = accuracy_score(y_te, predictions)
    print('model: {}'.format(model.__str__()))
    print('accuracy: {}'.format(acc))
    print()

model: LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
accuracy: 0.8726474278544541

model: RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
accuracy: 0.8710790464240903



In [10]:
# TFLR
from tflrclassifier import TFLRClassifier
for model in [TFLRClassifier(l2_weight=0.01, learning_rate=1e-2,
    batch_size=10, epoch_num=10, print_step=1000, input_type='dense')]:
    model.fit(X_tr, y_tr)
    predictions = model.predict(X_te)
    acc = accuracy_score(y_te, predictions)
    print('model: {}'.format(model.__str__()))
    print('train accuracy: {}'.format(accuracy_score(y_tr, model.predict(X_tr))))
    print('test accuracy: {}'.format(acc))
    print()

tensorboard --logdir=/tmp/tflog --port 8080
epoch: 1, global_step: 1000, loss: 0.5905632376670837
epoch: 2, global_step: 2000, loss: 0.6541094779968262
epoch: 4, global_step: 3000, loss: 0.5542104840278625
epoch: 5, global_step: 4000, loss: 0.4941420555114746
epoch: 6, global_step: 5000, loss: 0.5167779922485352
epoch: 8, global_step: 6000, loss: 0.5100672245025635
epoch: 9, global_step: 7000, loss: 0.4157087802886963
model: <tflrclassifier.TFLRClassifier object at 0x7fea34d6bbe0>
train accuracy: 0.8586795750974855
test accuracy: 0.846612296110414

