In [1]:
from sklearn.datasets import load_digits
from matplotlib import pyplot as plt
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import learn
import tensorflow as tf
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn import metrics
from sklearn.utils import shuffle
from sklearn import cross_validation
import itertools
import time



In [2]:
#This function will create batch of inputs with the following features 
#Maximum capacity of the queue is 10000 - which is maximum number of elements in the queue
#Min elements in queue after every dequeue

def inp_tensor_fn(dataset_split, batch_size, capacity=10000, min_after_dequeue=3000):
    
    def inp_fn():
        img_batch, labels_batch = tf.train.shuffle_batch(
            tensors = [dataset_split.images, dataset_split.labels.astype(np.int32)],
            batch_size = batch_size,
            capacity = capacity,
            min_after_dequeue=min_after_dequeue,
            enqueue_many=True,
            num_threads=4)
        features_map = {'images': img_batch}
        return features_map, labels_batch
    
    return inp_fn

In [3]:
#This will load MNIST dataset and split it into train, test and validation
#Train will contain 55k , validation will contain 5k and test will have 10k samples
m_data = tf.contrib.learn.datasets.mnist.load_mnist()




#convert image numpy arrays to tensor
#input function will create mini batches of tensor
#train data is divided into batchs of 256 whereas validation data is 5000 
train_inp = inp_tensor_fn(m_data.train, batch_size=256)
val_inp = inp_tensor_fn(m_data.validation, batch_size=5000)

Extracting MNIST-data\train-images-idx3-ubyte.gz
Extracting MNIST-data\train-labels-idx1-ubyte.gz
Extracting MNIST-data\t10k-images-idx3-ubyte.gz
Extracting MNIST-data\t10k-labels-idx1-ubyte.gz


m_data = tf.contrib.learn.datasets.mnist.load_mnist()

In [4]:
image_set = tf.contrib.layers.real_valued_column('images', dimension=784)

# Defining optimizer

In [5]:
#This optimizer supports FTRL Algo and has support for l2 regularization
op = tf.train.FtrlOptimizer(learning_rate=50.0, l2_regularization_strength=0.001)

# Implementing RBF kernel Function

In [6]:
kernel_mapper = tf.contrib.kernel_methods.RandomFourierFeatureMapper(input_dim=784, output_dim=12000, stddev=5.0, name='rffm')

In [7]:
inp_map = {image_set: [kernel_mapper]}

Training & Testing Model

In [8]:
rbf = tf.contrib.kernel_methods.KernelLinearClassifier(n_classes=10, optimizer=op, kernel_mappers=inp_map)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001B5FADA0940>, '_task_id': 0, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_environment': 'local', '_save_summary_steps': 100, '_num_ps_replicas': 0, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_keep_checkpoint_max': 5, '_master': '', '_save_checkpoints_secs': 600, '_model_dir': 'C:\\Users\\saish\\AppData\\Local\\Temp\\tmpnx2k57_i', '_is_chief': True, '_num_worker_replicas': 0, '_save_checkpoints_steps': None, '_evaluation_master': '', '_task_type': None}


In [9]:
start = int(round(time.time() * 1000))

In [10]:
 rbf.fit(input_fn=train_inp, steps=2000)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\saish\AppData\Local\Temp\tmpnx2k57_i\model.ckpt.
INFO:tensorflow:loss = 2.30258, step = 1
INFO:tensorflow:global_step/sec: 7.72235
INFO:tensorflow:loss = 0.248888, step = 101 (12.950 sec)
INFO:tensorflow:global_step/sec: 7.6609
INFO:tensorflow:loss = 0.163599, step = 201 (13.054 sec)
INFO:tensorflow:global_step/sec: 7.31904
INFO:tensorflow:loss = 0.123285, step = 301 (14.121 sec)
INFO:tensorflow:global_step/sec: 7.12699
INFO:tensorflow:loss = 0.100805, step = 401 (13.572 sec)
INFO:tensorflow:global_step/sec: 7.07709
INFO:tensorflow:loss = 0.0698902, step = 501 (14.1

KernelLinearClassifier(params={'kernel_mappers': {_RealValuedColumn(column_name='images', dimension=784, default_value=None, dtype=tf.float32, normalizer=None): [<tensorflow.contrib.kernel_methods.python.mappers.random_fourier_features.RandomFourierFeatureMapper object at 0x000001B58310E400>]}, 'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x000001B58310E4E0>, 'optimizer': <tensorflow.python.training.ftrl.FtrlOptimizer object at 0x000001B58310E1D0>, 'feature_columns': {_RealValuedColumn(column_name='images_MAPPED', dimension=12000, default_value=None, dtype=tf.float32, normalizer=None)}})

In [11]:
end = int(round(time.time() * 1000))
print("--NN fitting finished in ", (end-start), "ms--------------")

--NN fitting finished in  380193 ms--------------


# Validation using evaluate method of kernelLinearClassifier

In [12]:
val = rbf.evaluate(input_fn=val_inp, steps=1)
print (val)

Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
INFO:tensorflow:Starting evaluation at 2017-11-11-21:10:48
INFO:tensorflow:Restoring parameters from C:\Users\saish\AppData\Local\Temp\tmpnx2k57_i\model.ckpt-2000
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-11-11-21:10:51
INFO:tensorflow:Saving dict for global step 2000: accuracy = 0.978, global_step = 2000, loss = 0.0780364
{'accuracy': 0.97799999, 'loss': 0.07803645, 'global_step': 2000}


# CROSS VALIDATION

In [13]:
def inp_tensor_fn1(dataset_split):
    
    def inp_fn1():
        data_tf = tf.convert_to_tensor(dataset_split.images)
        features_map = {'images': data_tf}
        return features_map
    
    return inp_fn1

In [14]:
val_set = m_data.validation
val_images = val_set.images
val_labels = val_set.labels.astype(np.int32)

In [15]:
v_pred = []
for i in range(5000):
    v_pred.append(val_labels[i])

In [16]:
val_inp = inp_tensor_fn1(m_data.validation)

In [17]:
v_pre = list(itertools.islice(rbf.predict(input_fn=val_inp),5000))
v_classes = [p["classes"] for p in v_pre]
score = metrics.accuracy_score(v_pred, v_classes)
print ('Test Accuracy', score)
Recall = metrics.recall_score(v_pred, v_classes, average='weighted')
print('Test Recall', Recall)
Precision = metrics.precision_score(v_pred, v_classes, average='weighted')
print('Test Precision', Precision)
F1 = metrics.f1_score(v_pred, v_classes, average='weighted')
print('F1', F1)
matrix = metrics.confusion_matrix(v_pred, v_classes)
print('Confusion Matrix', matrix)

INFO:tensorflow:Restoring parameters from C:\Users\saish\AppData\Local\Temp\tmpnx2k57_i\model.ckpt-2000
Test Accuracy 0.9802
Test Recall 0.9802
Test Precision 0.980230018318
F1 0.980182190251
Confusion Matrix [[476   1   0   1   0   0   0   1   0   0]
 [  0 561   0   0   1   0   0   1   0   0]
 [  2   5 471   1   2   0   1   2   4   0]
 [  0   0   4 479   0   3   1   3   2   1]
 [  0   1   1   0 529   0   1   1   0   2]
 [  0   1   2   5   1 424   1   0   0   0]
 [  2   1   2   0   1   1 493   0   1   0]
 [  1   1   2   0   2   1   0 540   1   2]
 [  0   2   1   5   0   4   1   0 449   0]
 [  2   2   0   3   1   0   1   6   1 479]]


In [18]:
print("Classification report for RBF classifier %s:\n%s\n"
     % (rbf, metrics.classification_report(v_pred, v_classes)))

Classification report for RBF classifier KernelLinearClassifier(params={'kernel_mappers': {_RealValuedColumn(column_name='images', dimension=784, default_value=None, dtype=tf.float32, normalizer=None): [<tensorflow.contrib.kernel_methods.python.mappers.random_fourier_features.RandomFourierFeatureMapper object at 0x000001B58310E400>]}, 'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x000001B58310E4E0>, 'optimizer': <tensorflow.python.training.ftrl.FtrlOptimizer object at 0x000001B58310E1D0>, 'feature_columns': {_RealValuedColumn(column_name='images_MAPPED', dimension=12000, default_value=None, dtype=tf.float32, normalizer=None)}}):
             precision    recall  f1-score   support

          0       0.99      0.99      0.99       479
          1       0.98      1.00      0.99       563
          2       0.98      0.97      0.97       488
          3       0.97      0.97      0.97       493
          4       0.99      0.99      0.99       535


# TEST

In [19]:
def inp_tensor_fn1(dataset_split):
    
    def inp_fn1():
        data_tf = tf.convert_to_tensor(dataset_split.images)
        features_map = {'images': data_tf}
        print (features_map)
        return features_map
    
    return inp_fn1

In [20]:
test_set = m_data.test
test_images = test_set.images
test_labels = test_set.labels.astype(np.int32)

In [21]:
r = 10000
pred = []
for i in range(10000):
    pred.append(test_labels[i])

In [22]:
test_inp = inp_tensor_fn1(m_data.test)

In [23]:
pre = list(itertools.islice(rbf.predict(input_fn=test_inp),10000))

{'images': <tf.Tensor 'Const:0' shape=(10000, 784) dtype=float32>}
INFO:tensorflow:Restoring parameters from C:\Users\saish\AppData\Local\Temp\tmpnx2k57_i\model.ckpt-2000


In [24]:
predicted_classes = [p["classes"] for p in pre]

In [25]:
score = metrics.accuracy_score(pred, predicted_classes)
print ('Test Accuracy', score)
Error = 100-round(metrics.accuracy_score(pred, predicted_classes)*100,2)
print ('Test Error', Error)
Recall = metrics.recall_score(pred, predicted_classes, average='weighted')
print('Test Recall', Recall)
Precision = metrics.precision_score(pred, predicted_classes, average='weighted')
print('Test Precision', Precision)
F1 = metrics.f1_score(pred, predicted_classes, average='weighted')
print('F1', F1)
matrix = metrics.confusion_matrix(pred, predicted_classes)
print('Confusion Matrix', matrix)

Test Accuracy 0.979
Test Error 2.1
Test Recall 0.979
Test Precision 0.979013397511
F1 0.97898836086
Confusion Matrix [[ 973    0    0    1    0    2    1    1    2    0]
 [   0 1126    2    1    0    1    2    1    2    0]
 [   5    2 1006    2    2    1    0    6    8    0]
 [   0    0    2  993    0    5    0    3    5    2]
 [   0    0    4    0  961    0    3    1    1   12]
 [   2    0    0    7    2  875    3    1    1    1]
 [   6    2    0    1    4    7  936    1    1    0]
 [   1    8    8    1    0    1    0 1001    0    8]
 [   2    0    1    9    1    5    3    3  946    4]
 [   3    5    1    8    8    3    1    6    1  973]]


In [26]:
print("Classification report for RBF classifier %s:\n%s\n"
     % (rbf, metrics.classification_report(pred, predicted_classes)))

Classification report for RBF classifier KernelLinearClassifier(params={'kernel_mappers': {_RealValuedColumn(column_name='images', dimension=784, default_value=None, dtype=tf.float32, normalizer=None): [<tensorflow.contrib.kernel_methods.python.mappers.random_fourier_features.RandomFourierFeatureMapper object at 0x000001B58310E400>]}, 'head': <tensorflow.contrib.learn.python.learn.estimators.head._MultiClassHead object at 0x000001B58310E4E0>, 'optimizer': <tensorflow.python.training.ftrl.FtrlOptimizer object at 0x000001B58310E1D0>, 'feature_columns': {_RealValuedColumn(column_name='images_MAPPED', dimension=12000, default_value=None, dtype=tf.float32, normalizer=None)}}):
             precision    recall  f1-score   support

          0       0.98      0.99      0.99       980
          1       0.99      0.99      0.99      1135
          2       0.98      0.97      0.98      1032
          3       0.97      0.98      0.98      1010
          4       0.98      0.98      0.98       982
