### Mini-Batching Example
The minibatching is useful when the dataset is too large.

This method consists in taking at random a reduced number of the initial dataset and using this for training the algorithm

In [10]:
import math
def batches(batch_size, features, labels):
    """
    Create batches of features and labels
    :param batch_size: The batch size
    :param features: List of features
    :param labels: List of labels
    :return: Batches of (Features, Labels)
    """
    assert len(features) == len(labels)
    outout_batches = []
    out_features = []
    out_labels = []
    
    sample_size = len(features)
    for start_i in range(0, sample_size, batch_size):
        end_i = start_i + batch_size
        tmp_feat = features[start_i:end_i]
        tmp_lab = labels[start_i:end_i]
        batch = [tmp_feat, tmp_lab]
        out_features.append(tmp_feat)
        out_labels.append(tmp_lab)
        outout_batches.append(batch)
        
    return outout_batches

In [11]:
from pprint import pprint

# 4 Samples of features
example_features = [
    ['F11','F12','F13','F14'],
    ['F21','F22','F23','F24'],
    ['F31','F32','F33','F34'],
    ['F41','F42','F43','F44'],
    ['F51','F52','F53','F54'],
    ['F61','F62','F63','F64'],
    ['F71','F72','F73','F74'],
    ['F81','F82','F83','F84']]
# 4 Samples of labels
example_labels = [
    ['L11','L12'],
    ['L21','L22'],
    ['L31','L32'],
    ['L41','L42'],
    ['L51','L52'],
    ['L61','L62'],
    ['L71','L72'],
    ['L81','L82']]

# PPrint prints data structures like 2d arrays, so they are easier to read
pprint(batches(5, example_features, example_labels))

[[[['F11', 'F12', 'F13', 'F14'],
   ['F21', 'F22', 'F23', 'F24'],
   ['F31', 'F32', 'F33', 'F34'],
   ['F41', 'F42', 'F43', 'F44'],
   ['F51', 'F52', 'F53', 'F54']],
  [['L11', 'L12'],
   ['L21', 'L22'],
   ['L31', 'L32'],
   ['L41', 'L42'],
   ['L51', 'L52']]],
 [[['F61', 'F62', 'F63', 'F64'],
   ['F71', 'F72', 'F73', 'F74'],
   ['F81', 'F82', 'F83', 'F84']],
  [['L61', 'L62'], ['L71', 'L72'], ['L81', 'L82']]]]


### MNIST Dataset

Training using the MNIST dataset that cointains a list of handwitten numbers

In [12]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Import MNIST data
# (x_train, y_train), (x_test, y_test)
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.mnist.load_data()


### Train the Model on a Mini-Batch

In [13]:
import numpy as np

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)
learning_rate = 0.001
                                                               # Dimensions
train_features = train_features.reshape((60000, 28 * 28))      # 60000 * 784
test_features = test_features.reshape((10000, 28 * 28))        # 10000 * 784
train_features = train_features.astype('float32') / 255
test_features = test_features.astype('float32') / 255
train_labels = train_labels.astype('float32') / 255            
test_labels = test_labels.astype('float32') / 255
train_labels = tf.keras.utils.to_categorical(train_labels, 10) # 60000 * 10
test_labels = tf.keras.utils.to_categorical(test_labels, 10)   # 10000 * 10


# Features and Labels
features = tf.placeholder(tf.float32, [None, n_input])         # x * 784
labels = tf.placeholder(tf.float32, [None, n_classes])         # x * 10

# Weights & bias
weights = tf.Variable(tf.random_normal([n_input, n_classes]))  # 784 * 10
bias = tf.Variable(tf.random_normal([n_classes]))              # 10 * 1

# Logits - Wx + b
logits = tf.add(tf.matmul(features, weights), bias)            # x * 10


# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

# Calculate accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


In [15]:
batch_size = 128

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    for batch_features, batch_labels in batches(batch_size, train_features, train_labels):
        sess.run(optimizer, feed_dict={features: batch_features, labels: batch_labels})
    
    # Calculate accuracy for test dataset
    test_accuracy = sess.run(accuracy, feed_dict={features: test_features, labels: test_labels})
    
print('Test Accuracy: {}'.format(test_accuracy))

Test Accuracy: 0.6578999757766724
