In [2]:
#import dependencies
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython import display
from sklearn.model_selection import train_test_split
import pickle
%matplotlib inline

In [5]:
# load the data
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    file : str or path-like
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was pickled. The file is read with
        ``encoding='bytes'``, so 8-bit strings written by Python 2 come
        back as ``bytes`` (callers in this notebook index the result with
        keys such as ``b'data'``).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only call this on files from a trusted source.
    with open(file, 'rb') as fo:
        # Named `contents` rather than `dict` so the builtin is not shadowed.
        contents = pickle.load(fo, encoding='bytes')
    return contents

def _load_batch(path):
    """Read one pickled batch file and return ``(data, labels)``.

    ``data`` is the batch's ``b'data'`` entry multiplied by 1.0 (for a NumPy
    integer array this promotes it to floating point -- the stored dtype is
    not visible here, TODO confirm); ``labels`` is the ``b'labels'`` entry,
    returned unchanged.
    """
    batch = unpickle(path)
    return batch[b'data'] * 1.0, batch[b'labels']


# One (data, labels) pair per training batch file.
data1, labels1 = _load_batch("./input/data_batch_1")
data2, labels2 = _load_batch("./input/data_batch_2")
data3, labels3 = _load_batch("./input/data_batch_3")
data4, labels4 = _load_batch("./input/data_batch_4")
data5, labels5 = _load_batch("./input/data_batch_5")

# Class names stored under b'label_names' in the metadata file.
label_data = unpickle('./input/batches.meta')[b'label_names']

In [6]:
# Combine all five training batches into a single training set
# Labels: one flat vector covering all five batches, in batch order.
y_tr = np.concatenate((labels1, labels2, labels3, labels4, labels5), axis=0)

# Pixels: stack the batches row-wise, then move each row's three consecutive
# 1024-value planes onto a trailing axis -> (N, 1024, 3).
# X_tr is rebound at every step so the large intermediates can be freed.
X_tr = np.concatenate((data1, data2, data3, data4, data5), axis=0)
X_tr = np.stack((X_tr[:, :1024], X_tr[:, 1024:2048], X_tr[:, 2048:]), axis=-1) / 1.0

# Shift by 128, scale by 255, then view every sample as a 32x32 grid with
# the three planes last.
X_tr = ((X_tr - 128) / 255.0).reshape(-1, 32, 32, 3)

In [7]:
# set number of classes
# Number of distinct label values present in the training labels.
num_classes = np.unique(y_tr).size

# Report the shapes of the assembled training arrays.
for arr_name, arr in (("X_tr", X_tr), ("y_tr", y_tr)):
    print(arr_name, arr.shape)

X_tr (50000, 32, 32, 3)
y_tr (50000,)


In [8]:
# import the test data
test_data = unpickle("./input/test_batch")

# Labels as a NumPy array so they can be indexed and split alongside the images.
y_test = np.asarray(test_data[b'labels'])

# Same preprocessing as the training images: put each row's three 1024-value
# planes on a trailing axis (the `/ 1.0` promotes the result to float),
# shift by 128, scale by 255, and view each sample as 32x32x3.
X_test = test_data[b'data']
X_test = np.stack((X_test[:, :1024], X_test[:, 1024:2048], X_test[:, 2048:]), axis=-1) / 1.0
X_test = ((X_test - 128) / 255.0).reshape(-1, 32, 32, 3)

In [9]:
# split into test and validation
# Halve the held-out data: one half for final testing (X_te / y_te), the
# other for validation (X_cv / y_cv). The fixed random_state keeps the
# split reproducible.
X_te, X_cv, y_te, y_cv = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    random_state=1,
)

# Report the shape of every resulting array.
for split_name, split_arr in (("X_te", X_te), ("X_cv", X_cv), ("y_te", y_te), ("y_cv", y_cv)):
    print(split_name, split_arr.shape)

X_te (5000, 32, 32, 3)
X_cv (5000, 32, 32, 3)
y_te (5000,)
y_cv (5000,)


In [10]:
# Batch generator
def get_batches(X, y, batch_size, crop=False, distort=True):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` covering the data once.

    Parameters
    ----------
    X : numpy.ndarray
        4-D array; axis 0 is the sample axis, and axes 1 and 2 are treated
        as image rows and columns respectively.
    y : numpy.ndarray
        Per-sample labels; ``len(y)`` determines how many samples are used.
    batch_size : int
        Maximum samples per batch; the final batch may be smaller.
    crop : bool, optional
        If true, take one random 24x24 window per batch (the same window
        for every image in that batch).
    distort : bool, optional
        If true, each batch is mirrored along axis -2 (the column axis)
        with probability 0.5.

    Yields
    ------
    tuple
        ``(X_batch, y_batch)`` for each consecutive slice of a random
        permutation of the sample indices.

    Notes
    -----
    Uses the global ``np.random`` state. A crop start is drawn from
    ``[off, 2 * off)`` where ``off = (size - 24) // 4`` for that axis. When
    ``off`` is not positive (axis shorter than 28) the window starts at 0
    instead of ``np.random.randint`` raising on an empty range; an axis
    shorter than 24 is therefore returned uncropped.
    """
    crop_size = 24
    sample_count = len(y)

    # Random visiting order for this pass over the data.
    shuffled_idx = np.arange(sample_count)
    np.random.shuffle(shuffled_idx)
    _, h, w, _ = X.shape

    for start in range(0, sample_count, batch_size):
        batch_idx = shuffled_idx[start:start + batch_size]
        X_return = X[batch_idx]

        # Optional random crop of the whole batch.
        if crop:
            hoff = (h - crop_size) // 4
            woff = (w - crop_size) // 4
            # The row offset is drawn first and applied to axis 1, the column
            # offset second and applied to axis 2. For square inputs this
            # reproduces the earlier results under a fixed seed; for
            # non-square inputs each offset now goes to its own axis.
            starth = np.random.randint(low=hoff, high=hoff * 2) if hoff > 0 else 0
            startw = np.random.randint(low=woff, high=woff * 2) if woff > 0 else 0
            X_return = X_return[:, starth:starth + crop_size, startw:startw + crop_size, :]

        # The coin is tossed even when distort is False so that the random
        # stream does not depend on that flag.
        coin = np.random.binomial(1, 0.5, size=None)
        if coin and distort:
            X_return = X_return[..., ::-1, :]

        yield X_return, y[batch_idx]

In [11]:
# config
batch_size = 128
epochs = 3                   # how many epochs
# True division: this is a float whenever the sample count is not an exact
# multiple of batch_size.
steps_per_epoch = len(X_tr) / batch_size