In [None]:
import os
import pickle
from datetime import datetime, timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import keras


In [None]:
# Current working directory of the notebook kernel; echoed as the cell output.
dir_path = os.getcwd()
dir_path

In [None]:
# UTC timestamp identifying this run, so every run logs into its own
# sub-directory of root_logdir (used later as the TensorBoard log_dir).
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
now = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

In [None]:
def unpickle(file):
    """Return the object stored in the pickle file at path ``file``.

    The file is read in binary mode and unpickled with ``encoding='bytes'``.
    NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

In [None]:
# Load the first CIFAR-10 training batch as a raw dict for inspection.
cifar10_1 = unpickle('datasets/cifar-10/data_batch_1')

In [None]:
# Folder holding the CIFAR-10 batch files; passed to the loading and
# preprocessing helpers defined further down.
cifar10_dataset_folder_path = 'datasets/cifar-10/'

In [None]:
# List the keys of the raw batch dict.
list(cifar10_1.keys())

In [None]:
# Number of data rows and number of labels in the batch (expected to match).
len(cifar10_1[b'data']),len(cifar10_1[b'labels'])

In [None]:
# Show the batch's 'batch_label' entry.
cifar10_1[b'batch_label']

In [None]:
# Load the dataset metadata file and show its raw label names
# (bytes objects; they are decoded to str in the next cell).
cifar10_Label_names = unpickle('datasets/cifar-10/batches.meta')
cifar10_Label_names[b'label_names']

In [None]:
# Decode the raw bytes label names into plain str class names.
label_names = [raw_name.decode() for raw_name in cifar10_Label_names[b'label_names']]

In [None]:
# Show the decoded class names.
label_names

In [None]:
def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
    """Load one CIFAR-10 training batch file.

    Parameters
    ----------
    cifar10_dataset_folder_path : str
        Folder containing the ``data_batch_<id>`` files. A trailing path
        separator is optional.
    batch_id : int or str
        Batch identifier appended to ``data_batch_``.

    Returns
    -------
    features : numpy.ndarray
        ``batch['data']`` reshaped to (n, 3, 32, 32) and transposed to
        channels-last layout (n, 32, 32, 3).
    labels
        ``batch['labels']`` exactly as stored in the file.

    NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    """
    # os.path.join avoids the doubled separator (or a path rooted at "/")
    # that plain string concatenation produced.
    batch_path = os.path.join(cifar10_dataset_folder_path, 'data_batch_' + str(batch_id))

    with open(batch_path, mode='rb') as file:
        # note the encoding type is 'latin1'
        batch = pickle.load(file, encoding='latin1')

    # Each row is a flat image stored channel-first; move channels last.
    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']

    return features, labels

In [None]:
def display_stats(cifar10_dataset_folder_path, batch_id, sample_id):
    """Print summary statistics for one batch and display one of its images.

    Loads batch ``batch_id`` from ``cifar10_dataset_folder_path``, prints the
    sample count and per-label counts, then prints value range, shape and label
    of sample ``sample_id`` and draws it with matplotlib. Uses the notebook
    global ``label_names`` to translate label ids. Returns None; when
    ``sample_id`` is out of range only a message is printed.
    """
    features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_id)
    n_samples = len(features)

    # Guard clause: nothing to show if the requested sample does not exist.
    if sample_id < 0 or sample_id >= n_samples:
        print('{} samples in batch {}.  {} is out of range.'.format(n_samples, batch_id, sample_id))
        return None

    print('\nStats of batch #{}:'.format(batch_id))
    print('# of Samples: {}\n'.format(n_samples))

    # np.unique returns the sorted distinct label ids with their counts.
    class_ids, class_counts = np.unique(labels, return_counts=True)
    for class_id, count in zip(class_ids, class_counts):
        print('Label Counts of [{}]({}) : {}'.format(class_id, label_names[class_id].upper(), count))

    image = features[sample_id]
    image_label = labels[sample_id]

    print('\nExample of Image {}:'.format(sample_id))
    print('Image - Min Value: {} Max Value: {}'.format(image.min(), image.max()))
    print('Image - Shape: {}'.format(image.shape))
    print('Label - Label Id: {} Name: {}'.format(image_label, label_names[image_label]))

    plt.imshow(image)

In [None]:
# Render matplotlib figures inline in the notebook, at retina resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# NOTE(review): numpy is already imported as np in the first cell, so this
# re-import is redundant.
import numpy as np

# Explore the dataset: print stats for one batch and show one sample from it.
batch_id = 3
sample_id = 7000
display_stats(cifar10_dataset_folder_path, batch_id, sample_id)

In [None]:
def normalize(x):
    """Min-max scale ``x`` into the range [0, 1].

    The minimum and maximum are taken over the whole array, so every element
    is scaled by the same factor.

    Parameters
    ----------
    x : array-like
        Numeric data to scale.

    Returns
    -------
    numpy.ndarray
        ``(x - min) / (max - min)``. When all values are equal (zero range) an
        all-zero float array of the same shape is returned instead of NaNs.
    """
    x = np.asarray(x)
    min_val = np.min(x)
    max_val = np.max(x)
    value_range = max_val - min_val

    if value_range == 0:
        # Constant input: the plain formula would compute 0/0 and yield NaN.
        return np.zeros(x.shape, dtype=float)

    return (x - min_val) / value_range

In [None]:
def one_hot_encode(x, n_classes=10):
    """One-hot encode a 1-D sequence of integer class ids.

    Parameters
    ----------
    x : sequence of int
        Class ids, one per sample.
    n_classes : int, optional
        Width of the encoding. Defaults to 10, the value this function was
        originally hard-coded to.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(x), n_classes)`` with a single 1 per row.

    Raises
    ------
    IndexError
        If a class id does not fit into ``n_classes`` columns.
    """
    # dtype=int keeps an empty input usable as an index array.
    class_ids = np.asarray(x, dtype=int)
    encoded = np.zeros((len(class_ids), n_classes))

    # Set column class_ids[i] of row i in one vectorised assignment.
    encoded[np.arange(len(class_ids)), class_ids] = 1

    return encoded

In [None]:
def _preprocess_and_save(normalize, one_hot_encode, features, labels, filename):
    features = normalize(features)
    # one hot encoding only for categorical_crossentropy and categorical_accuracy
    #labels = one_hot_encode(labels)

    pickle.dump((features, labels), open(filename, 'wb'))

In [None]:
def preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode):
    """Split the five training batches into train/validation sets and save them.

    For each of the batches 1..5 the last 10% of the samples go to the
    validation set and the rest to the training set. Both sets are then passed
    to ``_preprocess_and_save``, which writes
    ``datasets/preprocess_validation.p`` and ``datasets/preprocess_all.p``.

    Parameters
    ----------
    cifar10_dataset_folder_path : str
        Folder containing the ``data_batch_<id>`` files.
    normalize, one_hot_encode : callable
        Forwarded unchanged to ``_preprocess_and_save``.
    """
    n_batches = 5
    validation_fraction = 0.1

    valid_features = []
    valid_labels = []
    all_features = []
    all_labels = []

    for batch_i in range(1, n_batches + 1):
        features, labels = load_cfar10_batch(cifar10_dataset_folder_path, batch_i)

        # Split at an explicit positive index. Slicing with a negative count
        # breaks when that count is 0: [-0:] is the whole batch and [:-0] is
        # empty, which would send everything to validation.
        n_validation = int(len(features) * validation_fraction)
        split_at = len(features) - n_validation

        valid_features.extend(features[split_at:])
        valid_labels.extend(labels[split_at:])
        all_features.extend(features[:split_at])
        all_labels.extend(labels[:split_at])

    # Preprocess and save the stacked validation set.
    _preprocess_and_save(normalize, one_hot_encode,
                         np.array(valid_features), np.array(valid_labels),
                         'datasets/preprocess_validation.p')

    # Preprocess and save the stacked training set.
    _preprocess_and_save(normalize, one_hot_encode,
                         np.array(all_features), np.array(all_labels),
                         'datasets/preprocess_all.p')
    

In [None]:
# Run the preprocessing; writes datasets/preprocess_validation.p and
# datasets/preprocess_all.p.
preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode)

In [None]:
# Load the preprocessed validation split; the context manager closes the file
# handle that a bare open() inside pickle.load would leave open.
with open('datasets/preprocess_validation.p', mode='rb') as validation_file:
    valid_features, valid_labels = pickle.load(validation_file)
print(valid_features.dtype)
print(valid_features.shape)

In [None]:
# Load the preprocessed training split; the context manager closes the file
# handle that a bare open() inside pickle.load would leave open.
with open('datasets/preprocess_all.p', mode='rb') as training_file:
    train_features, train_labels = pickle.load(training_file)
print(train_features.dtype)
print(train_features.shape)

In [None]:
def build_model(target_size):
    """Build the (uncompiled) Sequential CNN classifier.

    Architecture: three blocks of BatchNormalization -> 5x5 'same' Conv2D with
    ReLU (64, 128, 256 filters) -> 2x2 max pooling -> Dropout(0.25), followed
    by Flatten -> Dense(256) + ReLU -> Dropout(0.5) -> Dense(10) + softmax.

    Parameters
    ----------
    target_size : int
        Height and width of the square, 3-channel input images.

    Returns
    -------
    tf.keras.models.Sequential
    """
    layers = tf.keras.layers
    model = tf.keras.models.Sequential()

    for block_index, n_filters in enumerate((64, 128, 256)):
        if block_index == 0:
            # Only the first layer of a Sequential model defines the input
            # shape; passing input_shape to later layers was misleading.
            model.add(layers.BatchNormalization(input_shape=(target_size, target_size, 3)))
        else:
            model.add(layers.BatchNormalization())
        model.add(layers.Conv2D(n_filters, (5, 5), padding='same', activation='relu'))
        # strides=(2, 2) equals the default (strides = pool_size) for a 2x2 pool.
        model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(layers.Dropout(0.25))

    model.add(layers.Flatten())
    model.add(layers.Dense(256))
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    model.add(layers.Activation('softmax'))

    return model


In [None]:
# Build the network for 32x32 inputs.
model = build_model(32)

# Sparse loss/metric: labels are integer class ids, not one-hot vectors
# (the one-hot step in _preprocess_and_save is commented out).
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'])


In [None]:
# Print the model's layer summary.
model.summary()

In [None]:
def trainableLayers(model, trainable=True):
    """Set the ``trainable`` flag on every layer of ``model``.

    Parameters
    ----------
    model
        Object exposing an iterable ``layers`` attribute.
    trainable : bool, optional
        Value assigned to each layer's ``trainable`` attribute. Defaults to
        True, the value this function was originally hard-coded to.
    """
    for layer in model.layers:
        layer.trainable = trainable
    

In [None]:
# Mark every layer of the model as trainable.
trainableLayers(model)

In [None]:
# Early-stopping callback monitoring the validation sparse categorical accuracy.
# NOTE(review): fit_all_batches only passes the tensorboard callback, so this
# object is currently never used.
earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_sparse_categorical_accuracy')

In [None]:
# TensorBoard callback writing to this run's timestamped logdir.
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=0)

In [None]:
def fit_all_batches(xEpochs):
    """Train the global ``model`` on the training split for ``xEpochs`` epochs.

    Uses the notebook globals ``train_features``/``train_labels`` for training
    and ``valid_features``/``valid_labels`` for validation, with a batch size
    of 32. Only the ``tensorboard`` callback is active.
    """
    # The `earlystopping` callback is deliberately not included here.
    fit_options = {
        'epochs': xEpochs,
        'batch_size': 32,
        'validation_data': (valid_features, valid_labels),
        'callbacks': [tensorboard],
    }
    model.fit(train_features, train_labels, **fit_options)

In [None]:
# Train for 20 epochs; increase the number of epochs for better accuracy.
fit_all_batches(20)

In [None]:
# Take validation sample 3 as a batch of one (the slice keeps the leading axis).
sample_image = valid_features[3:4]

In [None]:
# Check the shape of the one-sample batch.
sample_image.shape

In [None]:
# Run the model on the single sample; the network ends in a 10-unit softmax.
y_pred = model.predict(sample_image)
y_pred

In [None]:
# Flatten the prediction array to 1-D.
y_pred_flat = y_pred.flatten()
y_pred_flat

In [None]:
# Show the class names again, for comparison with the prediction above.
label_names

In [None]:
def predictions(sample_index=107):
    """Print the model's softmax output for one validation image and show it.

    Reads the notebook globals ``valid_features``, ``model`` and
    ``label_names``.

    Parameters
    ----------
    sample_index : int, optional
        Index into ``valid_features`` of the image to classify. Defaults to
        107, the sample this function was originally hard-coded to.
    """
    # Index first, then add a leading axis: same one-sample batch as the slice
    # [i:i+1], but an out-of-range index raises IndexError instead of silently
    # producing an empty batch.
    image = valid_features[sample_index]
    sample_image = image[np.newaxis, ...]
    print(sample_image.shape)

    y_pred = model.predict(sample_image)
    y_pred_flat = y_pred.flatten()

    # Pair each class name with its predicted score, one row per class.
    results = np.column_stack((label_names, y_pred_flat))
    print(results)
    plt.imshow(image)

In [None]:
# Shape of a single validation image (no leading batch axis).
valid_features[107].shape

In [None]:
# Print the prediction for validation sample 107 and display the image.
predictions()

In [None]:
def nottrainableLayers(model):
    """Freeze ``model``: set ``trainable = False`` on each of its layers."""
    for frozen_layer in model.layers:
        frozen_layer.trainable = False

In [None]:
# Freeze every layer of the trained model.
nottrainableLayers(model)

In [None]:
# Print each layer's name with its trainable flag to verify the freeze.
for l in model.layers:
    print(l.name, l.trainable)

In [None]:
# Save the trained model in HDF5 format.
keras_file = "models/keras_CIFAR_Seq_Sparse_model/keras_CIFAR_Seq_Sparse.h5"
# Make sure the output directory exists so saving does not fail on a fresh
# checkout; harmless when the directory is already there.
os.makedirs(os.path.dirname(keras_file), exist_ok=True)
model.save(keras_file)

In [None]:
# Convert the saved Keras model to a TensorFlow Lite model.
# NOTE(review): from_keras_model_file and post_training_quantize look like
# TF 1.x converter APIs; confirm against the installed TensorFlow version.
converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_file)
converter.post_training_quantize=True
tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed.
with open("models/keras_CIFAR_Seq_Sparse_model/keras_CIFAR_Seq_Sparse.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)