### Training the weights and then generating Bottleneck Feature generation for sample cifar10 and traffic dataset using Vgg, Resnet and Inception

In [9]:
import tensorflow as tf
tf.version.VERSION


'1.15.8'

#### Exploring cifar10 dataset

In [4]:
from sklearn.model_selection import train_test_split
from keras.datasets import cifar10
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

(X_train, y_train), (_, _) = cifar10.load_data()
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [3]:
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)

(40000, 32, 32, 3)
(40000, 1)
(10000, 32, 32, 3)
(10000, 1)


#### Checking GPU Access

Here we have installed tensorflow-directml to access our AMD radeon graphics

TensorFlow with DirectML enables training and inference of complex machine learning models on a wide range of DirectX 12-compatible hardware

In [16]:
import tensorflow as tf
tf.config.experimental.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:DML:0', device_type='DML')]

In [17]:
tf.test.is_gpu_available()

True

#### Importing Necessary Modules

In [56]:
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.layers import Input, AveragePooling2D
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.datasets import cifar10
import pickle
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from tensorflow.keras import datasets, layers, models, losses

from keras.layers import Dense
from keras.activations import softmax
from keras.utils import to_categorical


import keras.backend as K

#### Defining the dataset to train, network to choose and set batch_size and height, width and channel of input 

In [57]:

# Make bottleneck features for either cifar10 or traffic dataset. 
dataset = 'cifar10'

# he model to bottleneck, ex. 'vgg', 'inception', or 'resnet
network = 'vgg'

# The batch size for the generator
batch_size = 16


h, w, ch = 224, 224, 3
if network == 'inception':
    h, w, ch = 299, 299, 3
    from keras.applications.inception_v3 import preprocess_input

img_placeholder = tf.placeholder("uint8", (None, 32, 32, 3))
resize_op = tf.image.resize_images(img_placeholder, (h, w), method=0)

#### Defining our generator Function to yield our batches of our Sample and batches of our labels

generator takes a session, data, labels, and batch size as input and generates batches of preprocessed data and corresponding labels using TensorFlow operations. The generator is used to feed the data into the model during the prediction phase.



In [58]:

def gen(session, data, labels, batch_size):
    def _f():
        start = 0
        end = start + batch_size
        n = data.shape[0]
 
        while True:
            X_batch = session.run(resize_op, {img_placeholder: data[start:end]})
            X_batch = preprocess_input(X_batch)
            y_batch = labels[start:end]
            
            # One-hot encode the labels
            y_batch = to_categorical(y_batch, num_classes=10)
            
            start += batch_size
            end += batch_size
            if start >= n:
                start = 0
                end = batch_size

            print(start, end)
            yield (X_batch, y_batch)

    return _f

#### Creating model

creates the selected CNN model (ResNet50, VGG16, or InceptionV3) and returns the model instance.

Modified our Vgg to train it on our cifar10 dataset


In [59]:
def create_model():
    input_tensor = Input(shape=(h, w, ch))
    if network == 'vgg':
        model = VGG16(input_tensor=input_tensor, include_top=True)
        x = model.output
        x = Dense(10, activation='softmax')(x)
        model = Model(model.input, x)
    elif network == 'inception':
        model = InceptionV3(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((8, 8), strides=(8, 8))(x)
        model = Model(model.input, x)
    else:
        model = ResNet50(input_tensor=input_tensor, include_top=False)
    return model


#### Loading the dataset selected either cifar10 or traffic signs and generating bottleneck features and training the weights 

loading the dataset (either CIFAR10 or a traffic signs) and splits it into training and validation sets

defining the output file names for saving the bottleneck features.

creating a TensorFlow session and sets it as the default session.

Training our Vgg network on cifar10 dataset

For the training dataset and validation dataset, the model.predict_generator method is called, passing the generator function and the number of samples in the training dataset. The resulting bottleneck features are saved along with the corresponding labels in a pickle file.

Steps i have taken 313 for training and 79 for validation using Steps = len(X_train)/batch_size in order to cover whole dataset.


In [60]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

if dataset == 'cifar10':
    (X_train, y_train), (_, _) = cifar10.load_data()
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
else:
    with open('data/train.p', mode='rb') as f:
        train = pickle.load(f)
    X_train, X_val, y_train, y_val = train_test_split(train['features'], train['labels'], test_size=0.33, random_state=0)

train_output_file = "{}_{}_{}.p".format(network, dataset, 'bottleneck_features_train')
validation_output_file = "{}_{}_{}.p".format(network, dataset, 'bottleneck_features_validation')

print("Resizing to", (w, h, ch))
print("Saving to ...")
print(train_output_file)
print(validation_output_file)

with tf.Session() as sess:
    K.set_session(sess)
    K.set_learning_phase(1)

    model = create_model()
    
    model.compile(optimizer='adam', loss=losses.categorical_crossentropy, metrics= ['accuracy'])
    
    # Running the training on a small sample of data (due to limited computational resources) 
    # in order to check our code
    X_train = X_train[:1000]
    y_train = y_train[:1000]
    X_val = X_val[:100]
    y_val = y_val[:100]

    print('Bottleneck training')
    train_gen = gen(sess, X_train, y_train, batch_size)
    model.fit_generator(train_gen(), steps_per_epoch=len(X_train) // batch_size, epochs=1)
    # predict_generator Generates predictions for the input samples from a data generator
    bottleneck_features_train = model.predict_generator(train_gen(), steps=2)
    data = {'features': bottleneck_features_train, 'labels': y_train}
    pickle.dump(data, open(train_output_file, 'wb'))

    print('Bottleneck validation')
    val_gen = gen(sess, X_val, y_val, batch_size)
    bottleneck_features_validation = model.predict_generator(val_gen(), steps=2)
    data = {'features': bottleneck_features_validation, 'labels': y_val}
    pickle.dump(data, open(validation_output_file, 'wb'))

Resizing to (224, 224, 3)
Saving to ...
vgg_cifar10_bottleneck_features_train.p
vgg_cifar10_bottleneck_features_validation.p
model.output:- Tensor("dense_5/Softmax:0", shape=(?, 10), dtype=float32)
Bottleneck training
Epoch 1/1
16 32
32 48
48 64
64 80
80 96
96 112
112 128
128 144
144 160
160 176
176 192
192 208
 1/62 [..............................] - ETA: 6:45 - loss: 2.3007 - accuracy: 0.1250208 224
 2/62 [..............................] - ETA: 5:20 - loss: 2.3055 - accuracy: 0.0938224 240
 3/62 [>.............................] - ETA: 4:48 - loss: 2.3005 - accuracy: 0.1042240 256
 4/62 [>.............................] - ETA: 4:31 - loss: 2.3041 - accuracy: 0.0781256 272
 5/62 [=>............................] - ETA: 4:20 - loss: 2.3041 - accuracy: 0.0875272 288
 6/62 [=>............................] - ETA: 4:10 - loss: 2.3045 - accuracy: 0.0729288 304
 7/62 [==>...........................] - ETA: 4:03 - loss: 2.3053 - accuracy: 0.0804304 320
 8/62 [==>...........................] - ET

#### Reading the pickle file generated on our sample dataset and exploring the features and labels

In [61]:
import pickle
import collections, numpy as np

training_file = "vgg_cifar10_bottleneck_features_train.p"
validation_file = "vgg_cifar10_bottleneck_features_validation.p"

with open(training_file, 'rb') as f:
    train_data = pickle.load(f)
with open(validation_file, 'rb') as f:
    validation_data = pickle.load(f)

In [62]:
print("Training Bottleneck features shape :-",train_data['features'].shape)
print("Training Bottleneck labels shape :-",train_data['labels'].shape)
print("Validation Bottleneck features shape :-",validation_data['features'].shape)
print("Validation Bottleneck labels shape :-",validation_data['labels'].shape)
print("Unique Classes :-", np.unique(train_data['labels']))
labels = train_data['labels'].reshape(train_data['labels'].shape[0])
counter = collections.Counter(labels)
print("Number of Examples in each Class :-", counter)

Training Bottleneck features shape :- (32, 10)
Training Bottleneck labels shape :- (1000, 1)
Validation Bottleneck features shape :- (32, 10)
Validation Bottleneck labels shape :- (100, 1)
Unique Classes :- [0 1 2 3 4 5 6 7 8 9]
Number of Examples in each Class :- Counter({7: 111, 8: 106, 4: 105, 9: 103, 3: 100, 6: 100, 0: 98, 5: 97, 1: 93, 2: 87})
