# Train and Test Data

## Subset of the Data

In [None]:
import os
import shutil
import glob
import math
import re
import sys
# import vmmr_utils # 
import matplotlib.pyplot as plt
%matplotlib inline

# Maps each target class folder to the source dataset folders that are merged
# into it (neighbouring model years and, for some models, trim levels).
# "available" notes name further source folders that are not merged in.
cars = {
    # available "honda_civic_1999"
    "honda_civic_1998": ["honda_civic_1997", "honda_civic_1998"],
    # available "honda_accord_1998"
    "honda_accord_1997": ["honda_accord_1996", "honda_accord_1997"],
    # available "ford_f150_2008"
    "ford_f150_2006": ["ford_f150_2005", "ford_f150_2006", "ford_f150_2007"],
    # available "chevrolet_silverado_2005"
    "chevrolet_silverado_2004": [
        "chevrolet_silverado_2003",
        "chevrolet_silverado_2004",
    ],
    "toyota_camry_2014": [
        "toyota_camry_2012",
        "toyota_camry_2013",
        "toyota_camry_2014",
        "toyota_camry_le_2012",
        "toyota_camry_le_2013",
        "toyota_camry_le_2014",
        "toyota_camry_se_2012",
        "toyota_camry_se_2013",
        "toyota_camry_xle_2012",
        "toyota_camry_xle_2013",
    ],
    # available "nissan_altima_2016"
    "nissan_altima_2014": [
        "nissan_altima_2013",
        "nissan_altima_2014",
        "nissan_altima_2015",
    ],
    "toyota_corolla_2013": [
        "toyota_corolla_2011",
        "toyota_corolla_2012",
        "toyota_corolla_2013",
        "toyota_corolla_ce_2012",
        "toyota_corolla_le_2012",
        "toyota_corolla_le_2013",
        "toyota_corolla_s_2011",
        "toyota_corolla_s_2012",
    ],
    "dodge_ram_2001": [
        "dodge_ram_1500_2000",
        "dodge_ram_1500_2001",
        "dodge_ram_1500_1999",
        "dodge_ram_1500_1998",
        "dodge_ram_1500_1997",
        "dodge_ram_1500_1996",
        "dodge_ram_1500_1995",
    ],
    # Every 1500 folder for 2007-2013, followed by every 2500 folder.
    "gmc_sierra_2012": [
        "gmc_sierra_{}_{}".format(series, year)
        for series in (1500, 2500)
        for year in range(2007, 2014)
    ],
    "chevrolet_impala_2008": [
        "chevrolet_impala_2007",
        "chevrolet_impala_2008",
        "chevrolet_impala_2009",
    ],
}


# Source tree (one folder per make/model/year) and the merged output tree
# (one folder per key of `cars`).
full_dataset_path = "Dataset/SubsetVMMR"
stolen_cars_path = "Dataset/Most_Stolen_Cars"

# Always start from an empty output tree: drop any previous run, then recreate
# the root unconditionally so it exists even when `cars` is empty.
if os.path.exists(stolen_cars_path):
    shutil.rmtree(stolen_cars_path)
os.makedirs(stolen_cars_path)

for directory, car_list in cars.items():
    print("Creating", directory)
    car_directory_name = os.path.join(stolen_cars_path, directory)
    os.makedirs(car_directory_name, exist_ok=True)
    for car in car_list:
        # The trailing "" keeps a path separator at the end of the folder path.
        path = os.path.join(full_dataset_path, car, "")
        files = glob.glob(path + '*.jpg')
        if not files:
            # A missing or misspelled source folder would otherwise be
            # skipped silently and leave the class under-populated.
            print("  Warning: no .jpg images found in", path)
        for file in files:
            shutil.copy(file, car_directory_name)

In [None]:
import numpy as np
train_val_test_ratio = (.7, .1, .2)  # 70/10/20 Data Split
test_folder = 'Dataset/test/'
train_folder = 'Dataset/train/'
val_folder = 'Dataset/val/'

# Every image, one level below its class folder.
files = glob.glob(os.path.join(stolen_cars_path, '*', '*.jpg'))

# Class folder names; plain files in the root are ignored so they do not
# become empty "classes" in the split folders.
files_names = [
    name for name in os.listdir(stolen_cars_path)
    if os.path.isdir(os.path.join(stolen_cars_path, name))
]

# Remove existing split folders so a re-run does not mix old and new samples.
for split_folder in (test_folder, train_folder, val_folder):
    if os.path.exists(split_folder):
        shutil.rmtree(split_folder)

# Recreate one folder per class inside each split folder.
for file in files_names:
    os.makedirs(train_folder + file)
    os.makedirs(test_folder + file)
    os.makedirs(val_folder + file)

# Cumulative thresholds: a single uniform draw per image is compared against
# them, so the expected shares match the ratio tuple. (Drawing a second random
# number for the validation test would shrink the validation share to
# 0.3 * 0.1 = 3%.) The split is random per file, so the realised shares are
# only approximately 70/10/20.
train_cutoff = train_val_test_ratio[0]
val_cutoff = train_cutoff + train_val_test_ratio[1]

for file in files:
    # Derive class and file name with os.path so this works with both "/" and
    # "\\" separators instead of relying on Windows-style paths.
    class_name = os.path.basename(os.path.dirname(file))
    image_name = os.path.basename(file)

    draw = np.random.rand()
    if draw < train_cutoff:
        destination_root = train_folder
    elif draw < val_cutoff:
        destination_root = val_folder
    else:
        destination_root = test_folder

    shutil.copy(file, os.path.join(destination_root, class_name, image_name))

print("Done")

In [None]:
# Count the .jpg files that ended up in each split to sanity-check the ratio.
train_files, val_files, test_files = (
    glob.glob(split_root + '/*/*.jpg')
    for split_root in (train_folder, val_folder, test_folder)
)

for split_label, split_files in (
    ("Train: ", train_files),
    ("Val: ", val_files),
    ("Test: ", test_files),
):
    print(split_label, len(split_files))

## Sample Augmentation

In [None]:
import random
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Augmentation settings used only for this visual demo.
datagen = ImageDataGenerator(rotation_range=45,  # random rotation, in degrees
                             width_shift_range=0.2,  # horizontal shift (fraction of total width)
                             height_shift_range=0.2,  # vertical shift (fraction of total height)
                             zoom_range=0.3,  # random zoom
                             vertical_flip=True,  # random vertical flip
                             horizontal_flip=True,  # random horizontal flip
                             fill_mode="nearest")  # fill exposed pixels with the nearest value

# Pick one random test image as the example.
file_list = glob.glob("Dataset/test/*/*")
if not file_list:
    # random.choice on an empty list would raise an unhelpful IndexError.
    raise FileNotFoundError("No images found under Dataset/test/ - run the split cell first")
img_path = random.choice(file_list)
img = load_img(img_path)

# The class is the name of the folder that contains the image. Taking it from
# os.path works with either path separator; splitting on "/" and taking item
# 1 returned "test" (or a path fragment) instead of the class.
car_class = os.path.basename(os.path.dirname(img_path))

plt.imshow(img)
plt.axis("off")
plt.title("Original " + car_class, fontsize=16)

# flow() expects a batch, so add a leading axis of size 1.
img = img_to_array(img)
img = img.reshape((1,) + img.shape)

# Show a 2-row grid of augmented variants in a second figure.
n_augmentations = 4
n_columns = int(np.ceil(n_augmentations * 1. / 2))
plt.figure(figsize=(15, 6))
plt.suptitle("Augmented " + car_class, fontsize=16)  # set once, not per subplot

i = 0
# flow() yields batches indefinitely, so the loop must break explicitly.
for batch in datagen.flow(img,
                          batch_size=1,
                          seed=21):  # fixed seed for a reproducible demo
    plt.subplot(2, n_columns, i + 1)
    plt.imshow(array_to_img(batch[0]))
    plt.axis("off")

    i += 1
    if i >= n_augmentations:
        break

## Train, Validation and Test Dataset Generators

In [None]:
# Each generator below only applies `preprocess_input`; no augmentation
# arguments are passed to ImageDataGenerator in this cell. Images are read
# from the split folder (one sub-folder per class), resized to
# (WIDTH, HEIGHT) and served in batches with one-hot ("categorical") labels.

# Training batches
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
).flow_from_directory(
    train_folder,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Validation batches
val_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
).flow_from_directory(
    val_folder,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Test batches
test_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
).flow_from_directory(
    test_folder,
    target_size=(WIDTH, HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

## Oversampling Minority Classes in Training Set

In [None]:
def data_augment(data_dir, seed=21):
    """Write one augmented copy of every image in *data_dir* back into it.

    Each image gets a random rotation (up to 45 degrees) and a random
    horizontal flip. The copy is saved as a JPEG in the same folder, with a
    file-name prefix of ``<original stem>augmented``.

    Args:
        data_dir: Folder containing the images to augment.
        seed: Base random seed. Image number ``k`` (in sorted file-name
            order) uses ``seed + k``. Pass ``None`` for unseeded randomness.

    Note:
        Running this twice on the same folder also augments the copies
        produced by the first run.
    """
    datagen = ImageDataGenerator(rotation_range=45,
        horizontal_flip=True,
        fill_mode="nearest")

    # Take a sorted snapshot of the image files up front: new files are
    # written into this same folder while looping, and non-image entries
    # (sub-folders, hidden files) would make load_img fail.
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp")
    list_of_images = sorted(
        name for name in os.listdir(data_dir)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(data_dir, name))
    )

    for offset, img_name in enumerate(list_of_images):
        tmp_img_name = os.path.join(data_dir, img_name)
        img = img_to_array(load_img(tmp_img_name))
        img = img.reshape((1,) + img.shape)  # flow() expects a batch axis

        # A constant seed for every single-image flow would restart the
        # random stream identically each time, giving all images the same
        # rotation/flip. Offsetting the seed per image keeps the run
        # reproducible while varying the transform.
        image_seed = None if seed is None else seed + offset

        batch = datagen.flow(img,
            batch_size=1,
            seed=image_seed,
            save_to_dir=data_dir,
            save_prefix=img_name.split(".jpg")[0] + "augmented",
            save_format="jpg")

        # Pulling one batch is what triggers the save to disk. The builtin
        # next() works on every iterator; the `.next()` method is not
        # available in all Keras versions.
        next(batch)

# Training classes that get one extra augmented copy per image.
classes_to_augment = [
    "toyota_camry_2014",
    "nissan_altima_2014",
    "toyota_corolla_2013",
    "gmc_sierra_2012",
]

for class_names in classes_to_augment:
    print("Currently Augmenting:", class_names)
    # Augmented files are written next to the originals in the class folder.
    data_augment(os.path.join(train_folder, class_names))

## Resize Images to 299x299

In [None]:
#resize all images to 299x299

from PIL import Image
import os, sys

path = "Dataset/train/"
dirs = os.listdir( path )  # class folder names (kept for any later cell that reads it)

def resize(root=None, size=(299, 299)):
    """Resize every JPEG below *root* in place.

    The entries directly under the training folder are class directories, so
    testing them with ``os.path.isfile`` never matched and no image was ever
    resized. This version walks into the class folders and processes the
    image files themselves.

    Args:
        root: Folder to process recursively. Defaults to the module-level
            ``path`` (the training folder).
        size: Target ``(width, height)`` in pixels.
    """
    root = path if root is None else root
    for current_dir, _subdirs, file_names in os.walk(root):
        for file_name in file_names:
            if not file_name.lower().endswith((".jpg", ".jpeg")):
                continue  # skip anything that is not a JPEG image
            image_path = os.path.join(current_dir, file_name)
            # resize() loads the pixel data, so the source file can be closed
            # before it is overwritten below.
            with Image.open(image_path) as im:
                # LANCZOS is the same filter ANTIALIAS used to alias;
                # ANTIALIAS is no longer available in recent Pillow releases.
                imResize = im.resize(size, Image.LANCZOS)
            # replace the original image with the resized one
            imResize.save(image_path, 'JPEG', quality=90)

resize()

## Distribution of Selected Classes

In [None]:
import pygal
# Bar chart with one bar per training class, sized by the number of entries
# in that class folder.
line_chart = pygal.Bar(height=300)
line_chart.title = 'Training Class Distribution'

entries_per_class = {
    class_dir: len(os.listdir(os.path.join(train_folder, class_dir)))
    for class_dir in os.listdir(train_folder)
}
for class_dir, entry_count in entries_per_class.items():
    line_chart.add(class_dir, entry_count)

# Render the chart and open it in the default web browser.
line_chart.render_in_browser()

## Optimizations for CPU

In [None]:
from keras.models import Sequential, Model, load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, CSVLogger
from keras import optimizers, models
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras import applications
from keras import backend as K
import tensorflow as tf
import os

# Thread budget shared by the session config here and by any later cell that
# reads this constant.
NUM_PARALLEL_EXEC_UNITS = 8

# TF1-style session configuration, built through the compat.v1 API.
config = tf.compat.v1.ConfigProto(
    device_count={'CPU': NUM_PARALLEL_EXEC_UNITS},
    allow_soft_placement=True,
    inter_op_parallelism_threads=2,
    intra_op_parallelism_threads=NUM_PARALLEL_EXEC_UNITS,
)

# Session that carries the configuration above.
session = tf.compat.v1.Session(config=config)

## MKL and OpenMP

In [None]:
# Register the `session` created earlier in this notebook with the Keras backend.
K.set_session(session)

# Threading settings exported through environment variables (OMP_* / KMP_*).
# NOTE(review): these are assigned after TensorFlow has been imported and the
# session has been created; the OpenMP runtime may already have read its
# configuration by then, in which case they have no effect. Consider setting
# them before the first TensorFlow import - TODO confirm.
os.environ["OMP_NUM_THREADS"] = str(NUM_PARALLEL_EXEC_UNITS)
os.environ["KMP_BLOCKTIME"] = "1"
os.environ["KMP_SETTINGS"] = "1"
os.environ["KMP_AFFINITY"]= "granularity=fine,verbose,compact,1,0"

## Baseline Model

In [None]:
# InceptionV3 used as a feature extractor for transfer learning: ImageNet
# weights are loaded and the original classifier head is left off.
base_model = applications.InceptionV3(
    input_shape=(WIDTH, HEIGHT, 3),  # 3 colour channels
    include_top=False,
    weights='imagenet',
)

# First part of the new head: global average pooling over the base model's
# output, followed by a 1024-unit fully connected ReLU layer.
pooled_features = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(pooled_features)

In [None]:
train_flow = train_generator

# Final softmax layer with one output unit per class found by the training
# generator.
predictions = Dense(len(train_flow.class_indices), activation='softmax')(x)

# This is the model we will train: InceptionV3 input -> new classification head.
model = Model(inputs=base_model.input, outputs=predictions)

# First stage: train only the newly added layers. Every layer of the base
# model is frozen so its weights are not updated.
for layer in base_model.layers:
    layer.trainable = False

# Compile after changing `trainable`, so the frozen state takes effect.
# `learning_rate` is the current name of the Adam step-size argument; the old
# `lr` alias is deprecated and not accepted by newer Keras releases.
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'top_k_categorical_accuracy'],
)
model.summary()

## Start Training / Training Callbacks

In [None]:
import math
import os
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, CSVLogger
val_flow = val_generator

top_layers_file_path = "models/iv3-top-layers.h5"
log_dir = './logs'

# The checkpoint and the CSV log are written into these folders; create them
# up front so a fresh checkout does not fail on a missing directory.
os.makedirs(os.path.dirname(top_layers_file_path), exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

# Validation data is passed to training below, so model selection and early
# stopping watch the validation loss. Monitoring the training loss would keep
# whichever weights fit the training set best, overfit or not.
checkpoint = ModelCheckpoint(top_layers_file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
tb = TensorBoard(log_dir=log_dir, batch_size=val_flow.batch_size, write_graph=True, update_freq='batch')
early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
csv_logger = CSVLogger('./logs/iv3-log.csv', append=True)  # append=True: keep rows from earlier runs

# One epoch = one full pass over each generator; ceil() includes the final,
# possibly smaller, batch.
history = model.fit_generator(train_flow,
                              epochs=10,
                              verbose=1,
                              validation_data=val_flow,
                              validation_steps=math.ceil(val_flow.samples/val_flow.batch_size),
                              steps_per_epoch=math.ceil(train_flow.samples/train_flow.batch_size),
                              callbacks=[checkpoint, early, tb, csv_logger])


## Evaluate Model

In [None]:
# Restore the weights saved by the checkpoint callback, then score the model
# on the test generator.
model.load_weights(top_layers_file_path)
test_flow = test_generator

# ceil() so the final, possibly smaller, batch is evaluated too.
test_steps = math.ceil(test_flow.samples/test_flow.batch_size)
loss, acc, top_5 = model.evaluate_generator(test_flow, steps=test_steps, verbose = True)

# The three values follow the compile() order: loss first, then the metrics.
for metric_label, metric_value in (("Loss: ", loss), ("Acc: ", acc), ("Top 5: ", top_5)):
    print(metric_label, metric_value)

### Write Labels File

In [None]:
# Class names ordered by their numeric class index, so that label[i] is the
# name for output unit i of the model. Sorting by index makes this explicit
# instead of depending on the dict's iteration order.
class_indices = train_flow.class_indices
label = sorted(class_indices, key=class_indices.get)

# One class name per line; write-only mode and an explicit encoding keep the
# file identical across platforms.
with open('iv3-labels.txt', 'w', encoding='utf-8') as labels_file:
    labels_file.write("\n".join(label))

### Test Model with Sample image

In [None]:
from tensorflow.keras.preprocessing import image
import numpy as np
import glob
import random

# Pick a random test image; its true class is the name of its parent folder.
file_list = glob.glob("Dataset/test/*/*")
img_path = random.choice(file_list)
img_cat = os.path.basename(os.path.dirname(img_path))
print("Image Category: ", img_cat)

# Load at 299x299, add a batch axis and apply preprocess_input before predicting.
img = image.load_img(img_path, target_size=(299, 299))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
print("Raw Predictions: ", preds)

# Report the top_x classes, highest score first.
top_x = 3
scores = preds[0]
top_args = scores.argsort()[-top_x:][::-1]
preds_label = [label[class_index] for class_index in top_args]
top_scores = sorted(scores)[-top_x:][::-1]
print("\nTop " + str(top_x) + " confidence: " + " ".join(str(score) for score in top_scores))
print("Top " + str(top_x) + " labels: " + " ".join(str(name) for name in preds_label))

## Transform Keras Model to Tensorflow Frozen Graph