In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import OrderedDict
import cv2
from PIL import Image
import keras
import tensorflow as tf
from keras.utils import np_utils # For one-hot-encoding
from keras.models import Sequential # For creating sequenttial model
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
from keras.models import load_model # For saving and loading models

import random

In [None]:
# Show the installed TensorFlow version (rendered as this cell's output).
tf.__version__

In [None]:
# Turn on memory growth for every visible GPU so that TensorFlow allocates
# GPU memory as needed instead of grabbing all of it at start-up.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

In [None]:
# Root folder that holds one sub-directory of images per vehicle category.
# The trailing slash is kept because later cells build paths with `root + name`.
root = '../data/raw/vehicle/train/train/'

# Keep only real sub-directories: a stray file in the root (e.g. `.DS_Store`)
# would otherwise be treated as a category and make the per-category
# `os.listdir` below raise NotADirectoryError. Sorting makes the order
# independent of the file system.
categories = sorted(
    entry for entry in os.listdir(root)
    if os.path.isdir(os.path.join(root, entry))
)

# create dict of list of images per category
train_dict = {
    vehicle: os.listdir(os.path.join(root, vehicle))
    for vehicle in categories
}

In [None]:
# Sanity check: show the category names that were found and how many there are.
category_names = train_dict.keys()
print(category_names)
print(f'There are total {len(category_names)} categories of vehicles')

In [None]:
# Turn the {category: [filenames]} dict into a DataFrame: build it with one row
# per category, order the categories alphabetically, then flip it so that each
# category becomes a column of image filenames.
train_df = pd.DataFrame.from_dict(train_dict, orient='index').sort_index().T

In [None]:
# train set: preview the first rows (one column per vehicle category,
# each cell holding an image filename)
train_df.head()

In [None]:
# Column summary: the non-null count reported for each column is the number
# of image filenames listed for that category.
train_df.info()

The dataset is imbalanced; let's visualize the number of images per category.

In [None]:
# Category names and the number of images available for each of them.
# `count()` skips the missing values that pad the shorter columns.
cols = list(train_df.columns)
col_imgs = [train_df[col].count() for col in cols]

# Horizontal bar chart; title and axis labels let the figure stand on its own.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(cols, col_imgs)
ax.set_title("Number of images per vehicle category")
ax.set_xlabel("Number of images")
ax.set_ylabel("Vehicle category")
plt.show()

In [None]:
print("="*70)
print("Displaying 4 random image per vehicle category")
print("="*70)

# Number of thumbnails drawn for every category.
images_per_category = 4

# One figure (a single row of thumbnails) per category in `cols`.
# Iterating over `cols` itself instead of a hard-coded `range(15)` keeps the
# cell valid when the number of category folders changes.
for category in cols:
    category_dir = os.path.join(root, category)
    candidates = os.listdir(category_dir)  # listed once per category
    # Sample without replacement so the same picture is not shown twice, and
    # cope with categories that hold fewer images than requested.
    chosen = random.sample(candidates, min(images_per_category, len(candidates)))

    # A single `plt.subplots` call creates exactly one figure per category;
    # calling `plt.figure` twice would leave an extra, empty figure behind.
    fig, axes = plt.subplots(1, images_per_category, figsize=(20, 20), squeeze=False)

    for ax, filename in zip(axes[0], chosen):
        img = cv2.imread(os.path.join(category_dir, filename))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            ax.axis("off")
            continue
        # OpenCV loads pixels as BGR while matplotlib expects RGB; convert so
        # that colours are displayed correctly.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.set_title(category)

    # Hide the panels that received no image.
    for ax in axes[0][len(chosen):]:
        ax.axis("off")
plt.show()

### Prepare Data For Training

In [None]:
# Accumulators for the training set: image arrays and their integer labels.
data, labels = [], []

In [None]:
cols = sorted(cols)

# Integer label of every known class = its position in this list.
class_names = ['Ambulance', 'Bicycle', 'Boat', 'Bus', 'Car', 'Helicopter',
               'Limousine', 'Motorcycle', 'PickUp', 'Segway', 'Snowmobile',
               'Tank', 'Taxi', 'Truck', 'Van']
class_to_label = {name: index for index, name in enumerate(class_names)}
image_size = (224, 224)

# Start from empty lists so that re-running this cell does not append the
# whole dataset a second time.
data = []
labels = []

# Creating trainable 224x224 images
#                    -------
for vehicle_class in cols:
    print(vehicle_class + " started .....")

    # Resolve the label before touching any image: an unknown class must not
    # add images without labels, which would leave `data` and `labels` out of step.
    label = class_to_label.get(vehicle_class)
    if label is None:
        print("Something is wrong.")
        continue

    # `dropna()` skips the missing values (None / NaN) that pad the columns of
    # categories with fewer images.
    for filename in train_df[vehicle_class].dropna():
        image_path = os.path.join(root, vehicle_class, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print("Could not read image " + image_path)
            continue

        # OpenCV delivers BGR pixels; convert them so the array really is RGB
        # before it is handed to PIL as an "RGB" image.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        resized_image = Image.fromarray(rgb_image, "RGB").resize(image_size)

        data.append(np.array(resized_image))
        labels.append(label)

In [None]:
# Stack the per-image arrays and their labels into NumPy arrays.
vehicle_images_224x224 = np.array(data)
labels_224x224 = np.array(labels)

# Every image needs exactly one label; fail loudly instead of saving a
# misaligned dataset.
assert len(vehicle_images_224x224) == len(labels_224x224), \
    "number of images and number of labels differ"

# save (np.save appends the `.npy` extension); create the target folder first
# so the cell also works on a fresh checkout.
os.makedirs("../data/processed", exist_ok=True)
np.save("../data/processed/all-vehicle-224x224-images-as-arrays", vehicle_images_224x224)
np.save("../data/processed/corresponding-labels-for-all-224x224-images", labels_224x224)

In [None]:
# Optional shortcut: uncomment to load the arrays written by the `np.save`
# calls above (np.save appends the `.npy` extension) instead of rebuilding them.
# Note that this binds them to the names `data` / `labels`.
#data = np.load("../data/processed/all-vehicle-224x224-images-as-arrays.npy")
#labels = np.load("../data/processed/corresponding-labels-for-all-224x224-images.npy")

In [None]:
# Quick inspection: shape of the image array, shape of the label array and
# the distinct label values that are present.
for summary in (
    vehicle_images_224x224.shape,
    labels_224x224.shape,
    np.unique(labels_224x224),
):
    print(summary)

In [None]:
# Copy images into per-class `test` and `train` dirs (the raw files are left in place)
import shutil

raw_dir = "../data/raw/vehicle/train/train"
working_dir = "../data/working"
test_size = 300        # the first 300 images of every class form the test set
max_per_class = 1500   # images beyond this index are dropped (downsampling)

classes = ['Segway', 'Bicycle', 'Snowmobile', 'Van', 'PickUp', 'Truck', 'Helicopter', 'Motorcycle',
           'Tank', 'Bus', 'Taxi', 'Ambulance', 'Limousine', 'Boat', 'Car']

# `makedirs(..., exist_ok=True)` also creates the parent folders and does not
# fail when the cell is run a second time.
for split in ("test", "train"):
    for _class in classes:
        os.makedirs(os.path.join(working_dir, split, _class), exist_ok=True)

for _class in classes:
    # os.listdir returns entries in arbitrary order; sort them so that the
    # train/test split is the same on every run and every machine.
    images = sorted(os.listdir(os.path.join(raw_dir, _class)))

    test = images[:test_size]
    # Slicing clamps to the list length, so this covers both the
    # "fewer than 1500 images" case and the downsampling case.
    # NOTE: a class with 300 images or fewer ends up with an empty train set.
    train = images[test_size:max_per_class]

    for split, split_images in (("test", test), ("train", train)):
        for image in split_images:
            shutil.copy(
                os.path.join(raw_dir, _class, image),
                os.path.join(working_dir, split, _class, image),
            )
