## Import Libraries

In [1]:
import os
import scipy.io as sio
import shutil as sh
import random
import cv2
from utils import load_data

## Pre-Processing the Data

##### Crop each image to its bounding box to reduce background noise

In [2]:
# crop image using bounding box
# cropping will reduce noise from background
def crop_image(data, image_scr, image_des):
    """Write a bounding-box crop of every image listed in ``data``.

    Args:
        data: iterable of annotation rows. ``row[0]`` is the image file name;
            ``row[2]:row[4]`` selects the columns and ``row[3]:row[5]`` the
            rows of the image array that are kept.
        image_scr: directory holding the source images.
        image_des: directory receiving the crops (created when missing). Each
            crop keeps the file name of its source image.

    Raises:
        FileNotFoundError: if an image cannot be read. ``cv2.imread`` signals
            failure by returning ``None`` rather than raising, which would
            otherwise surface as an opaque ``TypeError`` on the slice below.
    """
    os.makedirs(image_des, exist_ok=True)
    for row in data:
        file_name = row[0]
        # os.path.join works whether or not the directory has a trailing slash.
        src_path = os.path.join(image_scr, file_name)
        im = cv2.imread(src_path)
        if im is None:
            raise FileNotFoundError("Could not read image: %s" % src_path)
        left, top, right, bottom = row[2], row[3], row[4], row[5]
        # The first array axis is vertical, so the row bounds come first.
        im_crop = im[top:bottom, left:right]
        cv2.imwrite(os.path.join(image_des, file_name), im_crop)

## Train/Validation Split
##### 80/20 rule was applied
##### Each image was moved into the sub-class directory for keras data augmentation

In [3]:
# create data sub-folder for keras training
def create_subdirectory(data, classes, image_path, split_ratio=0.8,
                        output_dir="data", seed=None):
    """Split images per class into training/validation sub-folders.

    For every entry of ``classes`` the matching rows of ``data`` are split at
    random and the image files are moved from ``image_path`` into
    ``<output_dir>/training/<tag>/`` or ``<output_dir>/validation/<tag>/``,
    where ``<tag>`` is the numeric label formatted as four digits.

    Args:
        data: sequence of rows where ``row[0]`` is the image file name and
            ``row[1]`` is the 1-based class label (index into ``classes``).
        classes: sequence of class names; ``classes[row[1] - 1]`` is the class
            of a row.
        image_path: directory that currently holds the image files.
        split_ratio: fraction of each class that goes to training
            (default 0.8, i.e. an 80/20 split).
        output_dir: root directory for the ``training`` and ``validation``
            trees (default ``"data"``).
        seed: optional seed for a reproducible split. When ``None`` the
            module-level ``random`` state is used, as before.

    Returns:
        Total number of images moved into the training tree.

    Raises:
        ValueError: if ``split_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError("split_ratio must be between 0 and 1, got %r" % (split_ratio,))

    # A private generator keeps a seeded split from disturbing global random state.
    rng = random.Random(seed) if seed is not None else random

    num_train_samples = 0  # count number of training samples
    for class_name in classes:  # split each class independently
        samples = [(row[0], row[1]) for row in data
                   if classes[row[1] - 1] == class_name]
        if not samples:
            # A class without images has nothing to move (and no tag to read).
            continue

        tag = samples[0][1]
        num_samples = len(samples)
        num_train = int(round(num_samples * split_ratio))
        # A set makes the per-image membership test O(1) instead of O(n).
        train_indexes = set(rng.sample(range(num_samples), num_train))
        num_train_samples += num_train

        class_dir = "%04d" % tag
        train_dir = os.path.join(output_dir, "training", class_dir)
        validation_dir = os.path.join(output_dir, "validation", class_dir)

        for i, (file_name, _) in enumerate(samples):
            target_dir = train_dir if i in train_indexes else validation_dir
            # Created lazily so that a split with no images leaves no empty folder.
            os.makedirs(target_dir, exist_ok=True)
            sh.move(os.path.join(image_path, file_name),
                    os.path.join(target_dir, file_name))
    return num_train_samples

## Crop Training/validation data

In [9]:
# Crop the training images into an intermediate directory.
print("Processing training data...")
# Raw training images are read from here ...
training_data_src = "cars_train/"
# ... and the cropped copies are written here.
training_data_des = "data/train_crop/"
# Annotation rows and class metadata come from the project's load_data helper.
# NOTE(review): its return layout is not visible in this notebook; crop_image
# indexes each row as [0] (file name) and [2]..[5] (crop bounds) - confirm.
training_data = load_data("devkit/cars_train_annos.mat", "data", "train")
classes = load_data("devkit/cars_meta.mat", "class", "train")
# Write one cropped image per annotation row into training_data_des.
crop_image(training_data, training_data_src, training_data_des)

Processing training data...


In [10]:
print("Number of training samples are:")
# Move the cropped images into per-class training/validation folders. The call
# is the last expression of the cell, so the notebook displays the returned
# training-sample count.
create_subdirectory(training_data, classes, training_data_des)

Number of training samples are:


6509

## Crop Testing data

In [11]:
# Crop the testing images straight into their final directory; no
# training/validation split is applied to them in this notebook.
print("Processing testing data..")
# Raw testing images are read from here ...
testing_data_src = "cars_test/"
# ... and the cropped copies are written here.
testing_data_des = "data/testing/"
# NOTE(review): the row layout returned by load_data is not visible here;
# crop_image indexes each row as [0] (file name) and [2]..[5] (crop bounds) - confirm.
testing_data = load_data("devkit/cars_test_annos.mat", "data", "test")
crop_image(testing_data, testing_data_src, testing_data_des)
print("Data processing completed.")

Processing testing data..
Data processing completed.


In [12]:
# Remove the intermediate crop directory: create_subdirectory has already moved
# its images into data/training and data/validation.
sh.rmtree("data/train_crop/")