In [1]:
# Imports
import os
import numpy as np
import cv2
import pandas as pd 
from glob import glob
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau, EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [2]:
# Global configuration shared by the cells below.
# `height` and `width` (and `num_classes`, assigned once the labels are loaded)
# are read as module-level globals inside read_images_bbox() and parse().
# No `global` statements are needed here: names assigned at the top level of a
# cell are already module-level globals.

# Seeding
np.random.seed(42)
tf.random.set_seed(42)

# Hyper-parameters
height = 256
width = 320
batch_size = 16
lr = 1e-4
num_epochs = 10

# File paths
model_path = os.path.join("files", "model.h5")
csv_path = os.path.join("files", "log.csv")
dataset_path = "Stanford Car Dataset"

In [3]:
# Create a directory (and any missing parents) for the output files
def create_dir(path):
    """Create ``path`` as a directory if it does not already exist.

    ``exist_ok=True`` replaces the separate existence check, so there is no
    gap between "check" and "create" in which the directory could appear and
    make ``os.makedirs`` fail.

    Args:
        path: Directory to create; missing parent directories are created too.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [4]:
# Read the class-name table and return it as a plain list
def load_labels(path):
    """Return the class names stored in ``names.csv`` under ``path``.

    The file is read without a header row, its first rows are printed as a
    quick preview, and column 0 is returned as a Python list.
    """
    names_file = os.path.join(path, "names.csv")
    table = pd.read_csv(names_file, header=None)
    print(table.head())
    return table.loc[:, 0].tolist()

In [5]:
# Build parallel lists of image paths, bounding boxes and labels from an annotation CSV
def loaddata(path, classes, train=True):
    """Read one annotation file and return ``(images, bboxes, labels)``.

    ``anno_train.csv`` is read when ``train`` is ``True`` and ``anno_test.csv``
    otherwise; both are parsed without a header row.  Each row supplies the
    image file name (column 0), the box corners ``x1, y1, x2, y2``
    (columns 1-4) and a label index (column 5).

    Each image path is assembled as
    ``<path>/car_data/car_data/<train|test>/<class name>/<file name>``, where
    the class name is looked up as ``classes[label - 1]``.
    """
    if train == True:
        anno_file, subset = "anno_train.csv", "train"
    else:
        anno_file, subset = "anno_test.csv", "test"

    table = pd.read_csv(os.path.join(path, anno_file), header=None)
    subset_root = os.path.join(path, "car_data", "car_data", subset)

    images, bboxes, labels = [], [], []
    columns = (table[c] for c in range(6))
    for name, x1, y1, x2, y2, label in zip(*columns):
        bbox = [int(x1), int(y1), int(x2), int(y2)]
        label = int(label)
        images.append(os.path.join(subset_root, classes[label - 1], name))
        bboxes.append(bbox)
        labels.append(label)

    return images, bboxes, labels
            

In [6]:
# Load both annotation sets and carve a validation subset out of the training data
def load_dataset(path, classes, split=0.1):
    """Return ``(train, valid, test)`` triples of ``(images, bboxes, labels)``.

    The training annotations are loaded and ``int(len(images) * split)`` of
    them are held out for validation.  The three parallel lists go through one
    ``train_test_split`` call with ``random_state=42``, so the same shuffle is
    applied to all of them and images, boxes and labels stay aligned.  The
    test annotations are loaded separately and returned unchanged.
    """
    all_images, all_bboxes, all_labels = loaddata(path, classes, train=True)

    # Absolute number of validation samples (not a fraction).
    n_valid = int(len(all_images) * split)

    (train_images, valid_images,
     train_bboxes, valid_bboxes,
     train_labels, valid_labels) = train_test_split(
        all_images, all_bboxes, all_labels, test_size=n_valid, random_state=42
    )

    train_part = (train_images, train_bboxes, train_labels)
    valid_part = (valid_images, valid_bboxes, valid_labels)
    test_part = loaddata(path, classes, train=False)
    return train_part, valid_part, test_part
    

In [7]:
def read_images_bbox(path, bbox, label_i):
    """Load one sample and convert it to the arrays the model trains on.

    ``path`` is decoded from ``bytes`` first.  The function reads the
    module-level globals ``width``, ``height`` and ``num_classes``.

    Args:
        path: Image path as ``bytes``.
        bbox: Four box coordinates ``x1, y1, x2, y2`` in pixels of the
            original (un-resized) image.
        label_i: 1-based class index.

    Returns:
        Tuple ``(image, norm_bbox, class_label)`` of ``float32`` arrays: the
        image resized to ``(height, width)`` and scaled to ``[-1, 1]``, the box
        corners divided by the original image width/height, and a one-hot
        vector of length ``num_classes``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image.
    """
    path = path.decode()
    # Replace "/" with "-", then turn the platform separator into "/".
    # NOTE(review): where os.sep is already "/", the first replace also rewrites
    # the directory separators. Presumably this was written for Windows-style
    # paths whose only "/" comes from a class name; confirm before running elsewhere.
    dirs = path.replace("/", "-").replace(os.sep, "/")

    # cv2.imread reports failure by returning None instead of raising; without
    # this check the failure surfaces later as an AttributeError on `.shape`.
    # NOTE(review): OpenCV decodes to BGR channel order; confirm this matches
    # what the pretrained backbone expects.
    image = cv2.imread(dirs, cv2.IMREAD_COLOR)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {dirs}")

    # Keep the original size: the box coordinates refer to it.
    h, w = image.shape[:2]
    image = cv2.resize(image, (width, height))  # cv2.resize takes (width, height)
    image = ((image - 127.5) / 127.5).astype(np.float32)

    # Express the box corners as fractions of the original width/height.
    x1, y1, x2, y2 = bbox
    norm_bbox = np.array([x1 / w, y1 / h, x2 / w, y2 / h], dtype=np.float32)

    # One-hot encode the 1-based label.
    class_label = np.zeros(num_classes, dtype=np.float32)
    class_label[label_i - 1] = 1.0

    return image, norm_bbox, class_label

In [8]:
# Wrap the NumPy loader so it can run inside a tf.data pipeline
def parse(image, bbox, label):
    """Map one ``(path, bbox, label)`` element to ``image, (bbox, label)``.

    ``read_images_bbox`` is invoked through ``tf.numpy_function`` with three
    ``float32`` outputs; their static shapes are then set explicitly from the
    ``height``, ``width`` and ``num_classes`` globals.
    """
    output_types = [tf.float32, tf.float32, tf.float32]
    image, bbox, label = tf.numpy_function(read_images_bbox, [image, bbox, label], output_types)

    image.set_shape([height, width, 3])
    bbox.set_shape([4])
    label.set_shape([num_classes])

    return image, (bbox, label)

In [9]:
# Assemble a batched, prefetched tf.data.Dataset from the parallel sample lists
def tf_dataset(images, bboxes, labesl, batch=8):
    """Build the input pipeline for one data split.

    The three lists are sliced element-wise, each element is passed through
    ``parse``, and the results are grouped into batches of ``batch`` with a
    prefetch buffer of 10.
    """
    samples = tf.data.Dataset.from_tensor_slices((images, bboxes, labesl))
    parsed = samples.map(parse)
    batched = parsed.batch(batch)
    return batched.prefetch(10)

In [10]:
# Create the directories that the checkpoint and the CSV log are written to.
# Deriving them from the path constants keeps this cell correct if those change.
for out_dir in {os.path.dirname(model_path), os.path.dirname(csv_path)}:
    if out_dir:  # a bare file name has no directory component to create
        create_dir(out_dir)

In [5]:
# Class names come from names.csv; num_classes is read as a global by read_images_bbox() and parse().
classes = load_labels(dataset_path) # list of class names
num_classes = len(classes) # number of classes

                            0
0  AM General Hummer SUV 2000
1         Acura RL Sedan 2012
2         Acura TL Sedan 2012
3        Acura TL Type-S 2008
4        Acura TSX Sedan 2012


In [12]:
# Load the three splits; 20% of the training annotations are held out for validation.
# (The name `test_bboes` is kept unchanged in case other cells refer to it.)
(
    (train_images, train_bboxes, train_labels),
    (valid_images, valid_bboxes, valid_labels),
    (test_images, test_bboes, test_labels),
) = load_dataset(dataset_path, classes, split=0.2)

# Each line shows images - bboxes - labels; the three counts must be equal.
print(f"Train: {len(train_images)} - {len(train_bboxes)} - {len(train_labels)}")
print(f"Valid: {len(valid_images)} - {len(valid_bboxes)} - {len(valid_labels)}")
print(f"Test: {len(test_images)} - {len(test_bboes)} - {len(test_labels)}")

Train: 6516 - 6516 - 6516
Train: 1628 - 1628 - 1628
Train: 8041 - 8041 - 8041


In [13]:
# Build the batched input pipelines for the training and validation splits
train_ds = tf_dataset(train_images,train_bboxes,train_labels,batch=batch_size)
valid_ds = tf_dataset(valid_images,valid_bboxes,valid_labels,batch=batch_size)

In [14]:
# Sanity check: draw the box and class name of one training sample and save it to 1.png
for x, (b, y) in train_ds.take(1):
    # Sample 7 of the first batch, or the last sample if the batch is smaller.
    idx = min(7, x.shape[0] - 1)

    # Undo the (pixel - 127.5) / 127.5 scaling applied in read_images_bbox().
    # Multiplying by 255 would map the [-1, 1] range to [-255, 255], not [0, 255].
    image = x[idx].numpy() * 127.5 + 127.5
    image = np.clip(np.rint(image), 0, 255).astype(np.uint8)

    # The box is stored as fractions of width/height, so scale it back to pixels.
    img_h, img_w = image.shape[:2]
    x1 = int(b[idx][0] * img_w)
    y1 = int(b[idx][1] * img_h)
    x2 = int(b[idx][2] * img_w)
    y2 = int(b[idx][3] * img_h)
    image = cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 1)

    # Class name just above the top-left corner of the box.
    text_x = x1
    text_y = y1 - 10
    font_size = 1
    text = f"{classes[np.argmax(y[idx])]}"
    cv2.putText(image, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 0, 0), 1)

    cv2.imwrite("1.png", image)

# Model Creation

In [15]:
# Imports
from tensorflow.keras import layers as L
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2

In [16]:
# Build the detector: MobileNetV2 backbone with a box head and a classification head
def build_model(input_shape, num_classes=196):
    """Create the two-output Keras model.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        num_classes: Number of units in the softmax ``label`` output.

    Returns:
        A ``Model`` with outputs ``bbox`` (4 sigmoid units) and ``label``
        (``num_classes`` softmax units), in that order.
    """
    image_input = L.Input(input_shape)

    # ImageNet-initialised MobileNetV2 without its classifier, left trainable.
    base_net = MobileNetV2(
        input_tensor=image_input,
        include_top=False,
        weights="imagenet",
        alpha=1.0,
    )
    base_net.trainable = True
    base_net.summary()  # prints the backbone's layer table

    # Shared head: 1x1 conv -> batch norm -> ReLU -> global average pool -> dropout.
    features = L.Conv2D(256, kernel_size=1, padding="same")(base_net.output)
    features = L.BatchNormalization()(features)
    features = L.Activation("relu")(features)
    features = L.GlobalAveragePooling2D()(features)
    features = L.Dropout(0.5)(features)

    # Two sibling outputs on top of the shared features.
    bbox_out = L.Dense(4, activation="sigmoid", name="bbox")(features)
    label_out = L.Dense(num_classes, activation="softmax", name="label")(features)

    return Model(inputs=[image_input], outputs=[bbox_out, label_out])

In [17]:
# Pass the class count derived from the label file. Otherwise build_model()
# falls back to its default of 196, which only matches the one-hot labels
# produced by the data pipeline if the file happens to list exactly 196 classes.
model = build_model((height, width, 3), num_classes=num_classes)

# One loss per named output; the keys must match the output layer names.
model.compile(
    loss={
        "bbox": "binary_crossentropy",
        "label": "categorical_crossentropy",
    },
    optimizer=Adam(lr),
)

callbacks = [
    # Save the model to model_path whenever the monitored value improves.
    ModelCheckpoint(model_path, verbose=1, save_best_only=True),
    # Multiply the learning rate by 0.1 after 5 epochs without val_loss improvement.
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
    # Append per-epoch metrics to the CSV log (append=True keeps earlier runs).
    CSVLogger(csv_path, append=True),
    # NOTE(review): patience (20) is larger than num_epochs (10), so this
    # callback cannot trigger with the current settings.
    EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=False),
]


Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 320, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 128, 160, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 128, 160, 32  128         ['Conv1[0][0]']                  
                                )                                              

 ization)                                                                                         
                                                                                                  
 block_3_expand_relu (ReLU)     (None, 64, 80, 144)  0           ['block_3_expand_BN[0][0]']      
                                                                                                  
 block_3_pad (ZeroPadding2D)    (None, 65, 81, 144)  0           ['block_3_expand_relu[0][0]']    
                                                                                                  
 block_3_depthwise (DepthwiseCo  (None, 32, 40, 144)  1296       ['block_3_pad[0][0]']            
 nv2D)                                                                                            
                                                                                                  
 block_3_depthwise_BN (BatchNor  (None, 32, 40, 144)  576        ['block_3_depthwise[0][0]']      
 malizatio

                                                                                                  
 block_6_project_BN (BatchNorma  (None, 16, 20, 64)  256         ['block_6_project[0][0]']        
 lization)                                                                                        
                                                                                                  
 block_7_expand (Conv2D)        (None, 16, 20, 384)  24576       ['block_6_project_BN[0][0]']     
                                                                                                  
 block_7_expand_BN (BatchNormal  (None, 16, 20, 384)  1536       ['block_7_expand[0][0]']         
 ization)                                                                                         
                                                                                                  
 block_7_expand_relu (ReLU)     (None, 16, 20, 384)  0           ['block_7_expand_BN[0][0]']      
          

 block_10_depthwise_BN (BatchNo  (None, 16, 20, 384)  1536       ['block_10_depthwise[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 block_10_depthwise_relu (ReLU)  (None, 16, 20, 384)  0          ['block_10_depthwise_BN[0][0]']  
                                                                                                  
 block_10_project (Conv2D)      (None, 16, 20, 96)   36864       ['block_10_depthwise_relu[0][0]']
                                                                                                  
 block_10_project_BN (BatchNorm  (None, 16, 20, 96)  384         ['block_10_project[0][0]']       
 alization)                                                                                       
                                                                                                  
 block_11_

 block_14_expand_relu (ReLU)    (None, 8, 10, 960)   0           ['block_14_expand_BN[0][0]']     
                                                                                                  
 block_14_depthwise (DepthwiseC  (None, 8, 10, 960)  8640        ['block_14_expand_relu[0][0]']   
 onv2D)                                                                                           
                                                                                                  
 block_14_depthwise_BN (BatchNo  (None, 8, 10, 960)  3840        ['block_14_depthwise[0][0]']     
 rmalization)                                                                                     
                                                                                                  
 block_14_depthwise_relu (ReLU)  (None, 8, 10, 960)  0           ['block_14_depthwise_BN[0][0]']  
                                                                                                  
 block_14_

In [18]:
# Train the model. The History object returned by fit() is kept so the
# per-epoch metrics stay available as `history.history`, instead of leaving
# only an object repr as the cell output.
history = model.fit(
    train_ds,
    epochs=num_epochs,
    validation_data=valid_ds,
    callbacks=callbacks,
)

Epoch 1/10
Epoch 1: val_loss improved from inf to 5.32604, saving model to files\model.h5
Epoch 2/10
Epoch 2: val_loss improved from 5.32604 to 4.87283, saving model to files\model.h5
Epoch 3/10
Epoch 3: val_loss improved from 4.87283 to 4.47890, saving model to files\model.h5
Epoch 4/10
Epoch 4: val_loss improved from 4.47890 to 3.98948, saving model to files\model.h5
Epoch 5/10
Epoch 5: val_loss improved from 3.98948 to 3.49623, saving model to files\model.h5
Epoch 6/10
Epoch 6: val_loss improved from 3.49623 to 3.05377, saving model to files\model.h5
Epoch 7/10
Epoch 7: val_loss improved from 3.05377 to 2.68766, saving model to files\model.h5
Epoch 8/10
Epoch 8: val_loss improved from 2.68766 to 2.42058, saving model to files\model.h5
Epoch 9/10
Epoch 9: val_loss improved from 2.42058 to 2.22643, saving model to files\model.h5
Epoch 10/10
Epoch 10: val_loss improved from 2.22643 to 2.05403, saving model to files\model.h5


<keras.callbacks.History at 0x24f783b7760>