In [85]:
import xml.etree.ElementTree as ET
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import random
from sklearn.model_selection import train_test_split 

In [2]:
def filelist(root, file_type):
    """Returns a fully-qualified list of filenames under root directory"""
    return [os.path.join(directory_path, f) for directory_path, directory_name, 
            files in os.walk(root) for f in files if f.endswith(file_type)]

def generate_train_df (annotation_path):
    annotations = filelist(annotation_path, '.xml')
    anno_list = []
    print(annotations)
    for anno_path in annotations:
        try: root = ET.parse(anno_path).getroot()
        except: continue
        #print(root)
        anno = {}
        try:
            anno['filename'] = Path(anno_path[:-3]+'jpg')
            anno['width'] = root.find("./size/width").text
            anno['height'] = root.find("./size/height").text
            anno['class'] = root.find("./object/name").text
            anno['xmin'] = int(root.find("./object/bndbox/xmin").text)
            anno['ymin'] = int(root.find("./object/bndbox/ymin").text)
            anno['xmax'] = int(root.find("./object/bndbox/xmax").text)
            anno['ymax'] = int(root.find("./object/bndbox/ymax").text)
            anno_list.append(anno)
        except: continue
    return pd.DataFrame(anno_list)

In [None]:
df_train = generate_train_df('/content/drive/MyDrive/techai01/dhwani/data/Final Train Dataset')

In [None]:
df_train.sample(20)

In [5]:
df_train.to_csv('Data.csv')

In [88]:
cnt = df_train['class'].value_counts()
class_dict = {}
j=0
for i in cnt.keys():
    if i not in class_dict: 
        class_dict[i] = j
        j+=1
    else: continue 

In [89]:
# we take only five labels: 
df_train['class'] = df_train['class'].apply(lambda x:  class_dict[x])

In [90]:
print(df_train.shape)
df_train.head()

(1944, 8)


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,2,965,892,1048,1041
1,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,3,440,823,689,1103
2,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1125,877,1,477,297,520,330
3,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1125,991,3,948,317,1125,469
4,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,1,514,933,1079,1748


In [91]:
df_train = df_train.drop(labels=[50], axis=0)

In [92]:
df_train.iloc[50]

filename    /content/drive/MyDrive/techai01/dhwani/data/Fi...
width                                                    1080
height                                                   1920
class                                                       7
xmin                                                        1
ymin                                                      846
xmax                                                      460
ymax                                                     1415
Name: 51, dtype: object

In [9]:
def create_mask(bb, x):
    """Creates a mask for the bounding box of same shape as image"""
    rows,cols,*_ = x.shape
    Y = np.zeros((rows, cols))
    bb = bb.astype(np.int)
    Y[bb[0]:bb[2], bb[1]:bb[3]] = 1.
    return Y

def mask_to_bb(Y):
    """Convert mask Y to a bounding box, assumes 0 as background nonzero object"""
    cols, rows = np.nonzero(Y)
    if len(cols)==0: 
        return np.zeros(4, dtype=np.float32)
    top_row = np.min(rows)
    left_col = np.min(cols)
    bottom_row = np.max(rows)
    right_col = np.max(cols)
    return np.array([left_col, top_row, right_col, bottom_row], dtype=np.float32)

def create_bb_array(x):
    """Generates bounding box array from a train_df row"""
    return np.array([x[5],x[4],x[7],x[6]])

In [10]:
def resize_image_bb(read_path,write_path,bb,sz):
    """Resize an image and its bounding box and write image to new path"""
    im = read_image(read_path)
    im_resized = cv2.resize(im, (int(1.49*sz), sz))
    Y_resized = cv2.resize(create_mask(bb, im), (int(1.49*sz), sz))
    new_path = str(write_path/read_path.parts[-1])
    cv2.imwrite(new_path, cv2.cvtColor(im_resized, cv2.COLOR_RGB2BGR))
    return new_path, mask_to_bb(Y_resized)

In [11]:
def read_image(img_path):
    img = plt.imread(img_path)
    return img

In [93]:
for i in df_train.index:
    try: 
        img = read_image(df_train.filename.iloc[i])
    except:
         df_train.drop(labels=[i], axis=0, inplace = True)

In [76]:
df_train.shape

(1764, 10)

In [94]:
#Populating Training DF with new paths and bounding boxes
new_paths = []
new_bbs = []
train_path_resized = Path('/content/drive/MyDrive/techai01/dhwani/data/images_resized')
for index, row in df_train.iterrows():
    try:
        new_path,new_bb = resize_image_bb(row['filename'], train_path_resized, create_bb_array(row.values),300)
        new_paths.append(new_path)
        new_bbs.append(new_bb)
    except: 
        new_paths.append([])
        new_bbs.append([])
df_train['new_path'] = new_paths
df_train['new_bb'] = new_bbs

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """


In [95]:
# modified from fast.ai
def crop(im, r, c, target_r, target_c): 
    return im[r:r+target_r, c:c+target_c]

# random crop to the original size
def random_crop(x, r_pix=8):
    """ Returns a random crop"""
    r, c,*_ = x.shape
    c_pix = round(r_pix*c/r)
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(2*rand_r*r_pix).astype(int)
    start_c = np.floor(2*rand_c*c_pix).astype(int)
    return crop(x, start_r, start_c, r-2*r_pix, c-2*c_pix)

def center_crop(x, r_pix=8):
    r, c,*_ = x.shape
    c_pix = round(r_pix*c/r)
    return crop(x, r_pix, c_pix, r-2*r_pix, c-2*c_pix)

In [96]:
def rotate_cv(im, deg, y=False, mode=cv2.BORDER_REFLECT, interpolation=cv2.INTER_AREA):
    """ Rotates an image by deg degrees"""
    r,c,*_ = im.shape
    M = cv2.getRotationMatrix2D((c/2,r/2),deg,1)
    if y:
        return cv2.warpAffine(im, M,(c,r), borderMode=cv2.BORDER_CONSTANT)
    return cv2.warpAffine(im,M,(c,r), borderMode=mode, flags=cv2.WARP_FILL_OUTLIERS+interpolation)

def random_cropXY(x, Y, r_pix=8):
    """ Returns a random crop"""
    r, c,*_ = x.shape
    c_pix = round(r_pix*c/r)
    rand_r = random.uniform(0, 1)
    rand_c = random.uniform(0, 1)
    start_r = np.floor(2*rand_r*r_pix).astype(int)
    start_c = np.floor(2*rand_c*c_pix).astype(int)
    xx = crop(x, start_r, start_c, r-2*r_pix, c-2*c_pix)
    YY = crop(Y, start_r, start_c, r-2*r_pix, c-2*c_pix)
    return xx, YY

def transformsXY(path, bb, transforms):
    if cv2.imread(str(path)) is not None: 
        x = cv2.imread(str(path)).astype(np.float32)
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)/255
    else: return None, None
    Y = create_mask(bb, x)
    if transforms:
        rdeg = (np.random.random()-.50)*20
        x = rotate_cv(x, rdeg)
        Y = rotate_cv(Y, rdeg, y=True)
        if np.random.random() > 0.5: 
            x = np.fliplr(x).copy()
            Y = np.fliplr(Y).copy()
        x, Y = random_cropXY(x, Y)
    else:
        x, Y = center_crop(x), center_crop(Y)
    return x, mask_to_bb(Y)

In [97]:
def create_corner_rect(bb, color='red'):
    bb = np.array(bb, dtype=np.float32)
    return plt.Rectangle((bb[1], bb[0]), bb[3]-bb[1], bb[2]-bb[0], color=color,
                         fill=False, lw=3)

def show_corner_bb(im, bb):
    plt.imshow(im)
    plt.gca().add_patch(create_corner_rect(bb))

In [98]:
df_train

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,new_path,new_bb
0,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,2,965,892,1048,1041,/content/drive/MyDrive/techai01/dhwani/data/im...,"[139.0, 399.0, 162.0, 433.0]"
1,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,3,440,823,689,1103,/content/drive/MyDrive/techai01/dhwani/data/im...,"[129.0, 182.0, 171.0, 284.0]"
2,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1125,877,1,477,297,520,330,/content/drive/MyDrive/techai01/dhwani/data/im...,"[101.0, 189.0, 112.0, 206.0]"
3,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1125,991,3,948,317,1125,469,/content/drive/MyDrive/techai01/dhwani/data/im...,"[96.0, 376.0, 141.0, 446.0]"
4,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1080,1920,1,514,933,1079,1748,/content/drive/MyDrive/techai01/dhwani/data/im...,"[146.0, 213.0, 272.0, 446.0]"
...,...,...,...,...,...,...,...,...,...,...
1850,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1680,1080,9,1057,678,1434,860,/content/drive/MyDrive/techai01/dhwani/data/im...,"[188.0, 281.0, 238.0, 381.0]"
1851,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1920,1080,3,650,229,1138,929,/content/drive/MyDrive/techai01/dhwani/data/im...,"[63.0, 151.0, 257.0, 264.0]"
1852,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1920,1080,2,845,331,870,387,/content/drive/MyDrive/techai01/dhwani/data/im...,"[92.0, 197.0, 107.0, 202.0]"
1853,/content/drive/MyDrive/techai01/dhwani/data/Fi...,1905,891,3,269,43,799,776,/content/drive/MyDrive/techai01/dhwani/data/im...,"[14.0, 63.0, 260.0, 187.0]"


In [99]:
X = df_train.drop(labels = ['class'], axis=1)
Y = df_train['class']

In [100]:
X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

In [101]:
def normalize(im):
    """Normalizes images with Imagenet stats."""
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    if im is None: return np.array([[[]]])
    return (im - imagenet_stats[0])/imagenet_stats[1]

In [102]:
class RoadDataset():
    def __init__(self, paths, bb, y, transforms=False):
        self.transforms = transforms
        self.paths = paths.values
        self.bb = bb.values
        self.y = y.values
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, idx):
        path = self.paths[idx]
        y_class = self.y[idx]
        x, y_bb = transformsXY(path, self.bb[idx], self.transforms)
        if x is None: return 
        x = normalize(x)
        x = np.rollaxis(x, 2)
        return x, y_class, y_bb

In [103]:
train_ds = RoadDataset(X_train['new_path'],X_train['new_bb'] ,y_train, transforms=True)
valid_ds = RoadDataset(X_val['new_path'],X_val['new_bb'],y_val)

In [104]:
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch
import torchvision.models as models

In [105]:
batch_size = 64
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size)

In [106]:
class BB_model(nn.Module):
    def __init__(self):
        super(BB_model, self).__init__()
        resnet = models.resnet34(pretrained=True)
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        
    def forward(self, x):
        x = self.features1(x)
        x = self.features2(x)
        x = F.relu(x)
        x = nn.AdaptiveAvgPool2d((1,1))(x)
        x = x.view(x.shape[0], -1)
        return self.classifier(x), self.bb(x)

In [107]:
def update_optimizer(optimizer, lr):
    for i, param_group in enumerate(optimizer.param_groups):
        param_group["lr"] = lr

In [108]:
def train_epocs(model, optimizer, train_dl, val_dl, epochs=10,C=1000):
    idx = 0
    for i in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y_class, y_bb in train_dl:
            batch = y_class.shape[0]
            if x is None: continue
            x = x.cuda().float()
            y_class = y_class.cuda()
            y_bb = y_bb.cuda().float()
            out_class, out_bb = model(x)
            loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss_bb = loss_bb.sum()
            loss = loss_class + loss_bb/C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            total += batch
            sum_loss += loss.item()
        train_loss = sum_loss/total
        val_loss, val_acc = val_metrics(model, valid_dl, C)
        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
    return sum_loss/total

In [26]:
def val_metrics(model, valid_dl, C=1000):
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0 
    for x, y_class, y_bb in valid_dl:
        batch = y_class.shape[0]
        x = x.cuda().float()
        y_class = y_class.cuda()
        y_bb = y_bb.cuda().float()
        out_class, out_bb = model(x)
        loss_class = F.cross_entropy(out_class, y_class, reduction="sum")
        loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
        loss_bb = loss_bb.sum()
        loss = loss_class + loss_bb/C
        _, pred = torch.max(out_class, 1)
        correct += pred.eq(y_class).sum().item()
        sum_loss += loss.item()
        total += batch
    return sum_loss/total, correct/total

In [None]:
model = BB_model().cuda()
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.006)

In [None]:
train_epocs(model, optimizer, train_dl, valid_dl, epochs=15)

In [None]:
# resizing test image
im = read_image('./road_signs/images_resized/road789.png')
im = cv2.resize(im, (int(1.49*300), 300))
cv2.imwrite('./road_signs/road_signs_test/road789.jpg', cv2.cvtColor(im, cv2.COLOR_RGB2BGR))

In [None]:
# test Dataset
test_ds = RoadDataset(pd.DataFrame([{'path':'./road_signs/road_signs_test/road789.jpg'}])['path'],pd.DataFrame([{'bb':np.array([0,0,0,0])}])['bb'],pd.DataFrame([{'y':[0]}])['y'])
x, y_class, y_bb = test_ds[0]

In [None]:
xx = torch.FloatTensor(x[None,])
xx.shape

In [None]:
# prediction
out_class, out_bb = model(xx.cuda())
out_class, out_bb

In [None]:
# predicted bounding box
bb_hat = out_bb.detach().cpu().numpy()
bb_hat = bb_hat.astype(int)
show_corner_bb(im, bb_hat[0])