In [1]:
from torchvision.ops import nms
import torch
import numpy as np
from torch.utils.data import DataLoader, Dataset
from glob import glob
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib.patches as mpatches
from PIL import Image
from torchvision import transforms


# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Raw string literals: these Windows paths contain backslash sequences such as
# `\p` and `\D` that are not valid escapes. A raw string yields exactly the same
# text without relying on Python tolerating unknown escapes.
DATA_ROOT = r'D:\pycharm\DL-Pytorch\Dataset\open-data-car-truck\images/'
IMAGE_ROOT = f'{DATA_ROOT}/images'
DF_RAW = df = pd.read_csv(r'D:\pycharm\DL-Pytorch\Dataset\open-data-car-truck\df.csv')
print(DF_RAW.head())

# NOTE: every ImageID is a member of its own set of unique values, so this
# filter keeps all rows; its only effect is that `df` becomes a new frame
# instead of an alias of DF_RAW.
df = df[df['ImageID'].isin(df['ImageID'].unique().tolist())]

# Class-name <-> integer-target lookup tables. Index 0 is reserved for
# 'background'; the labels found in the CSV are numbered from 1 in order of
# first appearance.
label2target = {label: target for target, label in enumerate(DF_RAW['LabelName'].unique(), start=1)}
label2target['background'] = 0
target2label = {t:l for l,t in label2target.items()}
background_class = label2target['background']
num_classes = len(label2target)

            ImageID  Source LabelName  Confidence      XMin      XMax  \
0  0000599864fd15b3  xclick       Bus           1  0.343750  0.908750   
1  00006bdb1eb5cd74  xclick     Truck           1  0.276667  0.697500   
2  00006bdb1eb5cd74  xclick     Truck           1  0.702500  0.999167   
3  00010bf498b64bab  xclick       Bus           1  0.156250  0.371250   
4  00013f14dd4e168f  xclick       Bus           1  0.287500  0.999375   

       YMin      YMax  IsOccluded  IsTruncated  ...  IsDepiction  IsInside  \
0  0.156162  0.650047           1            0  ...            0         0   
1  0.141604  0.437343           1            0  ...            0         0   
2  0.204261  0.409774           1            1  ...            0         0   
3  0.269188  0.705228           0            0  ...            0         0   
4  0.194184  0.999062           0            1  ...            0         0   

   XClick1X  XClick2X  XClick3X  XClick4X  XClick1Y  XClick2Y  XClick3Y  \
0  0.421875  0.34

In [2]:
# Per-channel statistics shared by `normalize` and `denormalize` (they match the
# widely used ImageNet values). Keeping a single copy guarantees the two
# transforms stay exact inverses of each other.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Forward transform: y = (x - mean) / std, channel by channel.
normalize = transforms.Normalize(
    mean=CHANNEL_MEAN,
    std=CHANNEL_STD
)
# Inverse transform expressed as another Normalize:
#   (y - (-mean/std)) / (1/std) == y * std + mean == x
# The previous hand-typed constants used 0.255 instead of 0.225 for the third
# channel, so that channel was not restored correctly.
denormalize = transforms.Normalize(
    mean=[-m / s for m, s in zip(CHANNEL_MEAN, CHANNEL_STD)],
    std=[1 / s for s in CHANNEL_STD]
)
def preprocess_image(img):
    """Turn one image array into a normalized float tensor on `device`.

    The input is converted to a tensor, its axes are reordered with
    ``permute(2, 0, 1)`` (presumably height-width-channel to
    channel-height-width -- confirm against callers), the module-level
    `normalize` transform is applied, and the result is moved to `device`
    and cast to float32.
    """
    channels_first = torch.tensor(img).permute(2, 0, 1)
    normalized = normalize(channels_first)
    on_device = normalized.to(device)
    return on_device.float()

def find(item, original_list):
    """Collect the entries of `original_list` that contain `item`.

    Containment is tested with ``item in entry``.

    Returns:
        The single matching entry itself when exactly one entry matches;
        otherwise a list of all matches (empty when nothing matches).
    """
    matches = [entry for entry in original_list if item in entry]
    # Exactly one hit is unwrapped; zero or several hits come back as a list.
    return matches[0] if len(matches) == 1 else matches

In [3]:
class OpenDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by an annotation frame and an image folder.

    One item corresponds to one unique ``ImageID`` of the frame passed to the
    constructor. Images are resized to ``(w, h)``; the fractional box columns
    ``XMin, YMin, XMax, YMax`` are converted to pixel coordinates of the
    resized image.
    """
    # Width and height every image is resized to.
    w, h = 300, 300

    def __init__(self, df, image_dir=IMAGE_ROOT):
        """Remember the annotations and list the files found in `image_dir`.

        Args:
            df: annotation frame; must provide the columns ``ImageID``,
                ``LabelName``, ``XMin``, ``YMin``, ``XMax`` and ``YMax``.
            image_dir: directory whose entries are matched against image IDs.
        """
        self.image_dir = image_dir
        self.files = glob(self.image_dir+'/*')
        self.df = df
        self.image_infos = df.ImageID.unique()

    def __getitem__(self, ix):
        """Load the `ix`-th image together with its boxes and labels.

        Returns:
            img: ``(h, w, 3)`` float array with values scaled to [0, 1].
            boxes: list of ``[xmin, ymin, xmax, ymax]`` integers in pixel
                coordinates of the resized image.
            labels: list with the ``LabelName`` of every box.
        """
        image_id = self.image_infos[ix]
        # `find` returns a single path only when exactly one file name
        # contains the image ID.
        img_path = find(image_id, self.files)
        img = Image.open(img_path).convert("RGB")
        img = np.array(img.resize((self.w, self.h), resample=Image.BILINEAR))/255.
        # Look the annotations up in the frame this dataset was built from,
        # not in whatever notebook-level `df` happens to exist.
        rows = self.df[self.df['ImageID'] == image_id]
        labels = rows['LabelName'].values.tolist()
        # copy=True yields a private, writable array, so the in-place scaling
        # below can neither fail on a read-only view nor touch the frame.
        coords = rows[['XMin','YMin','XMax','YMax']].to_numpy(copy=True)
        coords[:,[0,2]] *= self.w
        coords[:,[1,3]] *= self.h
        boxes = coords.astype(np.uint32).tolist() # convert to absolute coordinates
        return img, boxes, labels

    def collate_fn(self, batch):
        """Assemble a list of ``__getitem__`` results into one training batch.

        Returns:
            images: tensor of all preprocessed images concatenated along a new
                leading batch dimension, on `device`.
            boxes: list with one float tensor per image; coordinates are
                divided by the resize width/height, i.e. fractional again.
            labels: list with one long tensor of class targets per image.
        """
        # Per-coordinate divisor (x by width, y by height) instead of a
        # hard-coded 300, so changing `w`/`h` on the class stays consistent.
        scale = torch.tensor([self.w, self.h, self.w, self.h], dtype=torch.float32, device=device)
        images, boxes, labels = [], [], []
        for item in batch:
            img, image_boxes, image_labels = item
            img = preprocess_image(img)[None]
            images.append(img)
            # reshape(-1, 4) keeps an image without boxes as a (0, 4) tensor.
            image_boxes = torch.tensor(image_boxes).float().reshape(-1, 4).to(device)
            boxes.append(image_boxes / scale)
            labels.append(torch.tensor([label2target[c] for c in image_labels]).long().to(device))
        images = torch.cat(images).to(device)
        return images, boxes, labels

    def __len__(self):
        """Number of distinct image IDs in the annotation frame."""
        return len(self.image_infos)

In [4]:
from sklearn.model_selection import train_test_split
# Split on image IDs (not on rows) so that all boxes of one image end up on the
# same side of the train/validation split.
trn_ids, val_ids = train_test_split(df.ImageID.unique(), test_size=0.1, random_state=99)
trn_df, val_df = df[df['ImageID'].isin(trn_ids)], df[df['ImageID'].isin(val_ids)]
# A bare expression in the middle of a cell is discarded, so print the sizes.
print(f'train rows: {len(trn_df)}, validation rows: {len(val_df)}')

train_ds = OpenDataset(trn_df)
test_ds = OpenDataset(val_df)

# Shuffle the training samples every epoch; the seeded generator keeps the
# order reproducible across kernel restarts. Validation order is left fixed.
train_loader = DataLoader(
    train_ds,
    batch_size=4,
    collate_fn=train_ds.collate_fn,
    drop_last=True,
    shuffle=True,
    generator=torch.Generator().manual_seed(99),
)
test_loader = DataLoader(test_ds, batch_size=4, collate_fn=test_ds.collate_fn, drop_last=True)

In [5]:
def train_batch(inputs, model, criterion, optimizer):
    """Run one optimization step on a single batch.

    Args:
        inputs: ``(images, boxes, labels)`` triple.
        model: called as ``model(images)``; must return two values, which are
            forwarded to `criterion` as its first two arguments.
        criterion: called as ``criterion(regr, clss, boxes, labels)`` and must
            return a scalar tensor that supports ``backward()``.
        optimizer: optimizer holding the parameters of `model`.

    Returns:
        The loss tensor of this batch, computed before the parameter update.
    """
    # The function depends only on its arguments; it no longer reads the
    # notebook-level `train_loader` (the value it fetched was never used).
    model.train()
    images, boxes, labels = inputs
    _regr, _clss = model(images)
    loss = criterion(_regr, _clss, boxes, labels)
    # Clear gradients left over from the previous step before accumulating
    # the new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss

def validate_batch(inputs, model, criterion):
    """Compute the loss of one batch without tracking gradients.

    The model is switched to evaluation mode, called on the images of
    `inputs` (an ``(images, boxes, labels)`` triple), and its two outputs are
    passed to `criterion` together with the boxes and labels.

    Returns:
        The loss tensor produced by `criterion`.
    """
    with torch.no_grad():
        model.eval()
        batch_images, gt_boxes, gt_labels = inputs
        predictions = model(batch_images)
        pred_regr, pred_clss = predictions
        return criterion(pred_regr, pred_clss, gt_boxes, gt_labels)

In [6]:
from model import SSD300, MultiBoxLoss
from detect import *

# Detector built for `num_classes` targets (background included, see the
# label maps) on the selected device.
model = SSD300(num_classes, device)

# The loss is constructed from the prior boxes exposed by the model.
criterion = MultiBoxLoss(
    priors_cxcy=model.priors_cxcy,
    device=device,
)

# AdamW over every model parameter.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)




Loaded base model.





In [None]:
# Mean loss per epoch, one entry per epoch in each list.
train_loss_epochs = []
val_loss_epochs = []
n_epochs=10

for epoch in range(n_epochs):
    print(epoch)
    trn_loss = []
    val_loss = []

    # Training pass: one optimizer step per batch.
    for inputs in train_loader:
        loss = train_batch(inputs, model, criterion, optimizer)
        trn_loss.append(loss.item())
    train_loss_epochs.append(np.average(trn_loss))

    # Validation pass: loss only, no parameter updates.
    for inputs in test_loader:
        loss = validate_batch(inputs, model, criterion)
        val_loss.append(loss.item())
    # Must run once per epoch (inside the loop); otherwise the list ends up
    # with a single value and cannot be plotted against `epochs`.
    val_loss_epochs.append(np.average(val_loss))

epochs = np.arange(n_epochs)+1
plt.plot(epochs, train_loss_epochs, 'bo', label='Training loss')
plt.plot(epochs, val_loss_epochs, 'r', label='Test loss')
plt.title('Training and Test loss over increasing epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
# Pass a real boolean: the string 'off' is a truthy value, not "disabled".
plt.grid(False)
plt.show()

0


In [None]:
def show_bbs(im, bbs, clss):
    """Display `im` in two panels: as is, and with boxes and captions drawn.

    Args:
        im: image accepted by ``Axes.imshow``.
        bbs: sequence of ``(xmin, ymin, xmax, ymax)`` boxes; when it is empty
            the second panel shows the plain image titled 'No objects'.
        clss: one caption string per box; any '@' is stripped before drawing.
    """
    fig, (left, right) = plt.subplots(ncols=2, nrows=1, figsize=(6, 6))

    # First panel: the untouched image.
    left.imshow(im)
    left.grid(False)
    left.set_title('Original image')

    # Second panel starts from the same image in both cases.
    right.imshow(im)
    if len(bbs) == 0:
        right.set_title('No objects')
        plt.show()
        return

    for idx, box in enumerate(bbs):
        xmin, ymin, xmax, ymax = box
        outline = mpatches.Rectangle(
            (xmin, ymin),
            xmax - xmin,
            ymax - ymin,
            fill=False,
            edgecolor='red',
            linewidth=1,
        )
        right.add_patch(outline)
        # Caption anchored 20 units below the box's top-left corner; this is
        # a pyplot-level call, so it draws on the current axes.
        caption = clss[idx].replace('@', '')
        plt.text(xmin, ymin + 20, caption, fontsize=10, color='red')

    right.grid(False)
    right.set_title('Predicted bounding box and class')
    plt.show()

from random import choice
# `glob` is bound to the function (`from glob import glob`), so `glob.glob(...)`
# is not a valid call here. The dataset already holds the listing of the very
# same pattern (IMAGE_ROOT + '/*'), so reuse it instead of scanning again.
image_paths = test_ds.files
# Pick one validation image at random and load it as RGB.
image_id = choice(test_ds.image_infos)
print(image_id)
img_path = find(image_id, test_ds.files)
original_image = Image.open(img_path, mode='r')
original_image = original_image.convert('RGB')

In [None]:
# `glob` is bound to the function (`from glob import glob`), so `glob.glob(...)`
# is not a valid call here; the dataset already lists the same folder.
image_paths = test_ds.files
# Run the detector on 20 randomly chosen validation images and show the results.
for _ in range(20):
    image_id = choice(test_ds.image_infos)
    img_path = find(image_id, test_ds.files)
    # Convert to RGB like every other place that loads an image in this
    # notebook, so grayscale or RGBA files reach the detector with 3 channels.
    original_image = Image.open(img_path, mode='r').convert('RGB')
    bbs, labels, scores = detect(original_image, model, min_score=0.9, max_overlap=0.5,top_k=200, device=device)
    # Map integer targets back to class names and attach the confidence.
    labels = [target2label[c.item()] for c in labels]
    label_with_conf = [f'{l} @ {s:.2f}' for l,s in zip(labels,scores)]
    print(bbs, label_with_conf)
    show_bbs(original_image, bbs=bbs, clss=label_with_conf)