In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip -q /content/drive/MyDrive/datasets/GP-total-finetune.zip -d ./
#!unzip -q /content/drive/MyDrive/datasets/GP-180-roboflow-eval.zip -d ./
!unzip -q /content/drive/MyDrive/datasets/GP-180-saurabh-eval.zip -d ./
!ln -s /content/drive/MyDrive/datasets/GP-180

In [None]:
%%shell

pip install cython
# Install pycocotools, the version by default in Colab
# has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
%%shell

# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
import torch
import torchvision
import torchvision.ops as ops
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torchvision.models.detection import faster_rcnn, rpn, FasterRCNN, backbone_utils,mask_rcnn
from torchvision import models,transforms

import cv2
import matplotlib.pyplot as plt

import os
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import math
from tqdm import tqdm
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt

In [None]:
epsilon = 1e-7

In [None]:
CPU = torch.device('cpu')
GPU = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Maps a product id (string) to a list of other product ids.
# The name suggests these are products that are easily confused with the key
# -- TODO confirm; no visible cell reads this dict.
# NOTE(review): the entry "#3008" under "3030" looks like an id that was meant
# to be commented out but ended up inside the string literal; confirm whether
# it should be "3008" or be removed.
hard_recogs = {
    "3009" : ["3011","3006"],
    "3030" : ["3002","3029","#3008"],
    "810" : ["716"],
    "715" : ["805"],
    "881" : ["882"],
    "886" : ["887"],
    "900" : ["898"],
    "908" : ["910"],
    "875" : ["874"],
    "882" : ["881"],
    "960" : ["938"],
    "805" : ["715"],
    "3220" : ["3217"],
    "3251" : ["3248"],
    "3255" : ["3205"],
    "3254" : ["3240","3232"],
    "3240" : ["3254"],
    "3247" : ["3234"],
    "3027" : ["3007"],
    "3007" : ["3027"],
    "3174" : ["3133","3144"]
}

In [None]:
class GroceryProducts(Dataset):
    """Single-class detection dataset read from ``root/images`` and ``root/labels``.

    Images and label files are paired by their position in the sorted
    directory listings, so both folders must contain matching files in the
    same sort order.

    Each non-empty label line has the form ``<annotation> x y w h`` where
    ``x, y`` is the box centre and ``w, h`` its size, all expressed as
    fractions of the image width/height.
    """

    # (width, height) every image is resized to before the boxes are scaled.
    IMG_SIZE = (410, 410)

    def __init__(self, root, transforms = None):
        """
        Args:
            root: directory containing the ``images`` and ``labels`` folders.
            transforms: optional callable ``(img, target) -> (img, target)``.
        """
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "images"))))
        self.labels = list(sorted(os.listdir(os.path.join(root, "labels"))))

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``img`` is a float tensor of shape (3, H, W) scaled to [0, 1].
        ``target`` holds ``boxes`` (N, 4) as absolute [x_min, y_min, x_max, y_max],
        ``labels`` (all ones), ``area``, ``iscrowd`` (all zeros), ``image_id``
        and ``annots`` (the first token of every label line, as strings).

        Raises:
            FileNotFoundError: if the image cannot be read by OpenCV.
        """
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        label_path = os.path.join(self.root, "labels", self.labels[idx])

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: {0}".format(img_path))
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,self.IMG_SIZE)
        img = img / 255.0
        H ,W = img.shape[0], img.shape[1]
        img = torch.from_numpy(img).float().permute(2,0,1)

        boxes = []
        annots = []
        # Context manager so the file handle is released even on a parse error.
        with open(label_path,'r') as txt_reader:
            for line in txt_reader:
                entry = line.split()
                if not entry:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                annots.append(entry[0])
                x, y, w, h = map(float,entry[1:])
                # Centre/size fractions -> absolute corner coordinates.
                x_min = (x - w/2) * W
                y_min = (y - h/2) * H
                x_max = (x + w/2) * W
                y_max = (y + h/2) * H
                boxes.append([x_min, y_min, x_max, y_max])

        # reshape keeps the (N, 4) layout even when the image has no boxes,
        # where as_tensor([]) alone would produce shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        image_id = torch.tensor([idx])

        # As we consider only one class for detection
        labels = torch.ones((boxes.shape[0],),dtype = torch.int64)
        # None of the instances is flagged as crowd
        iscrowd = torch.zeros((boxes.shape[0],),dtype = torch.int64)

        target = {}
        target["boxes"] = boxes
        target["image_id"] = image_id
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["annots"] = annots

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of files found in ``root/images``."""
        return len(self.imgs)

In [None]:
class RetailDataset(Dataset):
    """Flat folder of reference images, served as normalised 224x224 tensors.

    Every entry returned by ``os.listdir(root_dir)`` is treated as an image;
    samples are ordered by sorted file name (``img_list``).
    """

    def __init__(self,root_dir,transform=None):
        """
        Args:
            root_dir: directory whose files are the reference images.
            transform: optional callable applied to the (3, 224, 224) float tensor.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.img_list = sorted(os.listdir(root_dir))

    def __len__(self):
        """Number of files found in ``root_dir``."""
        return len(self.img_list)

    def __getitem__(self, idx):
        """Return image ``idx`` as a float tensor of shape (3, 224, 224) in [0, 1]
        (before ``transform``).

        Raises:
            FileNotFoundError: if OpenCV cannot read the file. Previously the
                path was printed and the next cv2 call failed with an
                unrelated-looking error.
        """
        img_path = os.path.join(self.root_dir,self.img_list[idx])
        image = cv2.imread(img_path)
        #image = load_tf_image(img_path)
        if image is None:
            # cv2.imread returns None for missing or non-image files.
            raise FileNotFoundError(
                "Could not read image {0} (index {1})".format(img_path, idx))

        image = cv2.resize(image,(224,224))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = torch.from_numpy(image)
        image = image.float() / 255
        image = torch.permute(image,[2,0,1])
        if self.transform:
            image = self.transform(image)
        return image

In [None]:
#files = sorted(os.listdir("/content/GP-180/train"))
# Directory of reference product images; RetailDataset treats every file in it
# as one gallery entry.
REF_DIR = "/content/GP-180/train"

# Untrained ResNet-18 skeleton. `res18` is a separate deep copy, so rebinding
# or modifying `resnet18` in a later cell does not touch the layers the
# ResNet18 embedder class picks up from `res18`.
resnet18 = models.resnet18(pretrained = False)
res18 = deepcopy(resnet18)

# Checkpoint with the fine-tuned embedder weights (read under the "model" key
# by the cell that builds `encoder`).
path_to_embed_weights = "/content/drive/MyDrive/ML_MODELS/resnet18_embed_ep43_full_tune.pt"

# Per-channel normalisation applied to reference images and, via preprocess(),
# to query crops -- both must go through the same transform.
tsfm = transforms.Compose([transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
stocks_ds = RetailDataset(REF_DIR,tsfm)
# shuffle=False keeps embedding row i aligned with stocks_ds.img_list[i].
stocks_dl = DataLoader(stocks_ds, batch_size = 64, shuffle = False)

In [None]:
ref_itr = iter(stocks_ds)
img = next(ref_itr)
print(img.shape)

In [None]:
class ResNet18(nn.Module):
    """Embedding network assembled from the layers of the module-level ``res18``.

    The forward pass max-pools the outputs of ``layer3`` and ``layer4`` and
    concatenates them along the channel dimension into one flat vector per
    image. The pooling kernels are fixed at 14x14 and 7x7; presumably these
    match the feature-map sizes for 224x224 inputs -- TODO confirm.

    Note: the layers are taken from ``res18`` by reference, so every instance
    of this class shares the same underlying modules.
    """

    def __init__(self,FREEZE = False):
        """
        Args:
            FREEZE: when true, call ``freeze_backbone`` right after construction.
        """
        super().__init__()
        stem = [res18.conv1, res18.bn1, res18.relu, res18.maxpool]
        self.entrypoint = nn.Sequential(*stem)
        self.layer1 = res18.layer1
        self.layer2 = res18.layer2
        self.layer3 = res18.layer3
        self.layer4 = res18.layer4
        self.maxpool_b3 = nn.MaxPool2d(kernel_size = (14,14))
        self.maxpool_b4 = nn.MaxPool2d(kernel_size = (7,7))

        if FREEZE:
            self.freeze_backbone()

    def freeze_backbone(self):
        """Disable gradients for the stem and layers 1-3; ``layer4`` stays trainable."""
        for stage in (self.entrypoint, self.layer1, self.layer2, self.layer3):
            stage.requires_grad_(False)

    def forward(self,X):
        """Return one flat embedding per input image (shape ``(X.shape[0], -1)``)."""
        feats3 = self.layer3(self.layer2(self.layer1(self.entrypoint(X))))
        feats4 = self.layer4(feats3)
        pooled = [self.maxpool_b3(feats3), self.maxpool_b4(feats4)]
        return torch.cat(pooled, dim = 1).view(X.shape[0], -1)

# Build the embedder and restore the fine-tuned weights stored under "model".
encoder = ResNet18()

# map_location remaps tensors saved from a CUDA session onto whatever device
# this runtime has, so the checkpoint also loads on a CPU-only machine.
chkpt = torch.load(path_to_embed_weights, map_location = GPU)
encoder.load_state_dict(chkpt["model"])
encoder.to(GPU)
# The encoder is only used for inference in this notebook; eval mode keeps
# BatchNorm from updating its running statistics on single-image queries.
encoder.eval()
print("Temporarily moved the encoder to GPU.")

In [None]:
def extract_embeddings(dataloader, model, D):
    """Embed every sample of ``dataloader`` and return L2-normalised row vectors.

    Args:
        dataloader: yields image batches (tensors); ``len(dataloader.dataset)``
            gives the total number of rows. Must not shuffle if row ``i`` is
            to correspond to dataset item ``i``.
        model: module mapping a batch to something that flattens to ``(batch, D)``.
            It is switched to eval mode as a side effect.
        D: width of one embedding.

    Returns:
        Tensor of shape ``(len(dataloader.dataset), D)`` on the module-level
        ``GPU`` device. Rows are unit length; an all-zero model output stays
        all-zero instead of turning into NaN.
    """
    embedder_dim = D

    model.eval()
    # Allocate directly on the target device instead of creating on CPU and copying.
    embeddings = torch.zeros((len(dataloader.dataset), embedder_dim), device = GPU)

    k = 0
    with torch.no_grad():
        for batch in tqdm(dataloader):
            batch = batch.to(GPU)
            batch_size = batch.shape[0]
            output = model(batch).view(batch_size,-1)
            # normalize() divides by max(norm, eps), so a zero vector does not
            # produce 0/0 = NaN the way a plain division by the norm does.
            output = torch.nn.functional.normalize(output, p = 2, dim = 1)
            embeddings[k : k + batch_size] = output
            k += batch_size

    return embeddings


# Width of one embedding. extract_embeddings preallocates a
# (num_images, embed_size) buffer, so this must equal the flattened size of
# the encoder output for a single image.
embed_size = 768
# One row per file in REF_DIR, in the same order as stocks_ds.img_list
# (stocks_dl is built with shuffle=False).
ref_embeddings = extract_embeddings(stocks_dl,encoder,embed_size)

print("\nShape of ref_embeddings is",ref_embeddings.shape)
print("Device of ref_embeddings",ref_embeddings.device)

In [None]:
ref_img_list = sorted(os.listdir(REF_DIR))
print(ref_img_list)
print(len(ref_img_list))

In [None]:
# Look-up table: reference file name with its last four characters (the
# extension, e.g. ".jpg") stripped -> that image's embedding row.
image_to_embed = {
    ref_img_list[row][:-4]: ref_embeddings[row]
    for row in range(len(ref_img_list))
}

#print(image_to_embed)

In [None]:
def re_rank_by_BRISK(target_img,top_k):
    """Re-order ``top_k`` by the number of BRISK keypoint matches with ``target_img``.

    Args:
        target_img: image handed straight to ``BRISK.detectAndCompute``
            (the caller in this notebook passes a grayscale uint8 array).
        top_k: list of reference image names without extension; each one is
            read from ``REF_DIR/<name>.jpg`` in grayscale and resized to 410x410.

    Returns:
        A new list with the same names as ``top_k``, ordered by decreasing
        number of cross-checked matches. Candidates with equal counts keep
        their incoming order. If no descriptors can be computed for
        ``target_img`` the incoming order is returned unchanged.

    Raises:
        FileNotFoundError: if a reference image cannot be read.
    """
    BRISK = cv2.BRISK_create()

    _, descriptors_target = BRISK.detectAndCompute(target_img,None)
    if descriptors_target is None:
        # No keypoints in the query (e.g. a flat crop): nothing to match
        # against, so keep the ranking produced by the global descriptor.
        return list(top_k)

    # BRISK descriptors are binary strings, for which Hamming distance is the
    # appropriate matcher norm (L1 treats the packed bytes as magnitudes).
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    matches_arr = []
    for imgFile in top_k:
        img_path = os.path.join(REF_DIR,imgFile + '.jpg')
        ref_img = cv2.imread(img_path,flags = cv2.IMREAD_GRAYSCALE)
        if ref_img is None:
            raise FileNotFoundError("Could not read reference image: {0}".format(img_path))
        ref_img = cv2.resize(ref_img,(410,410))

        _, descriptors_curr = BRISK.detectAndCompute(ref_img,None)
        if descriptors_curr is None:
            # A reference without keypoints cannot match anything.
            matches_arr.append(0)
        else:
            matches_arr.append(len(bf.match(descriptors_target,descriptors_curr)))

    # sorted() is stable (also with reverse=True), so ties preserve the
    # original embedding-distance order.
    order = sorted(range(len(top_k)), key = lambda pos: matches_arr[pos], reverse = True)
    return [top_k[pos] for pos in order]

class UnNormalize(object):
    """Undo a per-channel ``(x - mean) / std`` normalisation, in place."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): image whose FIRST dimension is the channel axis,
                i.e. size (C, H, W). Slice ``i`` along that dimension is
                paired with ``mean[i]`` / ``std[i]``; processing stops at the
                shortest of the three.
        Returns:
            Tensor: the same tensor object, modified in place so that each
            processed slice equals ``slice * std + mean``.
        """
        for idx, (m, s) in enumerate(zip(self.mean, self.std)):
            if idx >= len(tensor):
                break
            # Inverse of the normalisation step t.sub_(m).div_(s)
            tensor[idx].mul_(s).add_(m)
        return tensor

def preprocess(image,transform = None):
    """Resize an image to 224x224 and return it as a channel-first tensor.

    Args:
        image: either a torch tensor laid out (C, H, W) or an array laid out
            (H, W, C) that ``cv2.resize`` accepts.
        transform: optional callable applied to the resized (C, 224, 224) tensor.

    Returns:
        The resized tensor, after ``transform`` when one is given.
    """
    if isinstance(image,torch.Tensor):
        # Channel-first tensor -> channel-last numpy array for cv2.resize.
        image = image.permute(1,2,0).cpu().numpy()
    resized = cv2.resize(image,(224,224))
    as_tensor = torch.from_numpy(resized).permute(2,0,1)
    return transform(as_tensor) if transform else as_tensor

def yield_top_k_matches(cropped_img, k = 5, apply_BRISK = False):
    """Return the names of the ``k`` reference images closest to ``cropped_img``.

    The crop is resized and normalised with ``preprocess``/``tsfm``, embedded
    with the module-level ``encoder`` and compared to ``ref_embeddings`` by
    Euclidean distance.

    Args:
        cropped_img: query crop, in any form ``preprocess`` accepts.
        k: number of candidates to return.
        apply_BRISK: when true, re-order the ``k`` candidates with
            ``re_rank_by_BRISK`` using a grayscale version of the crop.

    Returns:
        List of reference file names with the last four characters
        (the extension) stripped, best match first.
    """
    cropped_img = preprocess(cropped_img,tsfm).unsqueeze(dim = 0).to(GPU)

    # Inference only: skip building an autograd graph for every query.
    with torch.no_grad():
        cropped_embed = encoder(cropped_img)
        # The (1, D) query broadcasts against the (N, D) reference matrix, so
        # there is no need to materialise N copies of it.
        distances = torch.linalg.norm(cropped_embed - ref_embeddings, dim = 1)
    #assert distances.device == GPU
    values,indices = distances.sort()

    top_k = [stocks_ds.img_list[x][:-4] for x in indices[:k].tolist()]

    #print("shape crop image",cropped_img.shape)
    #print("Global descriptor top k",top_k)
    if apply_BRISK:
        unorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
        # UnNormalize pairs mean/std with slices along the FIRST dimension, so
        # it must receive the (C, H, W) image. Passing the (1, C, H, W) batch
        # applied the channel-0 statistics to all three channels.
        restored = unorm(cropped_img[0].clone())

        # NOTE(review): the 230 scale factor is kept from the original code;
        # 255 would be the usual [0, 1] -> uint8 factor -- confirm intent.
        scaled = restored.permute(1,2,0).cpu().numpy() * 230
        # Clip before the cast so out-of-range values cannot wrap around in uint8.
        scaled = np.ascontiguousarray(np.clip(scaled, 0, 255).astype(np.uint8))
        gray_crop = cv2.cvtColor(scaled, cv2.COLOR_RGB2GRAY)
        top_k = re_rank_by_BRISK(gray_crop,top_k)

    return top_k

In [None]:
# Smoke test for the retrieval pipeline: crop a region out of one reference
# image and check which reference names come back as nearest neighbours.
img = cv2.imread("/content/GP-180/train/268.jpg")
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
img = cv2.resize(img,(410,410))
# Arbitrary crop window (rows 40-229, columns 30-239) of the resized image.
img = img[40:230,30:240,:]
img = img / 255.0
plt.imshow(img)
plt.show()
# yield_top_k_matches -> preprocess expects a channel-first tensor.
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
#img = img.unsqueeze(dim = 0)
#print("Shape of input image is ",img.shape)
pred_cls = yield_top_k_matches(img,k = 5,apply_BRISK = False)
print(pred_cls)

In [None]:
# Detection fine-tuning data. Each path must contain "images" and "labels"
# sub-folders (see GroceryProducts).
BATCH_SIZE = 8
FINE_TUNE_PATH_TRAIN = "/content/GP-total-finetune/train"
# NOTE(review): the test and validation paths are the same directory, so
# test_ds and valid_ds hold identical samples -- confirm this is intended.
FINE_TUNE_PATH_TEST = "/content/GP-180-saurabh-eval"
FINE_TUNE_PATH_VAL = "/content/GP-180-saurabh-eval"
WORKERS_no = 2

train_ds = GroceryProducts(FINE_TUNE_PATH_TRAIN)
valid_ds = GroceryProducts(FINE_TUNE_PATH_VAL)
test_ds = GroceryProducts(FINE_TUNE_PATH_TEST)

# utils.collate_fn comes from the torchvision detection reference scripts
# copied into the working directory by an earlier cell; it is needed because
# each sample carries a variable number of boxes.
train_dl = DataLoader(train_ds,
                      batch_size = BATCH_SIZE,
                      shuffle = True,
                      num_workers = WORKERS_no,
                      collate_fn = utils.collate_fn)

valid_dl = DataLoader(valid_ds,
                      batch_size = BATCH_SIZE,
                      shuffle = False,
                      num_workers = WORKERS_no // 2,
                      collate_fn = utils.collate_fn)

test_dl =  DataLoader(test_ds,
                      batch_size = BATCH_SIZE,
                      shuffle = False,
                      num_workers = WORKERS_no //2
                      ,collate_fn = utils.collate_fn)

In [None]:
def get_detection_model(num_classes,pre_trained = True):
    """Build a torchvision Faster R-CNN (ResNet-50 FPN) with a fresh box head.

    Args:
        num_classes: number of outputs of the new box predictor.
        pre_trained: forwarded as ``pretrained`` to
            ``fasterrcnn_resnet50_fpn``.

    Returns:
        The detector with ``roi_heads.box_predictor`` replaced by a new
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pre_trained)

    # The replacement head must accept the same feature width as the old one.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = faster_rcnn.FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [None]:
# Drop any detector left over from an earlier run so its memory can be freed.
# pop() with a default tolerates a fresh kernel, where a bare `del model`
# raises NameError and stops "Run All".
globals().pop("model", None)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# The detector distinguishes two classes: GroceryProducts labels every box
# with class 1, and the remaining class is presumably the background class
# torchvision detection models reserve -- TODO confirm.
num_classes = 2

# get the model using our helper function
model = get_detection_model(num_classes,pre_trained = True)
# The model is left on the CPU here; a later cell moves it to the GPU after
# the backbone has been swapped.
#model.to(device)
#print("model moved to device {0}".format(device))
#print(model)

In [None]:
#model.backbone.body.requires_grad_(False)

In [None]:
for parameter in model.backbone.body.parameters():
    print(parameter.requires_grad)

In [None]:
resnet18 = models.resnet18(pretrained = True)

In [None]:
backbone_res18 = nn.Sequential(resnet18.conv1,
                               resnet18.bn1,
                               resnet18.relu,
                               resnet18.maxpool,
                               resnet18.layer1,
                               resnet18.layer2,
                               resnet18.layer3,
                               resnet18.layer4,
                               resnet18.avgpool)

In [None]:
img = torch.rand(1,3,224,224)
output = backbone_res18(img)
print(output.shape)

In [None]:
# The body of a torchvision FPN backbone returns an ordered dict of feature
# maps (one per tapped layer), not a single tensor, so `out.shape` raises
# AttributeError. Print the shape of every entry instead.
with torch.no_grad():
    out = model.backbone.body(img)
for feat_name, feat_map in out.items():
    print(feat_name, feat_map.shape)

In [None]:
backbone_res18 = backbone_utils.resnet_fpn_backbone('resnet18',pretrained = True,trainable_layers = 1)

In [None]:
# Replace the ResNet-50 FPN backbone that get_detection_model created with
# the ResNet-18 FPN backbone built in the previous cell.
# NOTE(review): the checkpoint cells further down still use a
# "faster_rcnn_res50_fpn_..." file name and reload into an unmodified
# get_detection_model(); a state dict saved from this ResNet-18 variant would
# presumably not load there -- confirm which backbone the checkpoint holds.
model.backbone = backbone_res18

In [None]:
#model.backbone.body.requires_grad_(False)            # Freezing the backbone resnet50

In [None]:
# Override how many region proposals the RPN keeps before and after NMS,
# separately for training and testing. These are private attributes
# (leading underscore), so the keys may differ between torchvision versions.
model.rpn._pre_nms_top_n['training'] = 1000
model.rpn._post_nms_top_n['training'] = 600
model.rpn._pre_nms_top_n['testing'] = 500
model.rpn._post_nms_top_n['testing'] = 50
# Box-head NMS / score thresholds are left at their defaults during training.
#model.roi_heads.nms_thresh = 0.30
#model.roi_heads.score_thresh = 0.60

In [None]:
model.to(GPU)
pass

In [None]:
# construct an optimizer over the parameters that are still trainable
# (frozen backbone layers are excluded by the requires_grad filter)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0005,momentum=0.8, weight_decay=0.0005)
#optimizer = torch.optim.Adam(params, lr=0.005, weight_decay=0.0005)
# Warm-up settings: start at 1/1000 of the base learning rate and ramp up over
# at most 1000 iterations, capped below the number of batches in one epoch.
warmup_factor = 1. / 1000
warmup_iters = min(1000, len(train_dl) - 1)

# The active scheduler is the warm-up scheduler from the torchvision
# reference utils; it is meant to be stepped once per iteration.
lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
# Disabled alternative: a step scheduler that would decrease the learning
# rate by 10x every 3 epochs (stepped once per epoch).
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
#                                               step_size=3,
#                                               gamma=0.1)

In [None]:
# Baseline evaluation on the validation set before training.
# NOTE(review): validate_voc_format is not defined in any cell visible here,
# and a later cell unpacks three return values from it while this one unpacks
# two -- confirm where it is defined and what it returns.
_,_ = validate_voc_format(model,valid_ds)

Now let's train the model from `start_epoch` through `end_epoch` (epochs 31–40 as configured below), evaluating on the validation set every `eval_freq` epochs.

In [None]:
# Train from start_epoch through end_epoch (inclusive) and run validation on
# every epoch that is a multiple of eval_freq.
num_epochs = 40
print_freq = 5
eval_freq = 2
start_epoch = 31
end_epoch = 40

for epoch in range(start_epoch,end_epoch + 1):
    # train for one epoch, logging every print_freq iterations
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    for images, targets in metric_logger.log_every(train_dl, print_freq, header):

        images = list(image.to(GPU) for image in images)
        # Only tensors have .to(); the "annots" entry produced by
        # GroceryProducts is a plain list of strings and is passed through
        # unchanged (calling .to() on it raised AttributeError).
        targets = [{k: (v.to(GPU) if isinstance(v, torch.Tensor) else v)
                    for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            # Raise instead of sys.exit(1): `sys` is not imported in this
            # notebook, and exiting the interpreter is not appropriate in a
            # kernel anyway.
            raise RuntimeError("Loss is {}, stopping training".format(loss_value))

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # The warm-up scheduler advances once per iteration.
        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    # No extra per-epoch scheduler step: the only scheduler configured is the
    # per-iteration warm-up one, which finishes within the first epoch
    # (warmup_iters < len(train_dl)), so the former unguarded end-of-epoch
    # step had no effect on the learning rate.

    # evaluate on the validation dataset
    if epoch % eval_freq == 0:
        validate_voc_format(model, valid_ds)

In [None]:
# Rebuild the detector without downloading pretrained weights and restore the
# fine-tuned state stored under the checkpoint's "model" key.
num_classes = 2
model = get_detection_model(num_classes,pre_trained = False)
# map_location remaps tensors saved from a CUDA session onto the device
# available in this runtime, so the checkpoint also loads on a CPU-only machine.
ckpt = torch.load("/content/drive/MyDrive/ML_MODELS/faster_rcnn_res50_fpn_ft39(better-than-ft30).pt",
                  map_location = GPU)
# NOTE(review): get_detection_model builds a ResNet-50 FPN detector; if this
# checkpoint was saved after the backbone was swapped for ResNet-18, the
# state dict will not match -- confirm.
model.load_state_dict(ckpt["model"])
model.to(GPU)
print("detector moved to gpu")

In [None]:
# Inference-time settings for the reloaded detector. The model is not put in
# eval mode here (the call below is commented out).
#model.eval()
# Number of RPN proposals kept before / after NMS in testing mode
# (private attributes of the RPN module).
model.rpn._pre_nms_top_n['testing'] = 500
model.rpn._post_nms_top_n['testing'] = 30
# Box-head NMS threshold and minimum score for the final detections.
model.roi_heads.nms_thresh = 0.30
model.roi_heads.score_thresh = 0.60

In [None]:
predictions,statistics,wrong_recogs = validate_voc_format(model,valid_ds,top_k=1)

In [None]:
predictions = validate_ISI_format(model,valid_ds,top_k = 1)

In [None]:
print(len(wrong_recogs))

In [None]:
def plot_predictions(wrong_preds):
    """Show each mis-recognised crop next to the reference image predicted for it.

    Args:
        wrong_preds: sequence of entries indexed as
            ``entry[0]`` -- title for the crop (presumably its true label),
            ``entry[1]`` -- the crop as a (C, H, W) tensor,
            ``entry[2]`` -- name of the predicted reference image, read from
            ``REF_DIR/<name>.jpg``.

    Raises:
        FileNotFoundError: if a predicted reference image cannot be read.
    """
    for entry in wrong_preds:
        # Titles come from the entry being plotted. The function used to read
        # them from the global `wrong_recogs`, which ignored the argument.
        crop_title, crop_tensor, predicted_name = entry[0], entry[1], entry[2]

        gtruth = crop_tensor.cpu().permute(1,2,0).numpy()

        img1_path = os.path.join(REF_DIR,predicted_name + ".jpg")
        img1 = cv2.imread(img1_path)
        if img1 is None:
            raise FileNotFoundError("Could not read reference image: {0}".format(img1_path))
        img1 = cv2.cvtColor(img1,cv2.COLOR_BGR2RGB)

        fig, axs = plt.subplots(1,2,figsize = (14,6))
        axs[0].imshow(gtruth)
        axs[0].set_title(crop_title)

        axs[1].imshow(img1)
        axs[1].set_title(predicted_name)

In [None]:
plot_predictions(wrong_recogs)

In [None]:
# Element 3 of every statistics entry holds the per-detection verdicts;
# a verdict equal to -1 is counted as a wrong detection.
verdicts = [stat[3] for stat in statistics]
wrong_detections = sum(1 for verdict in verdicts for x in verdict if x == -1)

print("No. of Wrong detections!",wrong_detections)

In [None]:
print("Mean IoU   Std. IoU")
for stat in statistics:
    print("{0:.2f}        {1:.2f}".format(stat[1].mean(),stat[1].std()))

In [None]:
plot_pred_gt_side_by_side(valid_ds,predictions,judgements = verdicts)

In [None]:
# Persist model, optimizer and scheduler state plus the configured epoch count.
# NOTE(review): this writes to the same file name that an earlier cell loads
# the detector from, so running the notebook top to bottom overwrites that
# checkpoint with whatever `model` currently holds. `optimizer`/`lr_scheduler`
# were created for the model built before the reload -- confirm the saved
# states belong together.
PATH = "/content/drive/MyDrive/ML_MODELS"
torch.save({"model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler":lr_scheduler.state_dict(),
            "epochs": num_epochs },os.path.join(PATH,"faster_rcnn_res50_fpn_ft39(better-than-ft30).pt"))

In [None]:
# pick one image from the test set
img, target = test_ds[0]

# Keep up to 50 proposals after RPN NMS and use a 0.40 NMS threshold in the
# box head for this qualitative check (overrides the values set earlier).
model.rpn._post_nms_top_n["testing"] = 50
model.roi_heads.nms_thresh = 0.40

# put the model in evaluation mode
model.eval()
with torch.no_grad():
    # `device` is defined in an earlier cell with the same expression as GPU,
    # so the image lands on the device the model was moved to.
    prediction = model([img.to(device)])

In [None]:
# Draw the predicted boxes (left) and the ground-truth boxes (right) on
# separate copies of the test image.
#display(img.permute(1,2,0).numpy())
bboxes = prediction[0]["boxes"]
gtboxes = target["boxes"]
image = img.permute(1,2,0).numpy()
thickness = 2

color1 = (255,0,0)
color2 = (0,0,255)


def _draw_boxes(canvas, boxes, color):
    """Return ``canvas`` with one rectangle drawn per row of ``boxes``.

    Each row is read as (x_min, y_min, x_max, y_max) and truncated to ints.
    """
    for box in boxes:
        top_left = (int(box[0]),int(box[1]))
        bottom_right = (int(box[2]),int(box[3]))
        canvas = cv2.rectangle(canvas,top_left,bottom_right,color,thickness)
    return canvas


image1 = _draw_boxes(image.copy(), bboxes, color1)
image2 = _draw_boxes(image.copy(), gtboxes, color2)

#plt.figure(figsize = (15,20))
fig, axs = plt.subplots(1,2,figsize = (20,30),sharex = True)
axs[0].imshow(image1)
axs[1].imshow(image2)

plt.show()
#plot_img_with_boxes(img.permute(1,2,0).numpy(),prediction[0]["boxes"])