In [None]:
# Mount Google Drive at /content/drive; the dataset archives and model
# checkpoints used below are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Unpack the fine-tuning and evaluation archives from Drive into the
# current working directory (-q keeps unzip quiet).
!unzip -q /content/drive/MyDrive/datasets/GP-total-finetune.zip -d ./
#!unzip -q /content/drive/MyDrive/datasets/GP-180-roboflow-eval.zip -d ./
!unzip -q /content/drive/MyDrive/datasets/GP-180-saurabh-eval.zip -d ./
# Symlink (no copy) the GP-180 folder from Drive into the working directory.
!ln -s /content/drive/MyDrive/datasets/GP-180

In [None]:
# Symlink the Grozi-3.2k-mine folder from Drive into the working directory;
# later cells read it through /content/Grozi-3.2k-mine.
!ln -s /content/drive/MyDrive/datasets/Grozi-3.2k-mine

In [None]:
# Unpack the Grocery Products archive into the working directory.
!unzip -q /content/drive/MyDrive/datasets/Grozi3.2k_Grocery_products.zip -d ./

In [None]:
%%shell

# NOTE(review): cython is presumably needed to build pycocotools from source — confirm.
pip install cython
# Install pycocotools, the version by default in Colab
# has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [None]:
%%shell

# Download TorchVision repo to use some files from
# references/detection (copied next to the notebook so they can be imported
# as `utils`, `transforms`, `coco_eval`, `engine` and `coco_utils`).
# Skip the clone when the checkout already exists so the cell can be re-run
# without `git clone` failing on the existing directory.
if [ ! -d vision ]; then
    git clone https://github.com/pytorch/vision.git
fi
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

In [None]:
import torch
import torchvision
import torchvision.ops as ops
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torchvision.models.detection import faster_rcnn, rpn, FasterRCNN, backbone_utils,mask_rcnn
from torchvision import models,transforms

import cv2
import matplotlib.pyplot as plt
import json
import seaborn as sbn
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN,OPTICS

import os
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import math
from tqdm import tqdm
from copy import deepcopy
import numpy as np
from time import time


In [None]:
# Small constant added to metric denominators to avoid division by zero.
epsilon = 1e-7

In [None]:
# Device handles used throughout the notebook. GPU silently falls back to the
# CPU device when CUDA is not available.
CPU = torch.device('cpu')
GPU = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Root folder that annotation labels are joined onto to form reference-image paths.
grozi_root = "/content/Grozi-3.2k-mine"

@torch.no_grad()
def validate_voc_format(model,eval_dataset,top_k = 1,plot = False,apply_brisk = False,threshold = 0.20):
    """Evaluate detection + recognition by matching each predicted box to its best-IoU ground truth.

    For every image the detector's boxes are matched to the ground-truth box
    with the highest IoU. A prediction whose IoU is at least `threshold` is
    cropped and passed to `yield_top_k_matches`; it counts as correct when the
    matched ground-truth label is among the returned candidates.

    Args:
        model: detector called as `model([img])`, returning a list of dicts with a "boxes" entry.
        eval_dataset: indexable dataset yielding `(image, target)` where `target`
            has "boxes" and "annots" (label strings whose last four characters are dropped).
        top_k: number of recognition candidates requested per crop.
        plot: accepted for compatibility; not used.
        apply_brisk: forwarded to `yield_top_k_matches` as `apply_BRISK`.
        threshold: minimum IoU for a prediction to be sent to recognition.

    Returns:
        `(predictions, stats, wrong_recogs, AP)` where `predictions` holds the
        CPU box tensor per image, `stats` holds per image the tuple
        `(correct, best_ious, n_gt, n_unique_gt, n_unique_gt_found, marker)`
        (marker: 1 correct, -1 wrong, 0 not evaluated), `wrong_recogs` holds
        `(gt_label, crop, top_prediction)` triples, and `AP` is the averaged precision.
    """
    # Predictions whose best IoU is at or below this value are excluded from
    # the precision denominator.
    ignore_iou = 0.08
    # IoU forced onto predictions matched to a ground truth tagged "-1", which
    # pushes them below `ignore_iou`.
    unlabelled_iou = 0.07

    print("Starting evaluation .....\n")
    model.eval()
    stats = []
    predictions = []
    ignoreable = []
    wrong_recogs = []
    for idx in tqdm(range(len(eval_dataset))):
        img_ts,target_ts = eval_dataset[idx]
        img_ts = img_ts.to(GPU)

        # Labels are turned into the same "<root>/<path without extension>"
        # form that yield_top_k_matches returns.
        labels = [os.path.join(grozi_root,x[:-4]) for x in target_ts["annots"]]
        uniq_labels = set(labels)
        pred_uniq_labels = set()

        prediction_boxes = model([img_ts])[0]["boxes"].to(CPU)
        n_boxes_pred = prediction_boxes.shape[0]

        correct = torch.zeros(n_boxes_pred)
        marker = torch.zeros(n_boxes_pred)

        if n_boxes_pred and labels:
            iou_matrix = ops.box_iou(prediction_boxes,target_ts["boxes"])
            best_ious, best_iou_indices = iou_matrix.max(1)
            matched_gt = best_iou_indices.tolist()
        else:
            # No predictions or no ground truth: nothing can be matched, and
            # taking a max over an empty ground-truth axis would raise.
            best_ious = torch.zeros(n_boxes_pred)
            matched_gt = []

        # Without any ground truth every prediction overlaps nothing, so all
        # of them fall in the ignored bucket.
        ignore = 0 if matched_gt else n_boxes_pred

        for i, gt_idx in enumerate(matched_gt):     # processing one image at a time
            gt_label = labels[gt_idx]

            if gt_label[gt_label.rfind("/") + 1:] == "-1":
                best_ious[i] = unlabelled_iou

            if best_ious[i] <= ignore_iou:
                ignore += 1

            if best_ious[i] >= threshold:
                x1,y1,x2,y2 = (int(v) for v in prediction_boxes[i])
                cropped = img_ts[:,y1:y2,x1:x2]

                if cropped.numel() == 0:
                    # Integer truncation produced an empty crop that cannot be
                    # resized for recognition; count it as a miss.
                    marker[i] = -1
                    continue

                pcls = yield_top_k_matches(cropped,k = top_k,apply_BRISK = apply_brisk)

                if gt_label in pcls:
                    correct[i] = 1
                    marker[i] = 1
                    pred_uniq_labels.add(gt_label)
                else:
                    wrong_recogs.append((gt_label,cropped,pcls[0]))
                    marker[i] = -1

        ignoreable.append(ignore)
        stats.append((correct,
                      best_ious,
                      len(labels),
                      len(uniq_labels),
                      len(pred_uniq_labels),
                      marker))
        predictions.append(prediction_boxes)

    AP = 0
    AR = 0
    aPR = 0
    for (stat_img,ignored) in zip(stats,ignoreable):
        if len(stat_img[0]):
            AR += stat_img[0].sum()/(stat_img[2] + epsilon)
            AP += stat_img[0].sum()/( len(stat_img[0]) - ignored + epsilon)
            aPR += stat_img[4]/(stat_img[3] + epsilon)

    n_images = len(stats)
    print("\nlen of stats",n_images)
    if n_images:
        # Images without predictions contribute 0 but still count in the average.
        AP = AP/n_images
        AR = AR/n_images
        aPR = aPR/n_images
    Fscore = (2 * AP * AR) / (AP + AR + epsilon)

    print("[M.George] mAP @{0}:      {1:.3f}".format(threshold,AP))
    print("[--------] AR @{0}:       {1:.3f}".format(threshold,AR))
    print("[--------] AF-score @{0}: {1:.3f}".format(threshold,Fscore))
    print("[M.George] PR @{0}:       {1:.3f}".format(threshold,aPR))
    return predictions,stats,wrong_recogs,AP

@torch.no_grad()
def validate_ISI_format(model,eval_dataset,top_k = 1,plot = False,apply_brisk = False):
    """Evaluate detection + recognition using a centre-in-box matching rule.

    A predicted box is considered for scoring when its centre lies inside at
    least one ground-truth box. Its crop is recognised once with
    `yield_top_k_matches`; the prediction is a true positive when the label of
    any containing ground-truth box is among the candidates, otherwise a false
    positive. Predictions whose centre falls in no ground-truth box are not counted.

    Args:
        model: detector called as `model([img])`, returning a list of dicts with a "boxes" entry.
        eval_dataset: indexable dataset yielding `(image, target)` with "boxes" and "annots".
        top_k: number of recognition candidates requested per crop.
        plot: accepted for compatibility; not used.
        apply_brisk: forwarded to `yield_top_k_matches` as `apply_BRISK`.

    Returns:
        List with the CPU tensor of predicted boxes for every image.
    """
    model.eval()

    predictions = []
    Pr = []
    Rc = []
    Fsc = []
    for idx in tqdm(range(len(eval_dataset))):
        img_ts,target_ts = eval_dataset[idx]
        img_ts = img_ts.to(GPU)
        # Bring the labels into the "<root>/<path without extension>" form
        # returned by yield_top_k_matches (same conversion as
        # validate_voc_format); raw annotation strings can never match it.
        labels = [os.path.join(grozi_root,x[:-4]) for x in target_ts["annots"]]
        prediction_boxes = model([img_ts])[0]["boxes"].to(CPU)
        predictions.append(prediction_boxes)

        gt_boxes = target_ts["boxes"].tolist()
        TP = 0
        FP = 0
        for (x1,y1,x2,y2) in prediction_boxes.tolist():
            centre_x = (x1 + x2)/2
            centre_y = (y1 + y2)/2

            # Labels of every ground-truth box that contains the prediction centre.
            containing = [label
                          for ((X1,Y1,X2,Y2),label) in zip(gt_boxes,labels)
                          if X1 <= centre_x <= X2 and Y1 <= centre_y <= Y2]
            if not containing:
                continue

            cropped = img_ts[:,int(y1):int(y2),int(x1):int(x2)]
            if cropped.numel() == 0:
                # Integer truncation produced an empty crop that cannot be recognised.
                FP += 1
                continue

            # The crop is the same for every containing box, so recognise it once.
            pcls = yield_top_k_matches(cropped,k = top_k,apply_BRISK = apply_brisk)
            if any(label in pcls for label in containing):
                TP += 1
            else:
                FP += 1

        Precision = TP/(TP + FP + epsilon)
        Recall = TP /(len(labels) + epsilon)
        F_score = (2 * Precision * Recall) /(Precision + Recall + epsilon)

        Pr.append(Precision)
        Rc.append(Recall)
        Fsc.append(F_score)

    n_images = len(Pr)
    AP = sum(Pr)/n_images if n_images else 0.0
    AR = sum(Rc)/n_images if n_images else 0.0
    AFsc = sum(Fsc)/n_images if n_images else 0.0

    print("\n[ISI] Avg. Precision: {0:.3f}".format(AP))
    print("[ISI] Avg. Recall     {0:.3f}".format(AR))
    print("[ISI] Avg. F-score :  {0:.3f}".format(AFsc))
    return predictions

def plot_img_with_boxes(image,bboxes,thickness = 2, color = (255,0,0)):
    """Draw rectangles on a copy of `image` and show the result with matplotlib.

    Args:
        image: image whose first dimension of size 3 is treated as the channel
            axis (moved last before drawing); other inputs are used as given.
        bboxes: iterable of boxes read as (x1, y1, x2, y2).
        thickness: line thickness passed to cv2.rectangle.
        color: colour tuple passed to cv2.rectangle.
    """
    if isinstance(image,torch.Tensor):
        if image.shape[0] == 3:
            image = image.permute(1,2,0)
        image = image.detach().cpu().numpy()
    elif image.shape[0] == 3:
        image = np.transpose(image,(1,2,0))

    # np.array copies into a fresh C-contiguous buffer: cv2 needs a contiguous
    # array to draw on, and the copy keeps the caller's image untouched.
    canvas = np.array(image)
    for box in bboxes:
        start_pt = (int(box[0]),int(box[1]))
        end_pt = (int(box[2]),int(box[3]))
        canvas = cv2.rectangle(canvas,start_pt,end_pt,color,thickness)

    plt.figure(figsize = (15,13))
    plt.imshow(canvas)
    plt.show()

def plot_pred_gt_side_by_side(eval_dataset,
                              predictions,
                              color1 = (255,0,0),
                              color2 = (0,0,255),
                              color3 = (0,255,0),
                              color4 = (247, 243, 15),
                              thickness = 2,
                              judgements = None):
    """Show, for every image, the predicted boxes (left) next to the ground truth (right).

    Args:
        eval_dataset: indexable dataset yielding `(image, target)` with a CHW
            image tensor and `target["boxes"]`.
        predictions: per-image box tensors, aligned with `eval_dataset`.
        color1: colour for predictions (used for all boxes when no judgements are given).
        color2: colour for ground-truth boxes.
        color3: colour for predictions whose judgement is 0.
        color4: colour for predictions whose judgement is -1.
        thickness: line thickness passed to cv2.rectangle.
        judgements: optional per-image sequence with one value per predicted box.
    """
    # `if judgements:` is ambiguous for tensors/arrays, so test explicitly.
    has_judgements = judgements is not None and len(judgements) > 0

    for idx in range(len(eval_dataset)):
        image, target = eval_dataset[idx]
        gtboxes = target["boxes"]
        bboxes = predictions[idx]
        image = image.permute(1,2,0).cpu().numpy()
        # Independent contiguous copies so each panel gets its own drawings.
        image1 = image.copy()
        image2 = image.copy()

        fig, axs = plt.subplots(1,2,figsize = (20,30),sharex = True)
        for i in range(bboxes.shape[0]):
            start_pt = (int(bboxes[i][0]),int(bboxes[i][1]))
            end_pt = (int(bboxes[i][2]),int(bboxes[i][3]))

            box_color = color1
            if has_judgements:
                verdict = judgements[idx][i]
                if verdict == 0:
                    box_color = color3
                elif verdict == -1:
                    box_color = color4
            image1 = cv2.rectangle(image1,start_pt,end_pt,box_color,thickness)

        for i in range(gtboxes.shape[0]):
            start_pt = (int(gtboxes[i][0]),int(gtboxes[i][1]))
            end_pt = (int(gtboxes[i][2]),int(gtboxes[i][3]))
            image2 = cv2.rectangle(image2,start_pt,end_pt,color2,thickness)

        axs[0].imshow(image1)
        axs[1].imshow(image2)
        # Render and release each figure so a long dataset does not keep
        # every figure open at once.
        plt.show()
        plt.close(fig)
    
def display(image,fig_size = (10,8)):
    """Show `image` in a new matplotlib figure of size `fig_size`.

    An input whose first dimension has size 3 is treated as a channels-first
    tensor and converted to a channels-last numpy array before plotting.
    """
    pixels = image.permute(1,2,0).numpy() if image.shape[0] == 3 else image
    plt.figure(figsize = fig_size)
    plt.imshow(pixels)
    plt.show()

class GroceryProducts(Dataset):
    """Detection dataset read from `<root>/images` and `<root>/annotations`.

    Images and annotation files are paired by sorted order. Each item is a
    410x410 RGB image tensor (CHW, values in [0, 1]) and a target dict with
    "boxes", "image_id", "labels", "area", "iscrowd" and "annots".
    """

    def __init__(self, root, transforms = None):
        self.root = root
        self.transforms = transforms
        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.labels = sorted(os.listdir(os.path.join(root, "annotations")))

    @staticmethod
    def _read_annotations(label_path, width, height):
        """Parse one annotation file into `(annots, boxes)`.

        The first line is skipped as a header. Every other non-blank line is
        split on commas: field 1 is the annotation string and the remaining
        fields are x_min, x_max, y_min, y_max, which are scaled by
        `width`/`height` and stored as [x_min, y_min, x_max, y_max].
        """
        annots = []
        boxes = []
        # `with` guarantees the file is closed even when a line is malformed.
        with open(label_path,'r') as txt_reader:
            next(txt_reader, None)
            for line in txt_reader:
                if not line.strip():
                    continue
                entry = line.split(',')
                annots.append(entry[1])
                x_min, x_max, y_min, y_max = map(float,entry[2:])
                boxes.append([x_min * width, y_min * height, x_max * width, y_max * height])
        return annots, boxes

    def __getitem__(self, idx):
        # Image "<id>.<ext>" must pair with an annotation whose name is a
        # 5-character prefix followed by the same id and a 3-letter extension.
        num_img = self.imgs[idx][:-4]
        num_label = self.labels[idx][5:-4]
        assert num_img == num_label

        img_path = os.path.join(self.root, "images", self.imgs[idx])
        label_path = os.path.join(self.root, "annotations", self.labels[idx])

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure with None instead of raising.
            raise FileNotFoundError("Could not read image: {0}".format(img_path))
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(410,410))
        img = img / 255.0
        H ,W = img.shape[0], img.shape[1]
        img = torch.from_numpy(img).float()
        img = img.permute(2,0,1)

        annots, boxes = self._read_annotations(label_path, W, H)

        # reshape keeps a (0, 4) shape for images without annotations so the
        # column slicing below still works.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        image_id = torch.tensor([idx])

        # As we consider only one class for detection
        labels = torch.ones((boxes.shape[0],),dtype = torch.int64)
        # None of the instances is crowd (interpret as 'background')
        iscrowd = torch.zeros((boxes.shape[0],),dtype = torch.int64)

        target = {}
        target["boxes"] = boxes
        target["image_id"] = image_id
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        target["annots"] = annots

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [None]:
class RetailDataset(Dataset):
    """Dataset over a list of image paths that yields 224x224 RGB tensors.

    Each item is a float tensor in CHW layout with values in [0, 1], passed
    through `transform` when one is given.
    """

    def __init__(self,ref_img_list,transform=None):
        self.transform = transform
        self.img_list = ref_img_list

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img_path = self.img_list[idx]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for missing or non-image files; fail
            # with the offending path instead of an opaque cv2.resize error.
            raise FileNotFoundError(
                "Could not read image at index {0}: {1}".format(idx,img_path))

        image = cv2.resize(image,(224,224))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = torch.from_numpy(image)
        image = image.float() / 255
        image = torch.permute(image,[2,0,1])
        if self.transform:
            image = self.transform(image)
        return image

In [None]:
# Collect every file below the Food folder as a reference image path.
# os.walk yields entries in filesystem order, so directories and files are
# sorted to make the list (and everything indexed by it) reproducible.
ref_img_list = []
for (root,dirs,files) in os.walk("/content/Grozi-3.2k-mine/Food"):
    dirs.sort()   # in-place sort fixes the order in which os.walk descends
    for file in sorted(files):
        ref_img_list.append(os.path.join(root,file))
#print(ref_img_list)

In [None]:
# Distinct category folders: every reference path with its file name cut off.
db = {path[:path.rindex('/')] for path in ref_img_list}

print(db)

In [None]:
# Map every category folder to an integer index. `db` is a set of strings,
# whose iteration order changes between interpreter sessions, so it is sorted
# first to keep the indices (and everything keyed on them) stable across runs.
table = {}
for (idx,entry) in enumerate(sorted(db)):
    table[entry]=idx
print(table)

In [None]:
#files = sorted(os.listdir("/content/GP-180/train"))
#REF_DIR = "/content/GP-180/train"

# Randomly initialised ResNet-18 (pretrained = False); `res18` is the copy
# whose layers the ResNet18 wrapper class reuses.
resnet18 = models.resnet18(pretrained = False)
res18 = deepcopy(resnet18)

# Checkpoint with the embedding weights loaded into the encoder further down.
path_to_embed_weights = "/content/drive/MyDrive/ML_MODELS/resnet18_embed_gp_3.2k_ep6_OHNM.pt"

# Per-channel normalisation applied to reference images and query crops.
tsfm = transforms.Compose([transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
# shuffle = False keeps batches in ref_img_list order, so row i of the
# extracted embeddings corresponds to ref_img_list[i].
stocks_ds = RetailDataset(ref_img_list,tsfm)
stocks_dl = DataLoader(stocks_ds, batch_size = 128, shuffle = False)

In [None]:
# Dumps every reference image path (the full list).
print(stocks_ds.img_list)

In [None]:
# Visual sanity check on the first reference image. The dataset applies
# `tsfm`, so the image shown is the normalised tensor, not the raw picture.
ref_itr = iter(stocks_ds)
img = next(ref_itr)
display(img)

In [None]:
class ResNet18(nn.Module):
    """Embedding network assembled from the layers of the module-level `res18`.

    The outputs of `layer3` and `layer4` are each max-pooled (14x14 and 7x7
    windows), concatenated along the channel axis and flattened to one vector
    per input. The layers are taken from `res18` directly, not copied.
    """

    def __init__(self,FREEZE = False):
        super().__init__()
        stem_layers = (res18.conv1, res18.bn1, res18.relu, res18.maxpool)
        self.entrypoint = nn.Sequential(*stem_layers)
        self.layer1 = res18.layer1
        self.layer2 = res18.layer2
        self.layer3 = res18.layer3
        self.layer4 = res18.layer4
        self.maxpool_b3 = nn.MaxPool2d(kernel_size = (14,14))
        self.maxpool_b4 = nn.MaxPool2d(kernel_size = (7,7))

        if FREEZE:
            self.freeze_backbone()

    def freeze_backbone(self):
        """Disable gradients for the stem and layers 1-3; layer4 stays trainable."""
        for frozen in (self.entrypoint, self.layer1, self.layer2, self.layer3):
            frozen.requires_grad_(False)

    def forward(self,X):
        """Return the flattened concatenation of the pooled layer3 and layer4 features."""
        stem_out = self.entrypoint(X)
        feat3 = self.layer3(self.layer2(self.layer1(stem_out)))
        feat4 = self.layer4(feat3)
        pooled = torch.cat([self.maxpool_b3(feat3), self.maxpool_b4(feat4)],dim = 1)
        return pooled.view(X.shape[0],-1)

# Build the embedding network and restore its weights from the checkpoint's
# "model_state" entry, then place it on the GPU device.
encoder = ResNet18()

chkpt = torch.load(path_to_embed_weights,map_location = GPU)
encoder.load_state_dict(chkpt["model_state"])
encoder.to(GPU)
print("Temporarily moved the encoder to GPU.")

In [None]:
#del encoder

In [None]:
@torch.no_grad()
def extract_embeddings(dataloader, model, D):
    """Compute one L2-normalised embedding per dataset item.

    Args:
        dataloader: loader yielding image batches; it must not shuffle if row
            order is meant to follow the dataset order.
        model: network mapping a batch to per-item features (flattened to 2-D here).
        D: embedding dimension, i.e. the flattened feature size per item.

    Returns:
        Tensor of shape (len(dataloader.dataset), D) on the GPU device, rows in
        the order the loader produced them.
    """
    model.eval()

    # Allocate directly on the target device instead of on CPU followed by a copy.
    embeddings = torch.zeros((len(dataloader.dataset), D), device = GPU)

    k = 0
    for batch in tqdm(dataloader):
        batch = batch.to(GPU)
        batch_size = batch.shape[0]
        output = model(batch).view(batch_size,-1)
        # normalize divides by max(norm, eps), so an all-zero feature row
        # stays zero instead of turning into NaN.
        embeddings[k : k + batch_size] = nn.functional.normalize(output,p = 2,dim = 1)
        k += batch_size

    return embeddings

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

In [None]:
# Flattened size of the encoder output (concatenated pooled layer3 and layer4 features).
embed_size = 768
# Row i is the embedding of ref_img_list[i] because stocks_dl does not shuffle.
ref_embeddings = extract_embeddings(stocks_dl,encoder,embed_size)

print("\nShape of ref_embeddings is",ref_embeddings.shape)
print("Device of ref_embeddings",ref_embeddings.device)

In [None]:
# ref_img_list = sorted(os.listdir(grozi_root))
# print(ref_img_list)
# print(len(ref_img_list))

In [None]:
# Map each reference path (its last four characters, the extension, removed)
# to its embedding as a plain Python list, ready for JSON serialisation.
image_to_embed = {
    img_path[:-4]: embedding.cpu().tolist()
    for (img_path,embedding) in zip(ref_img_list,ref_embeddings)
}

#print(image_to_embed)

In [None]:
#for key in image_to_embed.keys():
#    print(key)
# Spot check on one hard-coded key; raises KeyError if that image is absent.
print(image_to_embed["/content/Grozi-3.2k-mine/Food/Biscuits/25"])

In [None]:
# Cache the path -> embedding mapping on Drive as JSON.
with open("/content/drive/MyDrive/datasets/Grozi-3.2k-mine/embeddings.json",'w') as fp:
    json.dump(image_to_embed,fp)

In [None]:
# Reload the cached path -> embedding mapping from Drive. The context manager
# closes the file handle, which a bare open() left dangling.
with open("/content/drive/MyDrive/datasets/Grozi-3.2k-mine/embeddings.json") as fin:
    image_to_embed = json.load(fin)
print(len(image_to_embed))

In [None]:
# One accumulator per category, starting at 0; embeddings are summed into these later.
cluster_heads = dict.fromkeys(table.keys(), 0)

In [None]:
# All embeddings as a list of lists, in the mapping's insertion order.
encodings = list(image_to_embed.values())

In [None]:
# Cluster the embeddings with OPTICS using its default parameters.
db_opt = OPTICS().fit(encodings)
# core_samples_mask = np.zeros_like(dbs.labels_, dtype=bool)
# core_samples_mask[dbs.core_sample_indices_] = True
# labels = dbs.labels_

# # Number of clusters in labels, ignoring noise if present.
# n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
# n_noise_ = list(labels).count(-1)

# print(f"No. of clusters = {n_clusters_} and No. of noisy points : {n_noise_}")

In [None]:
# Number of distinct cluster labels, not counting the -1 label when present.
n_clusters = len(set(db_opt.labels_)) - (1 if -1 in db_opt.labels_ else 0)
print(n_clusters)

In [None]:
# Colour per category index: the first five categories get distinct colours,
# every remaining one is 'white' (filtered out before plotting). The padding
# is sized from `table` instead of a hard-coded count so that indexing by
# category index works for any number of categories.
class_focus = ['red','green','blue','orange','violet']
class_focus.extend(['white'] * max(0, len(table) - len(class_focus)))

In [None]:
# For every image: record its category index and add its embedding to the
# category's running sum. NOTE: this accumulates into `cluster_heads`, so
# running the cell twice without re-initialising it doubles the sums.
class_idx = []
for path,embedding in image_to_embed.items():
    category = path[:path.rindex('/')]
    class_idx.append(table[category])
    cluster_heads[category] = cluster_heads[category] + torch.FloatTensor(embedding)

In [None]:
# One category index per image in image_to_embed.
print(len(class_idx))

In [None]:
# Scale each category's summed embedding to unit L2 norm (in place in the dict).
# A category that received no embeddings is still the integer 0 at this point.
for key in cluster_heads.keys():
    cluster_heads[key] = cluster_heads[key]/torch.linalg.norm(cluster_heads[key],ord=2,dim =0)

In [None]:
# Project the embeddings to 2-D with t-SNE. t-SNE is stochastic, so the seed
# is fixed to make the scatter plot reproducible across runs.
encodings = torch.stack([torch.FloatTensor(value) for value in image_to_embed.values()])
X = TSNE(n_components = 2 ,n_jobs = 4,random_state = 0).fit_transform(encodings.numpy())

In [None]:
# Keep category indices 0-4 as they are and fold every other index into 10.
class_reduce = [x if x < 5 else 10 for x in class_idx]

In [None]:
# Plot colour per image, looked up from its category index; class_focus must
# have an entry for every index in `table`.
color_hash = [class_focus[table[key[0:key.rindex('/')]]] for key in image_to_embed.keys()]

In [None]:
# Keep only the projected points whose colour is not 'white', together with
# their colours, in the original order.
kept = [i for i in range(len(X)) if color_hash[i] != 'white']
X_reduce = [X[i] for i in kept]
hash_reduce = [color_hash[i] for i in kept]

In [None]:
# Stack the kept points into one array so columns can be sliced for plotting.
X_reduce = np.array(X_reduce)
X_reduce.shape

In [None]:
# Scatter plot of the t-SNE projection for the non-'white' categories,
# coloured per category.
plt.figure(figsize = (12,10))
plt.scatter(X_reduce[:,0],X_reduce[:,1],c = hash_reduce)
plt.show()

In [None]:
# Pairwise similarity between category heads, defined as 1 minus their L2
# distance. The matrix is sized from the number of categories instead of a
# hard-coded 27, which would fail or leave empty rows for any other count.
head_keys = list(cluster_heads.keys())
pair_wise_similarity = torch.zeros(len(head_keys),len(head_keys))
for (row,key1) in enumerate(head_keys):
    for (col,key2) in enumerate(head_keys):
        pair_wise_similarity[row][col] = 1 - torch.linalg.norm(cluster_heads[key1] - cluster_heads[key2],ord = 2,dim = 0)

#print(pair_wise_similarity)

In [None]:
# Short display names: the last path component of every category folder.
categories = [key[key.rindex('/') + 1:] for key in cluster_heads.keys()]
# NOTE(review): overrides whichever category sits at position 2; this depends
# on the key order of cluster_heads — confirm it targets the intended category.
categories[2] = 'DryFruits&Nuts'
print(categories)

In [None]:
#print(len(cluster_heads))
# Heatmap of the category-to-category similarity matrix, with the colour
# scale centred at 0.5 and category names on both axes.
plt.figure(figsize = (22,15))
sbn.set(font_scale = 1.3)
Ax = sbn.heatmap(pair_wise_similarity,center = 0.5,xticklabels = categories,yticklabels = categories,linewidths = 0.01)
plt.xticks(rotation = 45)
plt.show()

In [None]:
class UnNormalize(object):
    """Undo a per-channel normalisation, i.e. compute `t * std + mean` channel by channel."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Rescale `tensor` in place along its first dimension and return that same tensor."""
        for channel, (channel_mean, channel_std) in zip(tensor, zip(self.mean, self.std)):
            # Inverse of the normalise step t.sub_(m).div_(s).
            channel.mul_(channel_std)
            channel.add_(channel_mean)
        return tensor

def preprocess(image,transform = None):
    """Resize an image to 224x224 and return it as a channels-first tensor.

    A torch tensor input is taken as channels-first and converted to a
    channels-last numpy array on the CPU; any other input is handed to
    cv2.resize as it is. `transform`, when given, is applied to the result.
    """
    pixels = image.permute(1,2,0).cpu().numpy() if isinstance(image,torch.Tensor) else image
    resized = torch.from_numpy(cv2.resize(pixels,(224,224))).permute(2,0,1)
    return transform(resized) if transform else resized

def yield_top_k_matches(cropped_img, k = 5, apply_BRISK = False):
    """Return the `k` reference images closest to a crop in embedding space.

    Args:
        cropped_img: image crop accepted by `preprocess`.
        k: number of nearest references to return.
        apply_BRISK: accepted for compatibility; not used.

    Returns:
        List of reference image paths with their last four characters (the
        extension) removed, ordered from nearest to farthest.
    """
    # Inference only: avoid building an autograd graph when this is called
    # outside a no_grad context.
    with torch.no_grad():
        query = preprocess(cropped_img,tsfm).unsqueeze(dim = 0).to(GPU)
        query_embed = encoder(query)
        # The single query row broadcasts against every reference row, so the
        # query does not need to be copied once per reference.
        distances = torch.linalg.norm(query_embed - ref_embeddings, dim = 1)

    nearest = distances.argsort()[:k].tolist()
    return [stocks_ds.img_list[x][:-4] for x in nearest]

In [None]:
# Smoke test for recognition: take a reference image, resize it as the
# detection dataset does, cut out a fixed region and look up its five nearest
# references.
img = cv2.imread("/content/Grozi-3.2k-mine/Food/Drinks/Juices/137.jpg")
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
img = cv2.resize(img,(410,410))
# Rows 40-229 and columns 30-239 of the resized image.
img = img[40:230,30:240,:]
img = img / 255.0
plt.imshow(img)
plt.show()
img = torch.from_numpy(img).float()
img = img.permute(2,0,1)
#img = img.unsqueeze(dim = 0)
#print("Shape of input image is ",img.shape)
pred_cls = yield_top_k_matches(img,k = 5,apply_BRISK = False)
print(pred_cls)

In [None]:
BATCH_SIZE = 16
#FINE_TUNE_PATH_TRAIN = "/content/GP-total-finetune/train"
# NOTE: the test and validation paths point to the same folder, so valid_ds
# and test_ds contain the same images.
FINE_TUNE_PATH_TEST = "/content/Grozi-3.2k-mine/Test/store1"
FINE_TUNE_PATH_VAL = "/content/Grozi-3.2k-mine/Test/store1"
WORKERS_no = 2

#train_ds = GroceryProducts(FINE_TUNE_PATH_TRAIN)
valid_ds = GroceryProducts(FINE_TUNE_PATH_VAL)
test_ds = GroceryProducts(FINE_TUNE_PATH_TEST)

# train_dl = DataLoader(train_ds,
#                       batch_size = BATCH_SIZE,
#                       shuffle = True,
#                       num_workers = WORKERS_no,
#                       collate_fn = utils.collate_fn)

# utils.collate_fn comes from the torchvision detection reference helpers
# copied in earlier.
valid_dl = DataLoader(valid_ds,
                      batch_size = BATCH_SIZE,
                      shuffle = False,
                      num_workers = WORKERS_no // 2,
                      collate_fn = utils.collate_fn)

test_dl =  DataLoader(test_ds,
                      batch_size = BATCH_SIZE,
                      shuffle = False,
                      num_workers = WORKERS_no //2
                      ,collate_fn = utils.collate_fn)

In [None]:
# Show the first two validation images with their ground-truth boxes.
itr = iter(valid_ds)
sample_img,sample_target = next(itr)
plot_img_with_boxes(sample_img,sample_target["boxes"])
sample_img,sample_target = next(itr)
plot_img_with_boxes(sample_img,sample_target["boxes"])

In [None]:
def get_detection_model(num_classes,pre_trained = True):
    """Build a Faster R-CNN (ResNet-50 FPN) whose box head predicts `num_classes` classes.

    Args:
        num_classes: number of outputs of the replacement box predictor.
        pre_trained: passed to torchvision as `pretrained`.

    Returns:
        The detection model with a freshly created FastRCNNPredictor head.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pre_trained)

    # The new head must accept the same feature width as the one it replaces.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = faster_rcnn.FastRCNNPredictor(head_in_features, num_classes)

    return detector

In [None]:
# Drop any detector left over from an earlier run. On a fresh kernel `model`
# does not exist yet, so delete it only when it is defined.
if "model" in globals():
    del model

In [None]:
# Same selection rule as the GPU constant defined earlier; kept under the
# name `device` for the single-image inference cell near the end.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
#!ls /content/Grozi-3.2k-mine/Test/store1/images | wc -l
# Count the images of store3 in the unpacked Grocery_products test set.
!ls /content/Grocery_products/Testing/store3/images | wc -l

In [None]:
# our dataset has two classes only - the single object class (label 1 in
# GroceryProducts) plus one more; NOTE(review): presumably background — confirm.
num_classes = 2

# get the model using our helper function
model = get_detection_model(num_classes,pre_trained = True)
# move model to the right device
#model.to(device)
#print("model moved to device {0}".format(device))
#print(model)

In [None]:
# Number of RPN proposals kept before / after NMS, set separately for the
# training and testing modes (private attributes of the RPN module).
model.rpn._pre_nms_top_n['training'] = 1000
model.rpn._post_nms_top_n['training'] = 600
model.rpn._pre_nms_top_n['testing'] = 500
model.rpn._post_nms_top_n['testing'] = 50
#model.roi_heads.nms_thresh = 0.30
#model.roi_heads.score_thresh = 0.60

In [None]:
# Move the detector to the GPU device; the trailing `pass` keeps the cell
# from echoing the model's repr as its output.
model.to(GPU)
pass

In [None]:
# Rebuild the detector without pretrained weights and restore the fine-tuned
# state stored under the checkpoint's "model" key. This replaces the `model`
# created above, including the RPN settings applied to it.
num_classes = 2
model = get_detection_model(num_classes,pre_trained = False)
ckpt = torch.load("/content/drive/MyDrive/ML_MODELS/faster_rcnn_res50_fpn_ft39(better-than-ft30).pt",map_location = GPU)
model.load_state_dict(ckpt["model"])
model.to(GPU)
print("detector moved to gpu")

In [None]:
#model.eval()
# Inference-time settings for the reloaded detector.
model.rpn._pre_nms_top_n['testing'] = 500
model.rpn._post_nms_top_n['testing'] = 30
model.roi_heads.nms_thresh = 0.30                 # Margin for overlap between objects
model.roi_heads.score_thresh = 0.60               # Confidence score of prediction

In [None]:
#for nbrs in range(5,31,5):
#print("For {0} nearest neighbours".format(nbrs))
# Evaluate with the 20 nearest references per crop and an IoU threshold of 0.10.
predictions,statistics,wrong_recogs,AP = validate_voc_format(model,valid_ds,top_k=20,threshold = 0.10)

In [None]:
# Sweep the number of pre-NMS RPN proposals (100, 400, ..., 5800) and record
# the AP for each setting. The model keeps the last setting afterwards, and
# predictions/statistics/wrong_recogs hold the results of the final step.
AP_list = []
for step in range(20):
    pre_nms_candidates = 100 + 300 * step
    model.rpn._pre_nms_top_n['testing'] = pre_nms_candidates
    model.eval()
    predictions,statistics,wrong_recogs,AP = validate_voc_format(model,valid_ds,top_k=20,threshold = 0.10)
    AP_list.append(AP)

In [None]:
@torch.no_grad()
def test_inference_speed(model,eval_dataset,top_k = 5,proposal_counts = None):
    """Time detection plus recognition per image for several pre-NMS proposal counts.

    Args:
        model: detector called as `model([img])`; its
            `rpn._pre_nms_top_n['testing']` entry is changed for each setting
            and restored before returning.
        eval_dataset: indexable dataset yielding `(image, target)`.
        top_k: number of recognition candidates requested per crop.
        proposal_counts: pre-NMS proposal counts to time; defaults to
            100, 400, ..., 5800 (20 settings).

    Returns:
        List with one list per setting, holding the elapsed seconds per image.
    """
    if proposal_counts is None:
        proposal_counts = [100 + 300 * step for step in range(20)]

    def _wait_for_gpu():
        # CUDA work is launched asynchronously; wait for it so the wall-clock
        # timestamps bracket the actual computation.
        if GPU.type == 'cuda':
            torch.cuda.synchronize()

    time_matrix = []
    model.eval()
    original_setting = model.rpn._pre_nms_top_n['testing']
    try:
        for pre_nms_candidates in proposal_counts:
            model.rpn._pre_nms_top_n['testing'] = pre_nms_candidates
            timings = []
            for idx in tqdm(range(len(eval_dataset))):
                img_ts,_ = eval_dataset[idx]
                img_ts = img_ts.to(GPU)

                _wait_for_gpu()
                t_start = time()
                prediction_boxes = model([img_ts])[0]["boxes"].to(CPU)

                for box in prediction_boxes:
                    x1,y1,x2,y2 = (int(v) for v in box)
                    cropped = img_ts[:,y1:y2,x1:x2]
                    if cropped.numel() == 0:
                        # An empty crop cannot be resized for recognition.
                        continue
                    yield_top_k_matches(cropped,k = top_k,apply_BRISK = False)

                _wait_for_gpu()
                timings.append(time() - t_start)
            time_matrix.append(timings)
    finally:
        # Do not leave the benchmark's last setting on the caller's model.
        model.rpn._pre_nms_top_n['testing'] = original_setting

    return time_matrix

In [None]:
# Per-image timings for every proposal-count setting, on the validation set.
time_matrix = test_inference_speed(model,valid_ds)

In [None]:
# Convert the nested list to a tensor: one row per setting, one column per image.
time_matrix = torch.FloatTensor(time_matrix)
print(time_matrix.shape)

In [None]:
# Mean time per setting (averaged over the images in each row).
time_means = time_matrix.mean(axis = 1)
time_means.shape

In [None]:
# Proposal counts for the x-axis; must match the settings used by
# test_inference_speed (100 + 300 * step for 20 steps).
region_proposals  = [100 + 300 * delta for delta in range(20)]
print(region_proposals)

In [None]:
# Mean inference time (seconds converted to milliseconds) against the number
# of pre-NMS proposals. The y-axis is fixed to 400-500 ms, so values outside
# that range are not visible.
sbn.set_style('darkgrid')
plt.figure(figsize = (10,8))
plt.plot(region_proposals,time_means * 1000,marker = '^')
plt.ylim(400,500)
plt.xlabel("No. of Region Proposals")
plt.ylabel("Inference Time (ms)")
plt.show()

In [None]:
# Last 13 reference image paths.
print(stocks_ds.img_list[-13:])

In [None]:
# Centre-in-box evaluation with the single nearest reference per crop. The
# result gets its own name so it does not overwrite `predictions` from the
# IoU-based evaluation, which the later plotting cell pairs box-by-box with
# the judgements taken from `statistics`.
isi_predictions = validate_ISI_format(model,valid_ds,top_k = 1)

In [None]:
# Number of crops whose matched label was not among the recognition candidates.
print(len(wrong_recogs))

In [None]:
def plot_predictions(wrong_preds):
    """Show every misrecognised crop next to the reference image predicted for it.

    Args:
        wrong_preds: iterable of `(gt_label, crop, predicted)` triples as
            collected by `validate_voc_format`: `crop` is a CHW tensor and
            `predicted` is a reference image path without its ".jpg" extension.
    """
    for (gt_label,crop,predicted) in wrong_preds:
        gtruth = crop.cpu().permute(1,2,0).numpy()

        # `predicted` already is a full path without extension, so no base
        # directory has to be joined in front of it.
        img1_path = predicted + ".jpg"
        img1 = cv2.imread(img1_path)
        if img1 is None:
            # cv2.imread returns None when the file cannot be read.
            print("Could not read reference image",img1_path)
            continue
        img1 = cv2.cvtColor(img1,cv2.COLOR_BGR2RGB)

        fig, axs = plt.subplots(1,2,figsize = (14,6))
        axs[0].imshow(gtruth)
        # Titles come from the argument, not from a global list.
        axs[0].set_title(gt_label)

        axs[1].imshow(img1)
        axs[1].set_title(predicted)

        # Render and release each figure so many mistakes do not pile up open figures.
        plt.show()
        plt.close(fig)

In [None]:
# One figure per misrecognised crop.
plot_predictions(wrong_recogs)

In [None]:
# Per-image marker tensors (1 correct, -1 wrong, 0 not evaluated). They are
# the last element (index 5) of each stats tuple from validate_voc_format;
# index 3 is the number of unique ground-truth labels, an int that cannot be
# iterated.
verdicts = [stat[5] for stat in statistics]
wrong_detections = 0

for verdict in verdicts:
    wrong_detections += int((verdict == -1).sum())

print("No. of Wrong detections!",wrong_detections)

In [None]:
# Per-image mean and standard deviation of the best IoU of each predicted box
# (element 1 of every stats tuple).
print("Mean IoU   Std. IoU")
for stat in statistics:
    print("{0:.2f}        {1:.2f}".format(stat[1].mean(),stat[1].std()))

In [None]:
# `predictions` and `verdicts` are indexed together (image by image, box by
# box), so both must come from the same evaluation run.
plot_pred_gt_side_by_side(valid_ds,predictions,judgements = verdicts)

In [None]:
# Save a training checkpoint. It writes to the same file name the detector
# was loaded from earlier, i.e. it overwrites that checkpoint.
# NOTE(review): `optimizer`, `lr_scheduler` and `num_epochs` are not defined
# in the visible part of this notebook; the cell raises NameError unless they
# exist from a training session — confirm before running.
PATH = "/content/drive/MyDrive/ML_MODELS"
torch.save({"model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler":lr_scheduler.state_dict(),
            "epochs": num_epochs },os.path.join(PATH,"faster_rcnn_res50_fpn_ft39(better-than-ft30).pt"))

In [None]:
# pick one image from the test set
img, target = test_ds[0]

# Changes the detector's settings for this and any later inference.
model.rpn._post_nms_top_n["testing"] = 50
model.roi_heads.nms_thresh = 0.40

# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
#display(img.permute(1,2,0).numpy())
# Draw the predicted boxes (left, color1) and the ground-truth boxes
# (right, color2) for the single test image on separate copies of it.
bboxes = prediction[0]["boxes"]
gtboxes = target["boxes"]
image = img.permute(1,2,0).numpy()
thickness = 2

color1 = (255,0,0)
color2 = (0,0,255)
# .copy() gives each panel its own array to draw on.
image1 = image.copy()
image2 = image.copy()

#plt.figure(figsize = (15,20))
fig, axs = plt.subplots(1,2,figsize = (20,30),sharex = True)
for i in range(bboxes.shape[0]):
    start_pt = (int(bboxes[i][0]),int(bboxes[i][1]))
    end_pt = (int(bboxes[i][2]),int(bboxes[i][3]))
    image1 = cv2.rectangle(image1,start_pt,end_pt,color1,thickness)

for i in range(gtboxes.shape[0]):
    start_pt = (int(gtboxes[i][0]),int(gtboxes[i][1]))
    end_pt = (int(gtboxes[i][2]),int(gtboxes[i][3]))
    image2 = cv2.rectangle(image2,start_pt,end_pt,color2,thickness)

axs[0].imshow(image1)
axs[1].imshow(image2)

plt.show()
#plot_img_with_boxes(img.permute(1,2,0).numpy(),prediction[0]["boxes"])