In [1]:
from google.colab import drive

# Mount Google Drive at /content/gdrive; every path used later in this notebook lives below it.
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import os
# Work from the Drive root so that relative paths (e.g. "train_images/...") resolve.
os.chdir("/content/gdrive/MyDrive")
# Last expression of the cell: shows the entries of the new working directory.
os.listdir()

['Colab Notebooks', 'train_images', 'csv', 'SSD']

In [3]:
%matplotlib inline
import numpy as np # Linear Algebra
import gc
import pandas as pd # Data Processing, CSV file I/O (e.g. pd.read_csv)

import torch
from torchvision import models
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import torchvision
from torchvision.ops import RoIAlign
from torchsummary import summary


from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
import warnings
warnings.filterwarnings("ignore")
import timeit
from SSD.box_utils import cxcy_to_gcxgcy,cxcy_to_xy,gcxgcy_to_cxcy,xy_to_cxcy,find_jaccard_overlap,get_target_image
from SSD.ssd import SSD
from SSD.loss import MultiBoxLoss,DecoderLoss
from SSD.dataset import SSDDataset
from SSD.iou import bb_intersection_over_union
from SSD.decoder import Decoder

In [4]:
# imageData: looked up by `image_name` for the x_min/y_min/x_max/y_max crop coordinates.
imageData=pd.read_csv("/content/gdrive/MyDrive/csv/imageData_3.csv")
# data: per-box annotations (image_name, xmin/ymin/xmax/ymax, label) used by the dataset below.
data=pd.read_csv("/content/gdrive/MyDrive/csv/data_3.csv")

In [5]:
# Load every image named in `data` that exists under train_images/ into memory.
# Names that cannot be opened are skipped on purpose (best effort), but they are
# now counted and reported instead of being silently swallowed by a bare `except`.
image_names=list(data['image_name'].unique())
image_frames=[]   # one single-row frame per successfully loaded image
skipped_names=[]  # names that could not be opened/decoded
for im in image_names:
  try:
    # The context manager closes the underlying file once the pixels are copied out.
    with Image.open("train_images/"+im) as img:
      image_frames.append(pd.DataFrame({"image_name":[im],"image_array":[np.array(img)]}))
  except Exception:
    # Unlike the previous bare `except`, KeyboardInterrupt/SystemExit still propagate.
    skipped_names.append(im)

# Concatenate once at the end: growing a DataFrame inside the loop is quadratic.
if image_frames:
  imageArray=pd.concat(image_frames,ignore_index=True)
else:
  imageArray=pd.DataFrame({"image_name":[],"image_array":[]})

if skipped_names:
  print("Loaded {} images; skipped {} names that could not be opened.".format(
      len(image_frames),len(skipped_names)))

# Dataset Generation

In [6]:
class DecoderDataset(Dataset):
    """Dataset of cropped image tiles with their bounding boxes in fractional coordinates.

    Each item is cut out of an in-memory source image using the crop coordinates
    stored in ``imageData``; the boxes from ``data`` are divided by the crop's
    width/height so that they lie in the crop's [0, 1] coordinate frame.

    Parameters:
    data --> DataFrame with columns image_name, xmin, ymin, xmax, ymax (one row per box)
    imageData --> DataFrame with columns image_name, x_min, y_min, x_max, y_max (crop location)
    imageArray --> DataFrame with columns image_name, image_array (pixels of the source images)
    is_test --> when True, items carry no boxes
    transform --> callable applied to the PIL crop; it must return a tensor,
                  because the image is moved to the GPU afterwards

    Items are ``(image_name, image, boxes)`` in training mode and
    ``(image_name, image)`` in test mode. Tensors are created on the CUDA device.
    """

    def __init__(self, data, imageData, imageArray, is_test=False, transform=None):
        self.data = data              # bounding-box annotations
        self.imageData = imageData    # crop coordinates of every tile
        self.imageArray = imageArray  # pixel arrays of the source images
        self.all_images = self.data['image_name'].unique()
        self.transform = transform
        self.is_test = is_test

    def __getitem__(self, idx):
        img_name = self.all_images[idx]
        # Tiles are named "<source>_<suffix>"; the source image is "<source>.jpg".
        if "_" in img_name:
            original_img_name = img_name.split("_")[0] + ".jpg"
        else:
            original_img_name = img_name

        coord = self.imageData[self.imageData['image_name'] == img_name][
            ["x_min", "y_min", "x_max", "y_max"]].values[0]
        x_min, y_min, x_max, y_max = (int(value) for value in coord)
        source = self.imageArray[
            self.imageArray['image_name'] == original_img_name]['image_array'].values[0]
        img = Image.fromarray(source[y_min:y_max, x_min:x_max, :]).convert('RGB')

        new_box = None
        if not self.is_test:
            annotations = self.data[self.data['image_name'] == img_name]
            # Boxes must be scaled with the size of the crop *before* it is transformed.
            new_box = self.box_resize(self.get_xy(annotations), img)

        # The transform is applied in both modes; previously test mode skipped it
        # and then failed when calling .cuda() on a PIL image.
        if self.transform:
            img = self.transform(img)

        if self.is_test:
            return img_name, img.cuda()
        return img_name, img.cuda(), new_box

    def __len__(self):
        return len(self.all_images)

    def get_xy(self, annotation):
        """Return the (n_boxes, 4) float tensor of xmin, ymin, xmax, ymax values."""
        return torch.tensor(annotation[['xmin', 'ymin', 'xmax', 'ymax']].values,
                            dtype=torch.float32)

    def box_resize(self, box, img, dims=(300, 300)):
        """Divide boxes by the image width/height to obtain fractional coordinates.

        ``dims`` is kept for backward compatibility only: the boxes are left in
        fractional form and are not rescaled to ``dims``.
        """
        old_dims = torch.tensor([img.width, img.height, img.width, img.height],
                                dtype=torch.float32, device="cuda").unsqueeze(0)
        return box.cuda() / old_dims

    def collate_fn(self, batch):
        """
        :param batch: an iterable of N items from __getitem__()
        :return: (image names, stacked image tensor, list of per-image box tensors);
                 for test-mode items, which carry no boxes, only (image names, images)
        """
        original_images = [item[0] for item in batch]  # Name of the images
        images = torch.stack([item[1] for item in batch], dim=0)

        # Test-mode items are 2-tuples; indexing item[2] used to raise IndexError.
        if any(len(item) < 3 for item in batch):
            return original_images, images

        # Boxes stay a list because the number of boxes differs per image.
        boxes = [item[2] for item in batch]
        return original_images, images, boxes

In [7]:
# Batch Size
BS = 8

# Variant that keeps the input resolution: tensor conversion + per-channel normalisation only.
tsfmV2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Variant used for training: additionally resizes every crop to 300x300 first.
tsfm = transforms.Compose([
    transforms.Resize([300, 300]),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [8]:
# Annotation rows from position 4606 onward are used for the decoder's training set.
train_ds = DecoderDataset(
    data=data.iloc[4606:],
    imageData=imageData,
    imageArray=imageArray,
    transform=tsfm,
)
# The dataset's own collate_fn keeps the per-image boxes as a list.
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=BS,
    shuffle=True,
    collate_fn=train_ds.collate_fn,
)

In [None]:
# Pull a single batch: (image names, image tensor, list of per-image boxes) as built by collate_fn.
x=next(iter(train_dl))

# Load Pretrained SSD Model

In [None]:
# One class more than the number of distinct labels
# (presumably the extra slot is the background class -- TODO confirm against SSD).
N_CLASSES = data['label'].nunique() + 1
model = SSD(n_classes=N_CLASSES)
# Restore the previously trained detector weights.
model.load_state_dict(torch.load("/content/gdrive/MyDrive/SSD/model_giou.pth"))

In [13]:
class DetectorDecoder(nn.Module):
  """Reconstruct an image from RoI-aligned feature maps of an SSD detector.

  For every ground-truth box the best-overlapping prior of each feature map is
  looked up, the detector's predicted box for that prior is used as the region
  of interest, and the aligned features of all boxes and feature maps are
  stacked along the channel axis. That stack is cut or stretched to exactly
  ``num_filter`` channels and decoded by transposed convolutions.

  Parameter:
  detector --> Object Detector; must expose base, aux_convs, priors_xy and priors_cxcy
  output_size --> Output size of ROI Align step. Default (5,5)
  num_filter --> Number of feature maps after merging all the feature maps
  """

  # Slice of the detector's prior list (and of its predicted offsets) owned by each feature map.
  _PRIOR_RANGES={"conv4_3":(0,5776),"conv7":(5776,7942),
                 "conv8_2":(7942,8542),"conv9_2":(8542,8692),"conv10_2":(8692,8728),
                 "conv11_2":(8728,8732)}

  # Feature maps that get RoI-aligned and merged, with the map size used to scale the RoIs.
  # conv11_2 is not merged.
  _MERGED_MAPS=(("conv4_3",(38,38)),("conv7",(19,19)),("conv8_2",(10,10)),
                ("conv9_2",(5,5)),("conv10_2",(3,3)))

  def __init__(self,detector=None,output_size=(5,5),num_filter=50000):
    super(DetectorDecoder,self).__init__()

    # The feature maps in detector will work as encoded information.
    self.detector=detector
    self.output_size=output_size
    self.num_filter=num_filter

    # ROIALIGN (spatial_scale=1, sampling_ratio=-1): RoIs are given in feature-map coordinates.
    self.roialign=RoIAlign(self.output_size,1,-1)

    # Learnable per-channel scale for the L2-normalised conv4_3 map (512 channels).
    # It used to be re-created from uninitialised memory on every forward pass, so it
    # was never trained and could hold arbitrary values. 20 is the customary SSD start
    # value -- NOTE(review): confirm it matches what the detector itself uses.
    self.rescale_factors=nn.Parameter(torch.full((1,512,1,1),20.0))

    # Decoding Layers (spatial sizes below assume the default 5x5 RoI output)
    self.convT_1=nn.ConvTranspose2d(self.num_filter,1024,kernel_size=3,stride=2) #(1024,11,11)
    self.convT_2=nn.ConvTranspose2d(1024,512,kernel_size=3,stride=2) #(512,23,23)
    self.upsample_1=nn.Upsample((80,80)) #(512,80,80)

    self.convT_3=nn.ConvTranspose2d(512,128,kernel_size=3,stride=2) #(128,161,161)
    self.upsample_2=nn.Upsample((160,160)) #(128,160,160)

    self.convT_4=nn.ConvTranspose2d(128,32,kernel_size=3,stride=2) #(32,321,321)
    self.convT_5=nn.ConvTranspose2d(32,3,kernel_size=1,stride=2) #(3,641,641)
    self.upsample_3=nn.Upsample((640,640)) #(3,640,640)

  def forward(self,mask):
    """Decode a batch.

    mask --> tuple (images, boxes): the image batch fed to the detector and a
             list with one (n_objects, 4) box tensor per image
    Returns a tensor of shape (N, 3, 640, 640).
    """
    x,self.boxes=mask

    # Get RoI's offset
    self.locs,_=self.detector(x)

    # Get all the feature maps
    x=self.get_feature_maps(x)

    out=self.convT_1(x)
    out=self.convT_2(out)
    out=self.upsample_1(out)
    out=self.convT_3(out)
    out=self.upsample_2(out)
    out=self.convT_4(out)
    out=self.convT_5(out)
    out=self.upsample_3(out)
    return out

  def get_feature_maps(self,x):
    """ Get all the feature maps from the ssd detector and merge them """
    # Get Conv4 and Conv7 from VGG
    self.conv4_3,self.conv7=self.detector.base(x)

    # Rescale conv4_3 after L2 norm
    norm=self.conv4_3.pow(2).sum(dim=1,keepdim=True).sqrt()
    self.conv4_3=self.conv4_3/norm
    self.conv4_3=self.conv4_3*self.rescale_factors

    # Get rest of the feature maps from the auxiliary convolutions
    self.conv8_2,self.conv9_2,self.conv10_2,self.conv11_2=self.detector.aux_convs(self.conv7)

    # Merge all the Feature maps after roi aligning
    return self.merge_feature_maps()

  def merge_feature_maps(self):
    """ Merge all Feature Maps into one (N, num_filter, out_h, out_w) tensor

    The merge is done image by image, because the number of boxes (and therefore
    the number of stacked channels) differs between the images of a batch.
    """
    batch_size=self.locs.size(0)
    per_map=[self._roi_align_per_image(name,size) for name,size in self._MERGED_MAPS]

    merged=[]
    for i in range(batch_size):
      stacked=torch.cat([aligned[i] for aligned in per_map],dim=1)
      merged.append(self._fit_channels(stacked,self.num_filter))
    feature_map=torch.cat(merged,dim=0)

    assert feature_map.shape==torch.Size([batch_size,self.num_filter,self.output_size[0],self.output_size[1]])
    return feature_map

  @staticmethod
  def _fit_channels(feature_map,num_filter):
    """ Cut or stretch the channel axis of a (N, C, H, W) tensor to exactly num_filter channels """
    channels=feature_map.size(1)
    if channels==0:
      raise ValueError("cannot build a feature map for an image without any box")
    if channels>=num_filter:
      # Take the first num_filter channels
      return feature_map[:,:num_filter,:,:]
    # Too few channels: move them to the last axis and stretch that axis by
    # nearest-neighbour interpolation. (nn.Upsample(5, num_filter) passed both a
    # size and a scale factor, which the interpolation rejects.)
    stretched=F.interpolate(torch.moveaxis(feature_map,1,-1),size=(feature_map.size(3),num_filter))
    return torch.moveaxis(stretched,-1,1)

  def add_batch_number(self,boxes,batch):
    """ Add Batch Number in the first column of the rois: (n, 4) -> (n, 5) """
    box_size=boxes.size(0)
    batch_column=torch.full((box_size,1),float(batch),dtype=boxes.dtype,device=boxes.device)
    return torch.cat((batch_column,boxes.reshape(box_size,4)),dim=1)

  def _find_roi_per_image(self,feature_name,feature_size):
    """ RoIs of one feature map as a list with one (n_objects_i, 5) tensor per image """
    min_,max_=self._PRIOR_RANGES[feature_name]
    # Loop invariant: the priors of this feature map, clamped to non-negative coordinates.
    priors_xy=torch.clamp(self.detector.priors_xy[min_:max_,:],min=0)

    rois=[]
    for i in range(self.locs.size(0)):
      overlap=find_jaccard_overlap(self.boxes[i],priors_xy)
      locs_xy=cxcy_to_xy(gcxgcy_to_cxcy(self.locs[i],self.detector.priors_cxcy))

      # Best matching prior of this feature map for every ground-truth box
      _,prior_for_each_object=overlap.max(dim=1)
      # Predicted boxes of those priors, scaled from [0, 1] to feature-map coordinates
      locs_for_map=torch.clamp(locs_xy[min_:max_,:][prior_for_each_object],min=0,max=1)*(feature_size[0]-1)
      rois.append(self.add_batch_number(locs_for_map,i))
    return rois

  def find_roi(self,feature_name,feature_size):
    """ It finds the RoI's for a specific feature name, concatenated over the batch """
    return torch.cat(self._find_roi_per_image(feature_name,feature_size),dim=0)

  def _roi_align_per_image(self,feature_name,feature_size):
    """ RoI-align one feature map; returns one (1, n_objects_i*C, out_h, out_w) tensor per image """
    out_h,out_w=self.output_size
    rois=self._find_roi_per_image(feature_name,feature_size)
    aligned_map=self.roialign(getattr(self,feature_name),torch.cat(rois,dim=0))
    # Split by image before folding the boxes into the channel axis, so that
    # features of different images can never be mixed.
    counts=[roi.size(0) for roi in rois]
    return [chunk.reshape(1,-1,out_h,out_w) for chunk in aligned_map.split(counts,dim=0)]

  def roi_align(self,feature_name,feature_size):
    """ Roi Alignment

    Returns a (N, n_objects*C, out_h, out_w) tensor. This is only possible when
    every image of the batch has the same number of boxes.
    """
    return torch.cat(self._roi_align_per_image(feature_name,feature_size),dim=0)

# Model Training

In [16]:
# Number of training epochs.
EPOCH=10
# Learning rate.
LR=1e-4

In [17]:
# NOTE(review): this instantiates `Decoder` imported from SSD.decoder, not the
# `DetectorDecoder` class defined in this notebook -- confirm which one is intended.
decoder=Decoder(model).cuda()
# Reconstruction loss, configured with the "mse" option.
criterion=DecoderLoss("mse")
# Adam over all decoder parameters.
optimizer=torch.optim.Adam(decoder.parameters(),lr=LR)

In [54]:
from tqdm import tqdm
import time

# Train the decoder for EPOCH epochs and report the mean reconstruction loss per epoch.
for epoch in range(1, EPOCH+1):
    decoder.train()
    train_loss = []
    time_start = time.time()

    # tqdm was imported but never used; it now shows the batch progress of the epoch.
    batches = tqdm(train_dl, desc="epoch {}/{}".format(epoch, EPOCH), leave=False)
    for target_image_names, resized_image, boxes in batches:
        # Reconstruction target for the images of this batch
        target_image = get_target_image(target_image_names)

        reconstructed_image = decoder(resized_image, boxes)
        loss = criterion(reconstructed_image, target_image)

        # Backward prop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    # Same values as `elapsed // 60` and `elapsed % 60`
    minutes, seconds = divmod(time.time() - time_start, 60)
    total_time = str(minutes)+" minutes and "+ str(seconds)+" seconds "

    print("Time:",total_time, ' epoch: ', epoch, '/', EPOCH,
            'train loss:', '{:.4f}'.format(np.mean(train_loss)))

RuntimeError: ignored

In [15]:
# NOTE(review): no visible cell assigns `input`; the recorded output implies a tensor
# left over in the kernel. On a fresh kernel `input` is the builtin and has no `.shape`.
input.shape

torch.Size([8, 3, 640, 640])

In [None]:
# Scratch tensor of shape (8, 5777, 4); `x` is reassigned to a data batch in a later cell.
x=torch.rand((8,5777,4))

In [None]:
# [start, end) range of every feature map along the second axis of `locs`.
feature_map_pos = {
    "conv4_3": [0, 5776],
    "conv7": [5776, 7942],
    "conv8_2": [7942, 8542],
    "conv9_2": [8542, 8692],
    "conv10_2": [8692, 8728],
    "conv11_2": [8728, 8732],
}

# Slice `locs` once per feature map, using the ranges above.
# NOTE(review): `locs` must already exist when this cell runs (it is assigned further down).
feature_map = {
    layer_name: locs[:, start:stop, :]
    for layer_name, (start, stop) in feature_map_pos.items()
}

NameError: ignored

In [None]:
# List the feature-map names in insertion order, one per line.
for key in feature_map_pos.keys():
    print(key)

conv4_3
conv7
conv8_2
conv9_2
conv10_2
conv11_2


In [None]:
def find_map(box):
  """Placeholder: the cell defined no body, which made it a SyntaxError.

  NOTE(review): presumably meant to pick the feature map for `box` -- confirm the intent.
  """
  raise NotImplementedError("find_map has not been implemented yet")

In [None]:
# A batch is (image names, image tensor, boxes): the images are element 1.
# (x[0] is the list of names and has no .cuda().)
x=next(iter(train_dl))
locs,_=model(x[1].cuda())

In [None]:
# First output of the detector's base network for the image batch
# (element 1 of the batch; element 0 holds the image names).
conv4_3=model.base(x[1].cuda())[0]

In [None]:
# Shape of the conv4_3 feature map computed above.
conv4_3.shape

torch.Size([8, 512, 38, 38])

In [None]:
# Get the real boxes of the second image in the batch
# (boxes are element 2 of the batch; element 1 is the image tensor).
boxes=x[2][1]

# Get the prior boxes in corner (xmin, ymin, xmax, ymax) form
priors_xy=cxcy_to_xy(model.priors_cxcy)

# Get the jaccard overlap with the first 5776 priors (the conv4_3 range)
overlap=find_jaccard_overlap(boxes,torch.clamp(priors_xy,min=0)[:5776])

In [None]:
# Index of the maximum along dim 1 of `overlap` for every row
# (presumably the best-overlapping prior per box -- TODO confirm the axis order).
temp=overlap.max(dim=1)[1]

In [None]:
# Keep the indices strictly below 5776.
temp[temp<5776]

tensor([ 829,  701,  797,  769,  733,  125,   97,  161,  193,  221, 2436, 2536,
        2556, 2504, 2452, 2468, 2520, 2572, 2484, 3316, 3332, 3300, 3368, 3384,
        3400, 3420, 3436, 3348, 4088, 4228, 4212, 4128, 4156, 4172, 4112, 4140,
        4200, 4184, 5000, 4988, 4880, 4920, 4896, 4908, 4972, 4960, 4948, 4868,
        4932], device='cuda:0')

In [None]:
# torch.le is "<=": unlike the strict `<` filter, this also keeps an index equal to 5776.
temp[torch.le(temp,5776)]

tensor([ 829,  701,  797,  733,  125,   97,  193,  221, 2436, 2536, 2556, 2504,
        2452, 2468, 2520, 2572, 2484, 3316, 3332, 3300, 3368, 3384, 3400, 3420,
        3436, 3348, 4088, 4228, 4212, 4128, 4156, 4172, 4112, 4140, 4200, 4184,
        5000, 4988, 4880, 4920, 4896, 4908, 4972, 4960, 4948, 4868, 4932],
       device='cuda:0')

In [None]:
# Show all indices, unfiltered.
temp

tensor([ 829,  701,  797, 6006,  733,  125,   97, 5778,  193,  221, 2436, 2536,
        2556, 2504, 2452, 2468, 2520, 2572, 2484, 3316, 3332, 3300, 3368, 3384,
        3400, 3420, 3436, 3348, 4088, 4228, 4212, 4128, 4156, 4172, 4112, 4140,
        4200, 4184, 5000, 4988, 4880, 4920, 4896, 4908, 4972, 4960, 4948, 4868,
        4932], device='cuda:0')

In [None]:
# Index of the maximum along dim 0 of `overlap` for every column
# (presumably the best-overlapping box per prior -- TODO confirm the axis order).
overlap.max(dim=0)[1]

tensor([0, 0, 0,  ..., 7, 7, 8], device='cuda:0')

In [None]:
# Size of the first axis of `locs`.
locs.size(0)

8

In [None]:
# First entry of `boxes`.
# NOTE(review): the recorded output is 2-D, so `boxes` was a list of per-image tensors
# when this ran, not the single tensor assigned in the overlap cell above.
boxes[0]

tensor([[0.6000, 0.5750, 0.8703, 0.7906]], device='cuda:0')