## RetinaNet

Link to the paper: https://arxiv.org/pdf/1708.02002.pdf

In [1]:
from fastai.conv_learner import *
from matplotlib import patches, patheffects

## Loading the relevant files

In [2]:
# Root of the local COCO 2017 copy; every other location is derived from it.
coco_path = Path('/scratch/arka/Ark_git_files/coco/')
# Annotation JSONs, training images and validation images, in that order.
ann_path, train_path, val_path = (
    coco_path / subdir for subdir in ('annotations', 'train2017', 'val2017')
)

In [None]:
!ls {ann_path}

In [None]:
def _load_annotation_json(stem):
    """Parse ``<ann_path>/<stem>.json`` and return the decoded object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes instead of lingering until garbage collection.
    """
    with (ann_path / (stem + '.json')).open('r') as fh:
        return json.load(fh)


# Each name holds the fully parsed content of the annotation file of the same name.
captions_train2017 = _load_annotation_json('captions_train2017')
captions_val2017 = _load_annotation_json('captions_val2017')
instances_train2017 = _load_annotation_json('instances_train2017')
instances_val2017 = _load_annotation_json('instances_val2017')
person_keypoints_train2017 = _load_annotation_json('person_keypoints_train2017')
person_keypoints_val2017 = _load_annotation_json('person_keypoints_val2017')

In [None]:
# Inspect the top-level sections of the instances file; the cells below read
# its 'annotations' and 'categories' entries.
instances_train2017.keys()

In [None]:
# Build the image file name for the first annotation: the image id zero-padded
# to 12 digits plus '.jpg' (the same scheme CocoDS.get_x uses).
'{:012}'.format(instances_train2017['annotations'][0]['image_id']) + '.jpg'

### Saving relevant files

In [None]:
# One row per object annotation: [annotation id, image id, bbox, category id].
train_df_list = [
    [ann['id'], ann['image_id'], ann['bbox'], ann['category_id']]
    for ann in tqdm(instances_train2017['annotations'])
]
train_df = pd.DataFrame(train_df_list, columns=['id','image_id', 'bbox', 'category_id'])

In [None]:
# One row per validation annotation: [annotation id, image id, bbox, category id].
val_df_list = [
    [ann['id'], ann['image_id'], ann['bbox'], ann['category_id']]
    for ann in tqdm(instances_val2017['annotations'])
]
val_df = pd.DataFrame(val_df_list, columns=['id','image_id', 'bbox', 'category_id'])

In [3]:
# Directory for the flattened annotation tables (created on first run).
CSVS = coco_path.joinpath('CSVS')
CSVS.mkdir(exist_ok=True)

# Per-split CSV targets inside that directory.
train_csv = CSVS.joinpath('train.csv')
val_csv = CSVS.joinpath('val.csv')

# JSON targets for the category id -> name map and its inverse.
category_mapping = coco_path.joinpath('category_map.json')
inv_category_mapping = coco_path.joinpath('inv_category_map.json')

In [None]:
# Persist both annotation tables with a header row and without the index column.
# The 'bbox' lists end up as text in the CSV, which is why CocoDS.get_y parses
# them back with ast.literal_eval.
train_df.to_csv(train_csv, header=True, index=False)
val_df.to_csv(val_csv, header=True, index=False)

In [None]:
# Category id -> category name, taken from the training instances file.
cmap = {cat['id']: cat['name'] for cat in instances_train2017['categories']}
# Category name -> category id (the inverse lookup).
inv_cmap = {cat['name']: cat['id'] for cat in instances_train2017['categories']}

In [None]:
# Write both category maps to disk. The `with` blocks close (and therefore
# flush) each file before the next cell reads it back.
with category_mapping.open('w') as fh:
    json.dump(cmap, fh)
with inv_category_mapping.open('w') as fh:
    json.dump(inv_cmap, fh)

### Loading relevant files

In [4]:
# Reload the flattened annotation tables written earlier.
train_df = pd.read_csv(train_csv, header='infer')
val_df = pd.read_csv(val_csv, header='infer')
# Reload the category maps, closing each file once it is parsed.
# JSON object keys are always strings, so cat_map is keyed by the category id
# as text; later cells convert with int(...) / str(...).
with category_mapping.open('r') as fh:
    cat_map = json.load(fh)
with inv_category_mapping.open('r') as fh:
    inv_cat_map = json.load(fh)

### Creating Dataset and Dataloader

In [None]:
??BaseDataset

In [5]:
# Category ids as integers, in the iteration order of cat_map.
catlist = [int(raw_id) for raw_id in cat_map]
# Category id -> contiguous label index (0, 1, 2, ...).
cat2lab = {cat_id: label for label, cat_id in enumerate(catlist)}
# Contiguous label index -> category id.
lab2cat = dict(enumerate(catlist))
# Number of categories seen (kept because the loop version left it in `c`).
c = len(catlist)

In [6]:
def hw_bb(bb):
    """Turn a box given as origin + extent into inclusive corner form.

    Reads ``bb[0..3]`` as (a, b, extent_a, extent_b) and returns
    ``np.array([b, a, b + extent_b - 1, a + extent_a - 1])``.
    Presumably the input is a COCO ``[x, y, width, height]`` box, which makes
    the result ``[y1, x1, y2, x2]`` -- confirm against the annotation format.
    """
    left, top, width, height = bb[0], bb[1], bb[2], bb[3]
    bottom = height + top - 1
    right = width + left - 1
    return np.array([top, left, bottom, right])
def bb_hw(a):
    """Turn an inclusive corner box back into origin + extent form.

    Reads ``a[0..3]`` as (p0, q0, p1, q1) and returns
    ``np.array([q0, p0, q1 - q0 + 1, p1 - p0 + 1])``, i.e. the inverse of
    ``hw_bb``.
    """
    first_lo, second_lo, first_hi, second_hi = a[0], a[1], a[2], a[3]
    second_extent = second_hi - second_lo + 1
    first_extent = first_hi - first_lo + 1
    return np.array([second_lo, first_lo, second_extent, first_extent])

In [7]:
import ast
class CocoDS(BaseDataset):
    """Dataset that yields one annotation (a single box and its label) per item.

    Arguments:
        df: table with the columns ['id', 'image_id', 'bbox', 'category_id'];
            each row is one annotated object.
        tdir: directory (a Path) holding the images, named by the zero-padded
            12-digit image id plus '.jpg'.
        sz: image size reported by ``get_sz``.
        tfm1: optional callable ``tfm1(image, bbox) -> (image, bbox)`` applied
            to every item; it must also expose ``denorm`` for ``self.denorm``.

    NOTE(review): ``BaseDataset.__init__`` is not called here; confirm that no
    attribute it would normally set is needed downstream.
    """
    def __init__(self, df, tdir, sz=224, tfm1=None):
        # ['id', 'image_id', 'bbox', 'category_id']
        self.df = df
        self.tdir = tdir
        self.n = self.get_n()
        self.c = self.get_c()
        self.sz = sz
        # tfm1 transforms the image together with its bbox
        self.tfm1 = tfm1

    def get_n(self):
        """Number of items, i.e. one per annotation row."""
        return len(self.df)

    def get_c(self):
        """Number of classes (fixed at 80)."""
        return 80

    def get_sz(self):
        """Image size passed to the constructor."""
        return self.sz

    def get_x(self, i):
        """Open the image belonging to annotation row ``i``."""
        fid = '{:012}'.format(self.df.iloc[i]['image_id']) + '.jpg'
        return open_image(self.tdir / fid)

    def get_y(self, i):
        """Return ``(corner_bbox, label_index)`` for annotation row ``i``."""
        row = self.df.iloc[i]  # one lookup serves both columns
        y_bbox = row['bbox']
        # Tables reloaded from CSV store the box as text; tables built in
        # memory already hold a list, which literal_eval would reject.
        if isinstance(y_bbox, str):
            y_bbox = ast.literal_eval(y_bbox)
        y_id = cat2lab[row['category_id']]
        return hw_bb(np.array(y_bbox)), y_id

    def get1item(self, idx):
        """Return ``(image, (bbox, label))``, transformed when tfm1 is set."""
        x, (bbox, label) = self.get_x(idx), self.get_y(idx)
        # tfm1 defaults to None; in that case return the raw item instead of
        # failing with "'NoneType' object is not callable".
        if self.tfm1 is not None:
            x, bbox = self.tfm1(x, bbox)
        return x, (bbox, label)

    def denorm(self,arr):
        """Reverse the normalization done to a batch of images.
        Arguments:
            arr: of shape/size (N,3,sz,sz)
        """
        if not isinstance(arr, np.ndarray): arr = to_np(arr)
        # A single image gets a leading batch axis.
        if len(arr.shape)==3: arr = arr[None]
        # Move the channel axis last before handing over to the transform.
        return self.tfm1.denorm(np.rollaxis(arr,1,4))

In [8]:
# Per-channel mean and std handed to tfms_from_stats below.
imagenet_stats = A([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# Image size; also drives the anchor grid sizes later in the notebook.
sz = 224
# tfms = tfms_from_stats(imagenet_stats, sz, aug_tfms=transforms_basic, tfm_y=TfmType.COORD)
bs = 9
# Every augmentation uses tfm_y=TfmType.COORD so the box is transformed with the image.
augs = [RandomFlip(tfm_y=TfmType.COORD),
        RandomRotate(30, tfm_y=TfmType.COORD),
        RandomLighting(0.1,0.1, tfm_y=TfmType.COORD)]
# tfms is indexed as a pair below; presumably (augmented/train, plain/validation) -- TODO confirm.
tfms = tfms_from_stats(imagenet_stats, sz, aug_tfms=augs, crop_type=CropType.NO, tfm_y=TfmType.COORD)
trn_ds = CocoDS(train_df, train_path, sz, tfms[0])
# Training data with the second transform set.
fix_ds = CocoDS(train_df, train_path, sz, tfms[1])
# trn_dl = DataLoader(trn_ds, batch_size=bs)
val_ds = CocoDS(val_df, val_path, sz, tfms[1])
# Validation data with the first transform set.
aug_ds = CocoDS(val_df, val_path, sz, tfms[0])
# Six-slot list passed to ImageData; the two None entries are presumably the
# test and test-augmented datasets -- verify against ImageData.
ds = [trn_ds, val_ds, fix_ds, aug_ds, None, None]
# val_dl = DataLoader(val_ds, batch_size=bs)

## Checking if everything is correct

In [9]:
def show_img(im, figsize=None, ax=None):
    """Display ``im`` without axis ticks and return the axes used.

    Arguments:
        im: image accepted by ``ax.imshow``.
        figsize: figure size for the new figure; only used when ``ax`` is None.
        ax: existing axes to draw on; a new figure and axes are created when
            it is None.
    """
    # Test against None explicitly so a supplied axes object is never
    # replaced because of its truth value.
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax
def draw_outline(o, lw):
    """Give artist ``o`` a black stroke of width ``lw`` followed by its normal rendering."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])
def draw_rect(ax, b, color='white'):
    """Draw an unfilled, outlined rectangle on ``ax``.

    ``b[:2]`` is the rectangle's anchor point and ``b[-2:]`` its two extents,
    passed to ``patches.Rectangle`` in that order.
    """
    anchor, extents = b[:2], b[-2:]
    rect = patches.Rectangle(anchor, *extents, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)
def draw_text(ax, xy, txt, sz=14, color='white'):
    """Write bold, outlined ``txt`` on ``ax`` with its top edge at position ``xy``."""
    text_style = dict(verticalalignment='top', color=color, fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **text_style)
    draw_outline(label, 1)


In [None]:
# Show sample `i` of a batch with its box and class name.
# NOTE(review): `x` and `y` are not assigned in any cell above this one; the
# next cell assigns them from next(iter(md.aug_dl)), so this cell only works
# after that one has been run.
i = 0
ax = show_img(trn_ds.denorm(x)[i])
# Pick element i out of every target component (box, label).
im0_a = [k[i] for k in y]
# Corner form back to origin + extent, which draw_rect expects.
b = bb_hw(im0_a[0])
draw_rect(ax, b)
# draw_text(ax, b[:2], 'dog')
# label index -> category id -> name (cat_map is keyed by the id as a string).
draw_text(ax, b[:2], cat_map[str(lab2cat[im0_a[1].cpu().numpy().item()])])

In [None]:
# Draw sample `idx` nine times, once per fresh pass over md.aug_dl.
# NOTE(review): `md` is created in a later cell ("Creating the Model Data"),
# so this cell depends on that one having been run first.
idx=5
fig,axes = plt.subplots(3,3, figsize=(9,9))
for i,ax in enumerate(axes.flat):
    # A new iterator is built on every pass, so each subplot shows the first batch again.
    x,y=next(iter(md.aug_dl))
    ima=md.val_ds.denorm(to_np(x))[idx]
    # Element idx of every target component (box, label).
    y1 = [k[idx] for k in y]
    b = bb_hw(to_np(y1[0]))
    print(b)
    show_img(ima, ax=ax)
    draw_rect(ax, b)
    # label index -> category id -> name (cat_map is keyed by the id as a string).
    draw_text(ax, b[:2], cat_map[str(lab2cat[y1[1].cpu().numpy().item()])])

## Creating the Model Data

In [11]:
# Directory handed to ImageData as its path argument; created if it is missing.
data_path = coco_path / 'dpath'
data_path.mkdir(exist_ok=True)

In [12]:
# Wrap the six-slot dataset list into batch loaders of size bs.
# NOTE(review): classes is passed as the integer 80 -- confirm ImageData does
# not expect a list of class names here.
md = ImageData(data_path, ds, bs, num_workers=4, classes=80)

## Generating the Anchors

In [13]:
def hw2corners(ctr, hw):
    """Convert centre + size boxes to corner form.

    Returns ``[ctr - hw/2, ctr + hw/2]`` concatenated along dim 1, so two
    (N, 2) inputs give an (N, 4) result.
    """
    half = hw / 2
    return torch.cat((ctr - half, ctr + half), dim=1)

In [14]:
# Anchors per grid cell: 3 zooms x 3 aspect ratios (the length of anchor_scales).
na = 9
# Side length of each feature grid for strides 2**3 .. 2**7, rounded up
# (28, 14, 7, 4, 2 for sz = 224).
anc_grids = [int(np.ceil(sz / 2**i)) for i in range(3, 8)]
anc_zooms = [1., 2**(1/3), 2**(2/3)]
anc_ratios = [(1.,1.), (1.,2), (2,1.)]
# Every zoom combined with every ratio; the zoom is the outer loop.
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
# Half a cell width in normalized [0, 1] coordinates, i.e. the first cell centre.
anc_offsets = [1/(o*2) for o in anc_grids]

# Cell centres per grid, concatenated over all grids. anc_x uses tile (changes
# fastest along the flattened grid), anc_y uses repeat (changes slowest).
# NOTE(review): hw_bb puts the row/y coordinate first while the anchors put
# anc_x first -- confirm the two conventions agree where boxes meet anchors.
anc_x = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
# Each centre is repeated na times so that it lines up with the na scales below.
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), na, axis=0)
# For every cell of every grid: each anchor scale divided by the grid side length.
anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids])
# Cell width (1/ag) for every anchor, as a column vector.
grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids]), requires_grad=False).unsqueeze(1)
# Columns: the two centre coordinates followed by the two sizes.
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
# The same anchors as [centre - size/2, centre + size/2].
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
# (784 + 196 + 49 + 16 + 4) cells * 9 anchors = 9441 rows for sz = 224.
anchors.size()

torch.Size([9441, 4])

In [15]:
# Corner-form anchors with a little Gaussian jitter (std 0.01), scaled from
# normalized units to pixels and flattened to 1-D. Scaling by `sz` rather than
# a literal 224 keeps this in step with the image size used for the anchors.
a = np.reshape((to_np(anchor_cnr) + to_np(torch.randn(*anchor_cnr.size())) * 0.01) * sz, -1)

In [16]:
# import matplotlib.cm as cmx
# import matplotlib.colors as mcolors
# from cycler import cycler

# def get_cmap(N):
#     color_norm  = mcolors.Normalize(vmin=0, vmax=N-1)
#     return cmx.ScalarMappable(norm=color_norm, cmap='Set3').to_rgba

# num_colr = 12
# cmap = get_cmap(num_colr)
# colr_list = [cmap(float(x)) for x in range(num_colr)]

# def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
#     bb = [bb_hw(o) for o in bbox.reshape(-1,4)]
#     if prs is None:  prs  = [None]*len(bb)
#     if clas is None: clas = [None]*len(bb)
#     ax = show_img(im, ax=ax)
#     for i,(b,c,pr) in enumerate(zip(bb, clas, prs)):
#         if((b[2]>0) and (pr is None or pr > thresh)):
#             draw_rect(ax, b, color=colr_list[i%num_colr])
#             txt = f'{i}: '
#             if c is not None: txt += ('bg' if c==len(id2cat) else id2cat[c])
#             if pr is not None: txt += f' {pr:.2f}'
#             draw_text(ax, b[:2], txt, color=colr_list[i%num_colr])
# fig, ax = plt.subplots(figsize=(7,7))
# show_ground_truth(ax, x[0], a)

## Creating the Model

### Backbone

In [None]:
# ResNet-50 built with pretrained=True; it is later passed around as the detector backbone.
res50 = resnet50(pretrained=True)

In [None]:
# Last block of layer2, used in the next cell to read its output channel count.
a1=res50.layer2[-1]

In [None]:
# Output channels of that block's conv3; retina_net_model reads the same
# attribute to size the FPN inputs.
a1.conv3.out_channels

In [None]:
# res50_backbone = cut_model(res50, -2)

In [None]:
# res50_backbone = nn.Sequential(*cut_model(res50, -2))

In [None]:
# Use the whole ResNet as the backbone (the cut_model variants above are commented out).
res50_backbone = res50

In [None]:
# Throwaway learner on the same data, used below only for unfreeze() and summary().
learn_tmp = ConvLearner.pretrained(resnet50, md)

In [None]:
# Presumably makes every layer group trainable before the summary -- confirm against fastai.
learn_tmp.unfreeze()

In [None]:
# Display the learner's summary for inspection.
learn_tmp.summary()

### FPN

In [None]:
# Display the backbone's module tree. The original cell held a dangling
# `res50.` (a tab-completion leftover), which is a SyntaxError when run.
res50

In [None]:
def pad_out(k):
    """Return ``(k - 1) // 2``, used below as the conv padding for kernel size ``k``."""
    half, _ = divmod(k - 1, 2)
    return half

In [None]:
class FPN_backbone(nn.Module):
    """Feature pyramid built on three backbone stages.

    Takes the (c3, c4, c5) feature maps and returns five maps
    [p3, p4, p5, p6, p7], each with ``self.feat_size`` (256) channels.

    Arguments:
        inch_list: channel counts of c3, c4 and c5, in that order.
    """
    def __init__(self, inch_list):
        super().__init__()

        # channel counts of c3, c4, c5
        self.inch_list = inch_list
        self.feat_size = 256
        fs = self.feat_size
        pad1, pad3 = pad_out(1), pad_out(3)

        # Extra coarse levels: p6 is a stride-2 conv on c5, p7 a stride-2 conv on p6.
        self.p7_gen = nn.Conv2d(fs, fs, kernel_size=3, stride=2, padding=1)
        self.p6_gen = nn.Conv2d(self.inch_list[2], fs, kernel_size=3, stride=2, padding=pad3)

        # p5: 1x1 lateral conv, x2 upsampler feeding p4, 3x3 output conv.
        self.p5_gen1 = nn.Conv2d(self.inch_list[2], fs, kernel_size=1, padding=pad1)
        self.p5_gen2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.p5_gen3 = nn.Conv2d(fs, fs, kernel_size=3, padding=pad3)

        # p4: 1x1 lateral conv, x2 upsampler feeding p3, 3x3 output conv.
        self.p4_gen1 = nn.Conv2d(self.inch_list[1], fs, kernel_size=1, padding=pad1)
        self.p4_gen2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.p4_gen3 = nn.Conv2d(fs, fs, kernel_size=3, padding=pad3)

        # p3: 1x1 lateral conv and 3x3 output conv.
        self.p3_gen1 = nn.Conv2d(self.inch_list[0], fs, kernel_size=1, padding=pad1)
        self.p3_gen2 = nn.Conv2d(fs, fs, kernel_size=3, padding=pad3)

    def forward(self, inp):
        """Map ``inp = [c3, c4, c5]`` to ``[p3, p4, p5, p6, p7]``."""
        c3, c4, c5 = inp[0], inp[1], inp[2]

        # Top-down pathway: each lateral map is summed with the upsampled
        # lateral map of the level above before its 3x3 output conv.
        lateral5 = self.p5_gen1(c5)
        p5_out = self.p5_gen3(lateral5)

        lateral4 = self.p4_gen1(c4) + self.p5_gen2(lateral5)
        p4_out = self.p4_gen3(lateral4)

        lateral3 = self.p3_gen1(c3) + self.p4_gen2(lateral4)
        p3_out = self.p3_gen2(lateral3)

        # Coarser levels come straight from c5; p7 sees a ReLU of p6.
        p6_out = self.p6_gen(c5)
        p7_out = self.p7_gen(F.relu(p6_out))

        return [p3_out, p4_out, p5_out, p6_out, p7_out]
        

In [None]:
def flatten_conv(x,k):
    """Flatten a 4-D conv output into one row of values per anchor.

    ``x`` of size (bs, nf, gx, gy) is made channels-last and viewed as
    (bs, -1, nf // k), where ``k`` is the number of anchors per grid cell.
    """
    batch, channels, _rows, _cols = x.size()
    channels_last = x.permute(0, 2, 3, 1).contiguous()
    per_anchor = channels // k
    return channels_last.view(batch, -1, per_anchor)

In [None]:
def initialize_vals(mdl):
    """Re-initialise every Conv2d and BatchNorm2d inside ``mdl`` in place.

    Conv2d weights are drawn from N(0, sqrt(2 / n)) with
    n = kernel_h * kernel_w * out_channels; conv biases are left untouched.
    BatchNorm2d weights are set to 1 and biases to 0.
    """
    for layer in mdl.modules():
        if isinstance(layer, nn.Conv2d):
            kernel_h, kernel_w = layer.kernel_size[0], layer.kernel_size[1]
            fan = kernel_h * kernel_w * layer.out_channels
            std = math.sqrt(2. / fan)
            layer.weight.data.normal_(0, std)
            continue
        if isinstance(layer, nn.BatchNorm2d):
            layer.weight.data.fill_(1)
            layer.bias.data.zero_()
    

In [None]:
class classf_model(nn.Module):
    """Classification head applied to one pyramid level.

    Four 3x3 conv + ReLU layers (``fs`` channels each) followed by a 3x3 conv
    with ``na * nc`` output channels, as described for the RetinaNet
    classification subnet.

    Arguments:
        fs: channel count of the incoming feature map and the hidden convs.
        na: anchors per grid cell.
        nc: number of classes.
    """
    def __init__(self, fs=256, na=9, nc=80):
        # Must run before any submodule is assigned to self.
        super().__init__()
        self.na = na
        self.nc = nc
        self.feat_size = fs
        layers = []
        # Build four distinct conv layers; `[conv] * 4` would reuse one layer
        # (and its weights) four times.
        for _ in range(4):
            layers.append(nn.Conv2d(in_channels=self.feat_size,
                                    out_channels=self.feat_size,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())
        layers.append(nn.Conv2d(in_channels=self.feat_size,
                                out_channels=self.na * self.nc,
                                kernel_size=3, padding=1))
        # Unpack into Sequential: a nested ModuleList has no forward() and
        # cannot be called as a layer.
        self.classification_model = nn.Sequential(*layers)
        initialize_vals(self.classification_model)

    def forward(self, inp):
        """Return class scores viewed as (bs, grid_cells * na, nc)."""
        out = self.classification_model(inp)
        return flatten_conv(out, self.na)

In [None]:
class regress_model(nn.Module):
    """Box-regression head applied to one pyramid level.

    Four 3x3 conv + ReLU layers (``fs`` channels each) followed by a 3x3 conv
    with ``na * 4`` output channels, as described for the RetinaNet box
    subnet.

    Arguments:
        fs: channel count of the incoming feature map and the hidden convs.
        na: anchors per grid cell.
        nc: number of classes; stored but not used by this head (kept so the
            signature matches the classification head).
    """
    def __init__(self, fs=256, na=9, nc=80):
        # Must run before any submodule is assigned to self.
        super().__init__()
        self.na = na
        self.nc = nc
        self.feat_size = fs
        layers = []
        # Build four distinct conv layers; `[conv] * 4` would reuse one layer
        # (and its weights) four times.
        for _ in range(4):
            layers.append(nn.Conv2d(in_channels=self.feat_size,
                                    out_channels=self.feat_size,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())
        layers.append(nn.Conv2d(in_channels=self.feat_size,
                                out_channels=self.na * 4,
                                kernel_size=3, padding=1))
        # Unpack into Sequential: a nested ModuleList has no forward() and
        # cannot be called as a layer.
        self.reg_model = nn.Sequential(*layers)
        initialize_vals(self.reg_model)

    def forward(self, inp):
        """Return box outputs viewed as (bs, grid_cells * na, 4)."""
        out = self.reg_model(inp)
        return flatten_conv(out, self.na)

In [None]:
class retina_net_model(nn.Module):
    """RetinaNet: ResNet backbone, feature pyramid and two shared heads.

    Arguments:
        resnet_model: a ResNet exposing conv1, bn1, relu, maxpool and
            layer1..layer4, whose layer2/3/4 end in a block with a ``conv3``
            (bottleneck-style, e.g. ResNet-50).

    ``forward`` returns ``[class_scores, box_outputs]``, each concatenated
    over all pyramid levels along dim 1.
    """
    def __init__(self, resnet_model):
        super().__init__()
        self.res_backbone = resnet_model
        # Channel counts of the c3, c4, c5 stages feeding the pyramid.
        self.fpn_sizes = [self.res_backbone.layer2[-1].conv3.out_channels,
                          self.res_backbone.layer3[-1].conv3.out_channels,
                          self.res_backbone.layer4[-1].conv3.out_channels]
        self.feat_size = 256
        self.num_anch = 9
        self.num_class = 80
        self.fpn = FPN_backbone(self.fpn_sizes)
        # The heads take the pyramid's channel count (self.feat_size).
        self.cls_model = classf_model(self.feat_size, self.num_anch, self.num_class)
        self.reg_model = regress_model(self.feat_size, self.num_anch, self.num_class)

    def forward(self, inp):
        # The stem and residual stages live on the wrapped backbone, not on self.
        backbone = self.res_backbone
        x = backbone.conv1(inp)
        x = backbone.bn1(x)
        x = backbone.relu(x)
        x = backbone.maxpool(x)
        x1 = backbone.layer1(x)
        x2 = backbone.layer2(x1)
        x3 = backbone.layer3(x2)
        x4 = backbone.layer4(x3)

        features = self.fpn([x2, x3, x4])
        # The same two heads are applied to every pyramid level.
        out_cls = [self.cls_model(p) for p in features]
        out_bbx = [self.reg_model(p) for p in features]

        return [torch.cat(out_cls, dim=1),
                torch.cat(out_bbx, dim=1)]