# Faster RCNN Model

I will be using the Pascal Visual Object Classes 2007 Dataset

https://www.kaggle.com/datasets/zaraks/pascal-voc-2007?select=VOCtrainval_06-Nov-2007

This model will be based off the guidelines outlined in this instructional video: 
https://www.youtube.com/watch?v=Qq1yfWDdj5Y&list=WL&index=1

## implement simple Faster-RCNN Model

In [2]:
#import the libraries
import torch
import torch as nn
import torchvision
import math

In [3]:
#choose the accelerator if available. On Mac so using mps.
device= torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(device)

mps


In [5]:
#create Region Proposal Network class
class RegionProposalNetwork(nn.Module):
    #512 is input created by feature map output of the backbone we are using
    def __init__(self, in_channels= 512):
        super(RegionProposalNetwork, self).__init__()
        #specify scales and aspect ratio's for the anchor boxes
        self.scales= [128, 256, 512]
        self.aspect_ratios= [0.5, 1, 2]
        self.num_anchors= len(self.scales) * len(self.aspect_ratios)

        # 3 layers used
        #Layer1: 3x3 convolutional layer
        self.rpn_conv= nn.Conv2d(in_channels,
                                in_channels,
                                kernel_size= 3,
                                stride= 1,
                                padding=1)
        #Layer2: 1x1 classification layer
        self.cls_layer= nn.Conv2d(in_channels,
                                  self.num_anchors, 
                                  kernel_size= 1,
                                  stride= 1)
        #Layer#3: 1x1 regression layer
        self.bbox_reg_layer= nn.Conv2d(in_channels,
                                        self.num_anchors *4,
                                        kernel_size= 1,
                                        stride= 1)
        """
        relevant shapes
        image              [1x3x600x800]
        feat               [1x512x37x50]
        target['bboxes']   [1x6x4]
        target['labels']   [1x6]
        cls_scores         [1x9x37x50]
        box_transform_pred [1x36x37x50]
        stride_h/w         [16]
        base_anchors       [9x4]
        anchors            [16650x4]
        """

        def generate_anchors(self, image, feat):
            grid_h, grid_w= feat.shape[-2:]
            image_h, image_w= image.shape[-2:]
            
            stride_h= torch.tensor(image_h // grid_h,
                                  dtype= torch.int32, #mps can only handle 32 instead of cuda 64
                                  device= feat.device)
            
            stride_w= torch.tensor(image_w // grid_w,
                                  dtype= torch.int32,
                                  device= feat.device)

            scales= torch.as_tensor(self.scales,
                                    dtype= feat.dtype,
                                    device= feat.device)

            aspect_ratios= torch.as_tensor(self.aspect_ratios,
                                    dtype= feat.dtype,
                                    device= feat.device)

            #ensure h/w= aspect_ratios and h*w= 1
            h_ratios= torch.sqrt(aspect_ratios)
            w_ratios= 1/h_ratios

            ws= (w_ratios[:, None] * scales[None, :]).view(-1)
            hs= (h_ratios[:, None] * scales[None, :]).view(-1)

            base_anchors= torch.stack([-ws, -hs, ws, hs], dim=1) /2
            base_anchors= base_anchors.round()

            #convert all base anchors to grid of all anchors through
            #shifts in x axis (0,1,...,W_feat-1) * stride_w
            shifts_x= torch.arange(0, grid_w,
                                   dtype= torch.int32,
                                   device= feat.device) * stride_w

            #shifts in y axis (0,1,...,H_feat-1) * stride_h
            shifts_y= torch.arange(0, grid_h,
                                   dtype= torch.int32,
                                   device= feat.device) * stride_h

            shifts_y, shifts_x= torch.meshgrid(shifts_y, shifts_x,
                                               indexing= 'ij')

            #(H_feat, W_feat)
            shifts_x= shifts_x.reshape(-1)
            shifts_y= shifts_y.reshape(-1)
            shifts= torch.stack((shifts_x,
                                 shifts_y,
                                 shifts_x,
                                 shifts_y), dim= 1)
            #shifts > (H_feat * W_feat, 4)

            #base_anchors > (num_anchors_per_location, 4)
            #shifts > (H_feat * W_feat, 4)
            anchors= (shifts.view(-1,1,4) + base_anchors.view(1,-1,4))
            #(H_feat * W_feat, num_anchors_per_location, 4)

            anchors= anchors.reshape(-1, 4)
            #anchors > (H_feat 8 W_feat * num_anchors_per_location, 4)
            return anchors
            
        #forward pass
        def forward(self, image, feat, target): 
            #call RPN layers
            rpn_feat= nn.Relu()(self.rpn_conv(feat))
            cls_scores= self.cls_layer(rpn_feat)
            box_transform_pred= self.bbox_reg_layer(rpn_feat)

            #generate anchors
            anchors= self.generate_anchors(image, feat)

SyntaxError: incomplete input (965009234.py, line 38)