# SSD with SqueezeNet

In [None]:
import torch
import torchvision
import torch.nn as nn 

In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.utils.model_zoo import load_url as load_state_dict_from_url

class Fire(nn.Module):
    """SqueezeNet "Fire" block.

    A 1x1 "squeeze" convolution reduces the channel count, then two parallel
    "expand" convolutions (1x1 and 3x3 with padding 1) are applied to the
    squeezed tensor and their ReLU-activated outputs are concatenated along
    the channel axis.

    Args:
        inplanes: Number of input channels.
        squeeze_planes: Output channels of the squeeze convolution.
        expand1x1_planes: Output channels of the 1x1 expand branch.
        expand3x3_planes: Output channels of the 3x3 expand branch.

    The output has ``expand1x1_planes + expand3x3_planes`` channels and the
    same spatial size as the input.
    """

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super().__init__()
        self.inplanes = inplanes

        # Squeeze stage: channel bottleneck.
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)

        # Expand stage, branch 1: pointwise convolution.
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes,
                                   kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)

        # Expand stage, branch 2: 3x3 convolution; padding=1 keeps H and W.
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes,
                                   kernel_size=3, padding=1)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """Squeeze ``x``, run both expand branches and concatenate them on dim 1."""
        squeezed = self.squeeze_activation(self.squeeze(x))
        branch_1x1 = self.expand1x1_activation(self.expand1x1(squeezed))
        branch_3x3 = self.expand3x3_activation(self.expand3x3(squeezed))
        return torch.cat((branch_1x1, branch_3x3), dim=1)


class SqueezenetDet(nn.Module):
    """SqueezeNet-style backbone with a single convolutional detection head.

    The backbone (``self.features``) is a stack of ``Fire`` modules; the head
    (``self.convdet``) is one 3x3 convolution that emits
    ``anchors_per_grid * (num_classes + 5)`` channels per grid cell. The
    ``+ 5`` is one confidence score plus four box deltas per anchor.

    Args:
        arch: ``'squeezedet'`` or ``'squeezedetplus'``; selects the backbone.
        num_classes: Number of object classes.
        num_anchors: Total number of anchors per image. ``forward`` reshapes
            its output to ``(-1, num_anchors, num_classes + 5)``, so this must
            equal ``grid_h * grid_w * anchors_per_grid`` for the first
            dimension to stay the batch dimension.
        anchors_per_grid: Anchors predicted at every grid cell. The original
            code read this from an undefined name; the default of 9 is an
            assumption (the value used in the SqueezeDet paper) -- TODO
            confirm against the anchor configuration.
        dropout_prob: Dropout probability applied to the backbone output.
            ``0`` (the default) disables dropout entirely. The original code
            read this from an undefined name as well.

    Raises:
        ValueError: If ``arch`` is not one of the two supported names.
    """

    def __init__(self, arch, num_classes, num_anchors,
                 anchors_per_grid=9, dropout_prob=0.0):
        super(SqueezenetDet, self).__init__()
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        self.anchors_per_grid = anchors_per_grid

        if arch == 'squeezedet':
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
                Fire(512, 96, 384, 384),
                Fire(768, 96, 384, 384)
            )
            # Last Fire emits 384 + 384 channels.
            head_in_channels = 768
        elif arch == 'squeezedetplus':
            self.features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=7, stride=2, padding=3),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(96, 96, 64, 64),
                Fire(128, 96, 64, 64),
                Fire(128, 192, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 192, 128, 128),
                Fire(256, 288, 192, 192),
                Fire(384, 288, 192, 192),
                Fire(384, 384, 256, 256),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(512, 384, 256, 256),
                Fire(512, 384, 256, 256),
                Fire(512, 384, 256, 256),
            )
            # Last Fire emits 256 + 256 channels.
            head_in_channels = 512
        else:
            raise ValueError("Unsupported SqueezeNet version")

        # Optional dropout between backbone and head; None means "skip".
        self.dropout = (nn.Dropout(dropout_prob, inplace=True)
                        if dropout_prob > 0 else None)

        # Detection head: per grid cell, anchors_per_grid * (classes + 1 + 4).
        self.convdet = nn.Conv2d(head_in_channels,
                                 anchors_per_grid * (num_classes + 5),
                                 kernel_size=3, padding=1)

        self.init_weights()

    def forward(self, x):
        """Run backbone and head; return ``(-1, num_anchors, num_classes + 5)``."""
        x = self.features(x)
        if self.dropout is not None:
            x = self.dropout(x)
        x = self.convdet(x)
        # Move channels last so the per-cell predictions are contiguous
        # before they are split into one row per anchor.
        x = x.permute(0, 2, 3, 1).contiguous()
        return x.view(-1, self.num_anchors, self.num_classes + 5)

    def init_weights(self):
        """Initialise every Conv2d with N(0, std) weights and zero bias.

        The detection head uses std=0.002; all other convolutions use
        std=0.005.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                std = 0.002 if m is self.convdet else 0.005
                nn.init.normal_(m.weight, mean=0.0, std=std)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


class PredictionResolver(nn.Module):
    """Split raw detector output into class probabilities, scores and boxes.

    The last dimension of the raw prediction is laid out as
    ``[class logits (num_classes) | confidence (1) | box deltas (rest)]``.

    Args:
        input_size: Network input size, forwarded to ``deltas_to_boxes``.
        num_classes: Number of object classes.
        anchors: NumPy array of anchors; stored as a float tensor with a
            leading broadcast dimension of size 1.
        anchors_per_grid: Anchors per grid cell (stored, not used here).
        log_softmax: If true, ``forward`` also returns log class
            probabilities; otherwise that slot is ``None``.
    """

    def __init__(self, input_size, num_classes, anchors, anchors_per_grid, log_softmax=False):
        super(PredictionResolver, self).__init__()

        self.log_softmax = log_softmax
        self.input_size = input_size
        self.num_classes = num_classes
        self.anchors = torch.from_numpy(anchors).unsqueeze(0).float()
        self.anchors_per_grid = anchors_per_grid

    # NOTE: forward must be a method of the class (it was previously nested
    # inside __init__, so nn.Module.forward was never overridden).
    def forward(self, pred):
        """Decode ``pred``.

        Returns:
            Tuple ``(pred_class_probs, pred_log_class_probs, pred_scores,
            pred_deltas, pred_boxes)``. ``pred_log_class_probs`` is ``None``
            unless the resolver was built with ``log_softmax=True``.
        """
        # Only the first num_classes entries are class logits; the
        # confidence entry is deliberately excluded from the softmax.
        class_logits = pred[..., :self.num_classes].contiguous()
        pred_class_probs = safe_softmax(class_logits, dim=-1)
        pred_log_class_probs = (torch.log_softmax(class_logits, dim=-1)
                                if self.log_softmax else None)

        pred_scores = torch.sigmoid(
            pred[..., self.num_classes:self.num_classes + 1].contiguous())
        pred_deltas = pred[..., self.num_classes + 1:].contiguous()

        # Anchors are kept as a plain tensor attribute, so move them to the
        # prediction's device on every call.
        pred_boxes = deltas_to_boxes(pred_deltas,
                                     self.anchors.to(pred_deltas.device),
                                     input_size=self.input_size)
        return pred_class_probs, pred_log_class_probs, pred_scores, pred_deltas, pred_boxes
    
# Define the loss function next

class Loss(nn.Module):
    """Detection loss: class + confidence-score + box-regression terms.

    Args:
        num_anchors: Total number of anchors per image.
        class_loss_weight: Weight of the classification term.
        positive_score_loss_weight: Weight of the confidence term on anchors
            selected by the ground-truth mask.
        negative_score_loss_weight: Weight of the confidence term on all
            other anchors.
        bbox_loss_weights: Weight of the box-delta regression term.
        input_size, num_classes, anchors, anchors_per_grid: Configuration for
            the internal ``PredictionResolver``. The original constructor
            read these from undefined names; they are now explicit keyword
            parameters. ``input_size``, ``num_classes`` and ``anchors`` are
            required.

    Raises:
        ValueError: If any required resolver setting is missing.
    """

    def __init__(self, num_anchors, class_loss_weight, positive_score_loss_weight,
                 negative_score_loss_weight, bbox_loss_weights,
                 input_size=None, num_classes=None, anchors=None,
                 anchors_per_grid=None):
        super(Loss, self).__init__()

        required = (('input_size', input_size),
                    ('num_classes', num_classes),
                    ('anchors', anchors))
        # Identity checks only: ``anchors`` is an array, so ``== None`` would
        # be evaluated element-wise.
        missing = [name for name, value in required if value is None]
        if missing:
            raise ValueError(
                "Loss requires resolver settings: " + ", ".join(missing))

        self.resolver = PredictionResolver(input_size, num_classes, anchors,
                                           anchors_per_grid, log_softmax=True)
        self.num_anchors = num_anchors
        self.class_loss_weight = class_loss_weight
        self.positive_score_loss_weight = positive_score_loss_weight
        self.negative_score_loss_weight = negative_score_loss_weight
        self.bbox_loss_weight = bbox_loss_weights

    def forward(self, pred, gt):
        """Compute the per-image loss.

        Args:
            pred: Raw network output, decoded with the internal resolver.
            gt: Ground-truth tensor whose last dimension is sliced as
                ``[mask (1) | boxes (4) | deltas (4) | class targets (rest)]``.
                NOTE(review): the mask is assumed to be 0/1 and the class
                targets one-hot -- confirm against the dataset code.

        Returns:
            ``(loss, loss_stat)`` where ``loss_stat`` is a dict with keys
            ``'loss'``, ``'class_loss'``, ``'score_loss'`` and ``'bbox_loss'``.
        """
        anchor_masks = gt[..., :1]
        gt_boxes = gt[..., 1:5]
        gt_deltas = gt[..., 5:9]
        gt_class_logits = gt[..., 9:]

        _, pred_log_class_probs, pred_scores, pred_deltas, pred_boxes = self.resolver(pred)

        num_objects = torch.sum(anchor_masks, dim=[1, 2])
        # Guard the normalisers: an image with no masked anchors (or with
        # every anchor masked) would otherwise divide by zero. With a 0/1
        # mask the counts are integers, so clamping at 1 changes nothing
        # whenever the count is already positive.
        positive_norm = torch.clamp(num_objects, min=1)
        negative_norm = torch.clamp(self.num_anchors - num_objects, min=1)

        overlaps = compute_overlaps(gt_boxes, pred_boxes) * anchor_masks
        score_error = (overlaps - pred_scores) ** 2

        class_loss = torch.sum(
            self.class_loss_weight * anchor_masks * gt_class_logits * (-pred_log_class_probs),
            dim=[1, 2],
        ) / positive_norm

        positive_score_loss = torch.sum(
            self.positive_score_loss_weight * anchor_masks * score_error,
            dim=[1, 2],
        ) / positive_norm

        negative_score_loss = torch.sum(
            self.negative_score_loss_weight * (1 - anchor_masks) * score_error,
            dim=[1, 2],
        ) / negative_norm

        bbox_loss = torch.sum(
            self.bbox_loss_weight * anchor_masks * (pred_deltas - gt_deltas) ** 2,
            dim=[1, 2],
        ) / positive_norm

        loss = class_loss + positive_score_loss + negative_score_loss + bbox_loss

        loss_stat = {
            'loss': loss,
            'class_loss': class_loss,
            'score_loss': positive_score_loss + negative_score_loss,
            'bbox_loss': bbox_loss,
        }
        return loss, loss_stat

    
class SqueezeDetWithLoss(nn.Module):
    """Training wrapper: runs the detector and feeds its output to the loss.

    Args:
        base: Detector module; called with ``batch['image']``.
        loss: Loss module; called with ``(pred, batch['gt'])`` and expected
            to return ``(loss, loss_stats)``.
    """

    def __init__(self, base, loss):
        super(SqueezeDetWithLoss, self).__init__()
        # Use the modules the caller passed in; the previous version ignored
        # both arguments and tried to rebuild them from undefined names.
        self.base = base
        self.loss = loss

    def forward(self, batch):
        """Return ``(loss, loss_stats)`` for a batch dict with 'image' and 'gt'."""
        pred = self.base(batch['image'])
        loss, loss_stats = self.loss(pred, batch['gt'])
        return loss, loss_stats

class SqueezeDet(nn.Module):
    """Inference model: detector followed by prediction decoding.

    Args:
        base: Detector module; called with ``batch['image']``.
        resolver: Module that decodes the raw prediction into
            ``(class_probs, log_class_probs, scores, deltas, boxes)``.
    """

    def __init__(self, base, resolver):
        super(SqueezeDet, self).__init__()
        # Use the injected modules; the previous version ignored both
        # arguments and rebuilt them from undefined names.
        self.base = base
        self.resolver = resolver

    def forward(self, batch):
        """Return a dict with per-anchor 'class_ids', 'scores' and 'boxes'."""
        pred = self.base(batch['image'])
        pred_class_probs, _, pred_scores, _, pred_boxes = self.resolver(pred)

        # Final score of a class = class probability * confidence score.
        # Out-of-place so the resolver's output tensor is not modified.
        pred_class_probs = pred_class_probs * pred_scores

        # One max() call yields the best score and its class index together.
        pred_scores, pred_class_ids = torch.max(pred_class_probs, dim=2)

        det = {'class_ids': pred_class_ids,
               'scores': pred_scores,
               'boxes': pred_boxes}

        return det



In [None]:
class PredictionConvolutions(nn.Module):
    """
    Convolutions that predict box offsets and class scores from four
    feature maps (conv8_2, conv9_2, conv10_2, conv11_2).

    Each feature map gets its own localisation conv (4 values per box) and
    its own classification conv (``n_classes`` values per box). The results
    of all four maps are flattened to one row per box and concatenated.
    """

    def __init__(self, n_classes):
        """
        n_classes: Number of different types of objects
        """
        super(PredictionConvolutions, self).__init__()
        self.n_classes = n_classes

        # Number of boxes predicted at every position of each feature map.
        n_boxes = {'conv8_2': 6,
                   'conv9_2': 6,
                   'conv10_2': 4,
                   'conv11_2': 4}

        # Localisation heads: 4 offsets per box. Input channels are 512 for
        # conv8_2 and 256 for the other three maps.
        self.loc_conv8_2 = nn.Conv2d(512, n_boxes['conv8_2'] * 4, kernel_size=3, padding=1)
        self.loc_conv9_2 = nn.Conv2d(256, n_boxes['conv9_2'] * 4, kernel_size=3, padding=1)
        self.loc_conv10_2 = nn.Conv2d(256, n_boxes['conv10_2'] * 4, kernel_size=3, padding=1)
        self.loc_conv11_2 = nn.Conv2d(256, n_boxes['conv11_2'] * 4, kernel_size=3, padding=1)

        # Classification heads: n_classes scores per box.
        self.cl_conv8_2 = nn.Conv2d(512, n_boxes['conv8_2'] * n_classes, kernel_size=3, padding=1)
        self.cl_conv9_2 = nn.Conv2d(256, n_boxes['conv9_2'] * n_classes, kernel_size=3, padding=1)
        self.cl_conv10_2 = nn.Conv2d(256, n_boxes['conv10_2'] * n_classes, kernel_size=3, padding=1)
        self.cl_conv11_2 = nn.Conv2d(256, n_boxes['conv11_2'] * n_classes, kernel_size=3, padding=1)

        self.init_conv2d()

    def init_conv2d(self):
        """
        Initialise every direct Conv2d child: Xavier-uniform weights, zero bias.
        """
        for c in self.children():
            if isinstance(c, nn.Conv2d):
                nn.init.xavier_uniform_(c.weight)
                nn.init.constant_(c.bias, 0.)

    @staticmethod
    def _predict(conv, feats, values_per_box):
        """Apply ``conv`` to ``feats`` and flatten to (N, n_boxes_total, values_per_box).

        The channel axis is moved last before the reshape so that the rows
        are ordered by grid row, then grid column, then box within the cell.
        """
        out = conv(feats).permute(0, 2, 3, 1).contiguous()
        return out.view(feats.size(0), -1, values_per_box)

    def forward(self, conv8_2, conv9_2, conv10_2, conv11_2):
        """
        Predict box offsets and class scores for the four feature maps.

        Args:
            conv8_2: Feature map with 512 channels.
            conv9_2, conv10_2, conv11_2: Feature maps with 256 channels each.

        Returns:
            ``(locs, class_scores)`` with shapes ``(N, B, 4)`` and
            ``(N, B, n_classes)``, where B is the total number of boxes over
            all four maps (e.g. 790 for 10x10, 5x5, 3x3 and 1x1 maps).
        """
        feature_maps = (conv8_2, conv9_2, conv10_2, conv11_2)
        # Each feature map must go through its OWN head; the channel counts
        # differ between conv8_2 and the others.
        loc_heads = (self.loc_conv8_2, self.loc_conv9_2,
                     self.loc_conv10_2, self.loc_conv11_2)
        cl_heads = (self.cl_conv8_2, self.cl_conv9_2,
                    self.cl_conv10_2, self.cl_conv11_2)

        locs = torch.cat(
            [self._predict(head, feats, 4)
             for head, feats in zip(loc_heads, feature_maps)],
            dim=1)
        # dim=1 stacks along the box axis, matching ``locs`` row for row.
        class_scores = torch.cat(
            [self._predict(head, feats, self.n_classes)
             for head, feats in zip(cl_heads, feature_maps)],
            dim=1)

        return locs, class_scores
        

In [None]:
class Squeeze_detect(nn.Module):
    """
    squeeze_detect network that encapsulates the base Squeezenet, auxiliary and prediction convs
    """

    def __init__(self, n_classes):
        super(Squeeze_detect, self).__init__()
        self.n_classes = n_classes

        # NOTE(review): SqueezeNet and AuxiliaryConvolutions are not defined
        # in the visible part of this notebook -- they must be provided
        # before this class can be instantiated.
        self.base = SqueezeNet()
        self.aux_convs = AuxiliaryConvolutions()
        self.pred_convs = PredictionConvolutions(n_classes)

        # Learnable per-channel rescale factors (512 channels), initialised
        # to 20. NOTE(review): currently not applied in forward().
        self.rescale_factors = nn.Parameter(torch.FloatTensor(1, 512, 1, 1))
        nn.init.constant_(self.rescale_factors, 20)

        # prior boxes
        self.prior_cxcy = self.create_prior_boxes()

    def forward(self, image):
        """
        Forward propagation.

        Returns ``(locs, class_scores)`` as produced by the prediction convs.
        """
        # NOTE(review): the original body referenced an undefined
        # ``conv7_feats``. Feeding the base network's output into the
        # auxiliary convolutions is the assumed intent -- confirm that
        # self.base returns the single feature map aux_convs expects.
        # The L2-norm + rescale step on the base features is intentionally
        # left out, as in the original (it was commented out there).
        base_feats = self.base(image)

        # Higher-level feature map generators.
        conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats = self.aux_convs(base_feats)

        # Run prediction convolutions.
        locs, class_scores = self.pred_convs(conv8_2_feats, conv9_2_feats, conv10_2_feats, conv11_2_feats)

        return locs, class_scores

    def create_prior_boxes(self, device=None):
        """
        Create the prior (default) boxes for the four prediction feature maps.

        For every position of every feature map, one prior is generated per
        aspect ratio, plus one extra square prior (for ratio 1) whose scale
        is the geometric mean of this map's scale and the next map's scale
        (1.0 for the last map). With the dimensions below that is
        10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 790 priors.

        Priors are emitted in row, column, box order.

        Args:
            device: Optional torch device for the returned tensor; defaults
                to PyTorch's default device.

        Returns:
            Float tensor of shape (790, 4) holding (cx, cy, w, h) in
            fractional coordinates, clamped to [0, 1].
        """
        from math import sqrt  # local import: `sqrt` was never imported at file level

        # Side length of each (square) feature map.
        fmap_dims = {'conv8_2': 10,
                     'conv9_2': 5,
                     'conv10_2': 3,
                     'conv11_2': 1}

        obj_scales = {'conv8_2': 0.375,
                      'conv9_2': 0.55,
                      'conv10_2': 0.725,
                      'conv11_2': 0.9}

        aspect_ratios = {'conv8_2': [1., 2., 3., 0.5, .333],
                         'conv9_2': [1., 2., 3., 0.5, .333],
                         'conv10_2': [1., 2., 0.5],
                         'conv11_2': [1., 2., 0.5]}

        fmaps = list(fmap_dims)

        prior_boxes = []

        for k, fmap in enumerate(fmaps):
            dim = fmap_dims[fmap]
            scale = obj_scales[fmap]

            # Scale of the extra square prior; the last feature map has no
            # "next" map, so it falls back to 1.0.
            if k + 1 < len(fmaps):
                additional_scale = sqrt(scale * obj_scales[fmaps[k + 1]])
            else:
                additional_scale = 1.

            for i in range(dim):
                for j in range(dim):
                    cx = (j + 0.5) / dim
                    cy = (i + 0.5) / dim

                    for ratio in aspect_ratios[fmap]:
                        prior_boxes.append([cx, cy, scale * sqrt(ratio), scale / sqrt(ratio)])

                        # The extra prior must be added for EVERY feature
                        # map so the prior count matches the number of boxes
                        # the prediction convolutions emit per position.
                        if ratio == 1.:
                            prior_boxes.append([cx, cy, additional_scale, additional_scale])

        prior_boxes = torch.tensor(prior_boxes, dtype=torch.float32, device=device)
        prior_boxes.clamp_(0, 1)

        return prior_boxes

    def detect_objects(self, predicted_loss, predicted_scores, min_sccore, max_overlap, top_k):
        """
        Placeholder for decoding predictions into detections.

        Not implemented yet. Raises NotImplementedError so that a call fails
        loudly instead of silently returning None.
        """
        raise NotImplementedError("detect_objects is not implemented yet")

        
        
        
        
        
        

In [None]:
    # Scratch cell: names of the prediction feature maps, in order.
    # `fmap_dims` only exists as a local variable inside the class methods
    # above, so it is defined here to make this cell runnable on its own.
    fmap_dims = {'conv8_2': 10,
                 'conv9_2': 5,
                 'conv10_2': 3,
                 'conv11_2': 1}
    fmaps = list(fmap_dims)

In [None]:
# Scratch check: print each feature map's index and name on separate lines.
for k, fmap in enumerate(fmaps):
    print(k, fmap, sep="\n")