In [None]:
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from torch.autograd import Function
from utils import*
from detection_output import Detect
from prior_box import PriorBox
from l2norm import L2norm as L2norm
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.serialization import load_lua
import PIL
from PIL import Image
from f_l2norm import L2norm as norm


In [14]:




class SSD(nn.Module):
    def __init__(self, features1,num_classes):
        super(SSD, self).__init__()
        param=num_classes*3
        self.features1 = features1
        self.features2 = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512,512,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,512,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,512,kernel_size=3,padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3,stride=1,padding=1),
            nn.Conv2d(512,1024,kernel_size=3,padding=6,dilation=6),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024,1024,kernel_size=1),
            nn.ReLU(inplace=True),
        )

        self.features3 = nn.Sequential(
            nn.Conv2d(1024,256,kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256,512,kernel_size=3,stride=2,padding=1),
            nn.ReLU(inplace=True),
        )
        self.features4 = nn.Sequential(
            nn.Conv2d(512,128,kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128,256,kernel_size=3,stride=2,padding=1),
            nn.ReLU(inplace=True),
        )
        self.features5 = nn.Sequential(
            nn.Conv2d(256,128,kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128,256,kernel_size=3,stride=2,padding=1),
            nn.ReLU(inplace=True),
        )
        self.pool6 = nn.Sequential(
            nn.AvgPool2d(kernel_size=3,stride=1),
        )
        #include conv4_3 and loc4_3 here (might as well include p4_3) to make weight transfer easier
        self.L2norm = L2norm(512,20)
        self.l4_3 = nn.Conv2d(512,12,kernel_size=3,padding=1)
        self.c4_3 = nn.Conv2d(512,param,kernel_size=3,padding=1)
        self.p4_3 = PriorBox(num_classes, 300, 300, 30, -1, [1,2,1/2], [0.1, 0.1, 0.2, 0.2], False, True)
        self.lfc7 = nn.Conv2d(1024,24,kernel_size=3,padding=1)
        self.cfc7 = nn.Conv2d(1024,param*2,kernel_size=3,padding=1)
        self.pfc7 = PriorBox(num_classes, 300, 300, 60, 114, [1,1,2,1/2,3,1/3], [0.1, 0.1, 0.2, 0.2], False, True)
        self.l6_2=nn.Conv2d(512,24,kernel_size=3,padding=1)
        self.c6_2=nn.Conv2d(512,param*2,kernel_size=3,padding=1)
        self.p6_2=PriorBox(num_classes, 300, 300, 60, 114, [1,1,2,1/2,3,1/3], [0.1, 0.1, 0.2, 0.2], False, True)
        self.l7_2=nn.Conv2d(256,24,kernel_size=3,padding=1)
        self.c7_2=nn.Conv2d(256,param*2,kernel_size=3,padding=1)
        self.p7_2=PriorBox(num_classes, 300, 300, 168, 222, [1,1,2,1/2,3,1/3], [0.1, 0.1, 0.2, 0.2], False, True)
        self.l8_2=nn.Conv2d(256,24,kernel_size=3,padding=1)
        self.c8_2=nn.Conv2d(256,param*2,kernel_size=3,padding=1)
        self.p8_2=PriorBox(num_classes, 300, 300, 222, 276, [1,1,2,1/2,3,1/3], [0.1, 0.1, 0.2, 0.2], False, True)
        self.lp6 = nn.Conv2d(256,24,kernel_size=3,padding=1)
        self.cp6 = nn.Conv2d(256,param*2,kernel_size=3,padding=1)
        self.pp6=PriorBox(num_classes, 300, 300, 276, 330, [1,1,2,1/2,3,1/3], [0.1, 0.1, 0.2, 0.2], False, True)
        self.softmax = nn.Softmax()
        self.detect = Detect(21, True, 0, 'CENTER', False, 200, 0.01, 0.45, 400, False, 1)
        
    def forward(self, x, phase):
        x = self.features1(x)
        y = self.L2norm(x)
        branch1 = [torch.transpose(torch.transpose(self.l4_3(y),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.c4_3(y),1,2),2,3).contiguous()]
        p1 = self.p4_3(x)
        return p1
        branch1 = [o.view(o.size(0),-1) for o in branch1]
        x = self.features2(x)
        branch2 = [torch.transpose(torch.transpose(self.lfc7(x),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.cfc7(x),1,2),2,3).contiguous()]
       
        p2 = self.pfc7(x)
        branch2 = [o.view(o.size(0),-1) for o in branch2]
        x = self.features3(x)
        branch3 = [torch.transpose(torch.transpose(self.l6_2(x),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.c6_2(x),1,2),2,3).contiguous()]

        p3 = self.p6_2(x)
        branch3 = [o.view(o.size(0),-1) for o in branch3]
        x = self.features4(x)
        branch4 = [torch.transpose(torch.transpose(self.l7_2(x),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.c7_2(x),1,2),2,3).contiguous()]
        p4 = self.p7_2(x)
        branch4 = [o.view(o.size(0),-1) for o in branch4]
        x = self.features5(x)
        branch5 = [torch.transpose(torch.transpose(self.l8_2(x),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.c8_2(x),1,2),2,3).contiguous()]
        p5 = self.p8_2(x)
        branch5 = [o.view(o.size(0),-1) for o in branch5]
        x = self.pool6(x)
        branch6 = [torch.transpose(torch.transpose(self.lp6(x),1,2),2,3).contiguous(),torch.transpose(torch.transpose(self.cp6(x),1,2),2,3).contiguous()]
        p6 = self.pp6(x)
        branch6 = [o.view(o.size(0),-1) for o in branch6]
        loc_layers = torch.cat((branch1[0],branch2[0],branch3[0],branch4[0],branch5[0],branch6[0]),1)
        conf_layers = torch.cat((branch1[1],branch2[1],branch3[1],branch4[1],branch5[1],branch6[1]),1)
        box_layers = torch.cat((p1,p2,p3,p4,p5,p6), 2)

        if phase == "test":
            conf_layers = conf_layers.view(-1,21)
            print(conf_layers.size())
            output = self.softmax(conf_layers)
            output = self.detect(loc_layers,conf_layers,box_layers)
        else:
            conf_layers = conf_layers.view(conf_layers.size(0),-1,21)

        return output


def make_layers(cfg, i,  batch_norm=False):
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


cfg = {
    'A': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512],
}

net = SSD(make_layers(cfg['A'],3),21)


In [15]:
# imsize = 300
# loader = transforms.Compose([
#            # transforms.Scale(imsize),# scale imported image
#            # transforms.CenterCrop(imsize),
#             transforms.ToTensor()]) # transform it into a torch tensor

# def image_loader(image_name):
#     image = Image.open(image_name)
#     image = loader(image)
#     #image = image.unsqueeze(0) # fake batch dimension required to fit network's input dimensions
#     return image


# I = image_loader("../PyTorch Projects/pytorch-ssd/IMG_9505.jpg")

In [16]:
# # input_scale = 0.0078125  # Scale input image
# # input_dim = 300  # input dimension
# raw_scale = 255
# swap = [2,1,0]
# mean_pix = [104,117,123]


# #   local w, h  = img:size(3), img:size(2) 
# # I  = image.scale(pic, input_dim, input_dim)
# I.mul_(raw_scale) 

# out = I.new().resize_as_(I) # swap channels 
# for i in range(I.size(0)):
#     out[i] = I[swap[i]]
# I = out
  
# for i in range(3): # subtract mean from each channel
#     I[i].add(-mean_pix[i]) 

 
# I = I.view(1,I.size(0),I.size(1),I.size(2)) 

In [17]:
img = load_lua('data/img9505.t7')

In [18]:
img = img.float()

In [19]:
ssd = load_lua('../PyTorchProjects/pytorch-ssd/VOC.t7')

In [20]:
norm_weights = load_lua('weights/normWeights.t7')

In [21]:
pytorch_modules = list(net.modules())

In [22]:
next_pytorch_idx = 0
for i, t7_module in enumerate(ssd.modules):
    if not hasattr(t7_module, 'weight'):
        continue
    assert hasattr(t7_module, 'bias')
    while not hasattr(pytorch_modules[next_pytorch_idx], 'weight'):
        next_pytorch_idx += 1
    pytorch_module = pytorch_modules[next_pytorch_idx]
    next_pytorch_idx += 1
    if(pytorch_module.weight.data.dim() == 1):
        pytorch_module = pytorch_modules[next_pytorch_idx]
        next_pytorch_idx += 1
    print(t7_module.weight.size())
    print(pytorch_module.weight.data.size())
    assert(t7_module.weight.size() == pytorch_module.weight.size())
    print('%r Copying data from\n  %r to\n  %r' % (i, t7_module, pytorch_module))

    pytorch_module.weight.data.copy_(t7_module.weight)
    assert(t7_module.bias.size() == pytorch_module.bias.size())
    pytorch_module.bias.data.copy_(t7_module.bias)


torch.Size([64, 3, 3, 3])
torch.Size([64, 3, 3, 3])
0 Copying data from
  nn.SpatialConvolution(3 -> 64, 3x3, 1, 1, 1, 1) to
  Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([64, 64, 3, 3])
torch.Size([64, 64, 3, 3])
2 Copying data from
  nn.SpatialConvolution(64 -> 64, 3x3, 1, 1, 1, 1) to
  Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([128, 64, 3, 3])
torch.Size([128, 64, 3, 3])
5 Copying data from
  nn.SpatialConvolution(64 -> 128, 3x3, 1, 1, 1, 1) to
  Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([128, 128, 3, 3])
torch.Size([128, 128, 3, 3])
7 Copying data from
  nn.SpatialConvolution(128 -> 128, 3x3, 1, 1, 1, 1) to
  Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
torch.Size([256, 128, 3, 3])
torch.Size([256, 128, 3, 3])
10 Copying data from
  nn.SpatialConvolution(128 -> 256, 3x3, 1, 1, 1, 1) to
  Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
tor

In [23]:
pytorch_modules[-14].weight.data.copy_(norm_weights);

In [24]:
detections = net(Variable(img),'test')

In [26]:
detections.size()

torch.Size([1, 2, 17328])

In [29]:
for i in range(17328):
    print(detections.data[0][0][i])

0.0
-0.03684210404753685
-0.03684210404753685
0.06315789371728897
0.06315789371728897
-0.05755278468132019
-0.02219744399189949
0.08386857062578201
0.04851323366165161
-0.02219744399189949
-0.05755278468132019
0.04851323366165161
0.08386857062578201
-0.010526316240429878
-0.03684210404753685
0.08947368711233139
0.06315789371728897
-0.03123699314892292
-0.02219744399189949
0.11018436402082443
0.04851323366165161
0.004118345212191343
-0.05755278468132019
0.07482901960611343
0.08386857062578201
0.015789473429322243
-0.03684210404753685
0.11578947305679321
0.06315789371728897
-0.004921204410493374
-0.02219744399189949
0.13650014996528625
0.04851323366165161
0.03043413534760475
-0.05755278468132019
0.10114481300115585
0.08386857062578201
0.04210526496171951
-0.03684210404753685
0.14210526645183563
0.06315789371728897
0.021394584327936172
-0.02219744399189949
0.16281594336032867
0.04851323366165161
0.05674992501735687
-0.05755278468132019
0.12746059894561768
0.08386857062578201
0.06842105090

In [25]:
for i in range(detections.data.size(1)):
    print(detections.data[0][i])


 0.0000
-0.0368
-0.0368
   ⋮   
 0.0000
 0.0000
 0.0000
[torch.FloatTensor of size 17328]


 0
 0
 0
⋮ 
 0
 0
 0
[torch.FloatTensor of size 17328]



In [None]:
detections = detections.data.numpy()

In [None]:
detections.shape

In [None]:
detections[0][1][6]

In [None]:
# Parse the outputs.
det_label = detections[0,:,1]
det_conf = detections[0,:,2]
det_xmin = detections[0,:,3]
det_ymin = detections[0,:,4]
det_xmax = detections[0,:,5]
det_ymax = detections[0,:,6]

# Get detections with confidence higher than 0.6.
top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.6]

top_conf = det_conf[top_indices]
top_label_indices = det_label[top_indices].tolist()
#top_labels = get_labelname(labelmap, top_label_indices)
top_xmin = det_xmin[top_indices]
top_ymin = det_ymin[top_indices]
top_xmax = det_xmax[top_indices]
top_ymax = det_ymax[top_indices]

In [None]:
top_indices

In [None]:
import matplotlib.pyplot as plt
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()

plt.imshow(image)
currentAxis = plt.gca()

for i in xrange(top_conf.shape[0]):
    xmin = int(round(top_xmin[i] * image.shape[1]))
    ymin = int(round(top_ymin[i] * image.shape[0]))
    xmax = int(round(top_xmax[i] * image.shape[1]))
    ymax = int(round(top_ymax[i] * image.shape[0]))
    score = top_conf[i]
    label = int(top_label_indices[i])
    label_name = top_labels[i]
    display_txt = '%s: %.2f'%(label_name, score)
    coords = (xmin, ymin), xmax-xmin+1, ymax-ymin+1
    color = colors[label]
    currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
    currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

In [None]:
import os
cwd = os.getcwd()

In [None]:
ssd.modules

In [None]:
pytorch_modules[-14].

In [None]:
8340-7308

In [None]:
cwd