forked from amdegroot/ssd.pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Try to learn from https://github.com/ShuangXieIrene/ssds.pytorch but …
…not success yet
- Loading branch information
Showing
5 changed files
with
374 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
from torch import nn | ||
|
||
|
||
__all__ = ['MobileNetV2', 'mobilenet_v2'] | ||
|
||
|
||
model_urls = { | ||
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', | ||
} | ||
|
||
|
||
def _make_divisible(v, divisor, min_value=None): | ||
""" | ||
This function is taken from the original tf repo. | ||
It ensures that all layers have a channel number that is divisible by 8 | ||
It can be seen here: | ||
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py | ||
:param v: | ||
:param divisor: | ||
:param min_value: | ||
:return: | ||
""" | ||
if min_value is None: | ||
min_value = divisor | ||
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) | ||
# Make sure that round down does not go down by more than 10%. | ||
if new_v < 0.9 * v: | ||
new_v += divisor | ||
return new_v | ||
|
||
|
||
class ConvBNReLU(nn.Sequential): | ||
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): | ||
padding = (kernel_size - 1) // 2 | ||
super(ConvBNReLU, self).__init__( | ||
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), | ||
nn.BatchNorm2d(out_planes), | ||
nn.ReLU6(inplace=True) | ||
) | ||
|
||
|
||
class InvertedResidual(nn.Module): | ||
def __init__(self, inp, oup, stride, expand_ratio): | ||
super(InvertedResidual, self).__init__() | ||
self.stride = stride | ||
assert stride in [1, 2] | ||
|
||
hidden_dim = int(round(inp * expand_ratio)) | ||
self.use_res_connect = self.stride == 1 and inp == oup | ||
|
||
layers = [] | ||
#if expand_ratio != 1: | ||
# pw | ||
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) | ||
layers.extend([ | ||
# dw | ||
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), | ||
# pw-linear | ||
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), | ||
nn.BatchNorm2d(oup), | ||
]) | ||
self.conv = nn.Sequential(*layers) | ||
|
||
def forward(self, x): | ||
if self.use_res_connect: | ||
return x + self.conv(x) | ||
else: | ||
return self.conv(x) | ||
|
||
|
||
class MobileNetV2(nn.Module): | ||
def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8): | ||
""" | ||
MobileNet V2 main class | ||
Args: | ||
num_classes (int): Number of classes | ||
width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount | ||
inverted_residual_setting: Network structure | ||
round_nearest (int): Round the number of channels in each layer to be a multiple of this number | ||
Set to 1 to turn off rounding | ||
""" | ||
super(MobileNetV2, self).__init__() | ||
block = InvertedResidual | ||
input_channel = 32 | ||
last_channel = 1280 | ||
|
||
if inverted_residual_setting is None: | ||
inverted_residual_setting = [ | ||
# t, c, n, s | ||
[1, 16, 1, 1], | ||
[6, 24, 2, 2], | ||
[6, 32, 3, 2], | ||
[6, 64, 4, 2], | ||
[6, 96, 3, 1], | ||
[6, 160, 3, 2], | ||
[6, 320, 1, 1], | ||
] | ||
|
||
# only check the first element, assuming user knows t,c,n,s are required | ||
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: | ||
raise ValueError("inverted_residual_setting should be non-empty " | ||
"or a 4-element list, got {}".format(inverted_residual_setting)) | ||
|
||
# building first layer | ||
input_channel = _make_divisible(input_channel * width_mult, round_nearest) | ||
self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) | ||
features = [ConvBNReLU(3, input_channel, stride=2)] | ||
# building inverted residual blocks | ||
for t, c, n, s in inverted_residual_setting: | ||
output_channel = _make_divisible(c * width_mult, round_nearest) | ||
for i in range(n): | ||
stride = s if i == 0 else 1 | ||
features.append(block(input_channel, output_channel, stride, expand_ratio=t)) | ||
input_channel = output_channel | ||
# building last several layers | ||
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) | ||
# make it nn.Sequential | ||
# self.features = nn.Sequential(*features) | ||
self.features = features | ||
|
||
# building classifier | ||
self.classifier = nn.Sequential( | ||
nn.Dropout(0.2), | ||
nn.Linear(self.last_channel, num_classes), | ||
) | ||
|
||
# weight initialization | ||
for m in self.modules(): | ||
if isinstance(m, nn.Conv2d): | ||
nn.init.kaiming_normal_(m.weight, mode='fan_out') | ||
if m.bias is not None: | ||
nn.init.zeros_(m.bias) | ||
elif isinstance(m, nn.BatchNorm2d): | ||
nn.init.ones_(m.weight) | ||
nn.init.zeros_(m.bias) | ||
elif isinstance(m, nn.Linear): | ||
nn.init.normal_(m.weight, 0, 0.01) | ||
nn.init.zeros_(m.bias) | ||
|
||
def forward(self, x): | ||
endpoints = [] | ||
# fetch layer 6 (38x38), 13 (19x19), 17 (10x10) as detection features | ||
layers = [] | ||
for index, layer in enumerate(self.features): | ||
x = layer(x) | ||
layers.append(layer) | ||
if index in [13, 17]: | ||
endpoints.append(x) | ||
if index == 17: | ||
break | ||
# x = self.features(x) | ||
# x = x.mean([2, 3]) | ||
# x = self.classifier(x) | ||
return x, endpoints | ||
|
||
|
||
def mobilenet_v2(pretrained=False, progress=True, **kwargs): | ||
""" | ||
Constructs a MobileNetV2 architecture from | ||
`"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_. | ||
Args: | ||
pretrained (bool): If True, returns a model pre-trained on ImageNet | ||
progress (bool): If True, displays a progress bar of the download to stderr | ||
""" | ||
model = MobileNetV2(**kwargs) | ||
if pretrained: | ||
state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], | ||
progress=progress) | ||
model.load_state_dict(state_dict) | ||
return model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
import torch | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
from torch.autograd import Variable | ||
from layers import * | ||
from data import voc, coco, cub | ||
from nets import mobilenet | ||
import os | ||
|
||
|
||
class SSDMobileNetV2(nn.Module): | ||
"""Single Shot Multibox Architecture | ||
The network is composed of a base MobileNetV2 followed by the | ||
added multibox conv layers. Each multibox layer branches into | ||
1) conv2d for class conf scores | ||
2) conv2d for localization predictions | ||
3) associated priorbox layer to produce default bounding | ||
boxes specific to the layer's feature map size. | ||
See: https://arxiv.org/pdf/1512.02325.pdf for more details. | ||
Args: | ||
phase: (string) Can be "test" or "train" | ||
size: input image size | ||
extras: extra layers that feed to multibox loc and conf layers | ||
head: "multibox head" consists of loc and conf conv layers | ||
""" | ||
|
||
def __init__(self, phase, size, extras, head, num_classes): | ||
super(SSDMobileNetV2, self).__init__() | ||
self.phase = phase | ||
self.num_classes = num_classes | ||
switcher = { | ||
2: cub, | ||
21: voc | ||
} | ||
self.cfg = switcher.get(num_classes, coco) | ||
self.priorbox = PriorBox(self.cfg) | ||
self.priors = Variable(self.priorbox.forward(), volatile=True) | ||
self.size = size | ||
|
||
# SSD network | ||
self.backbone = mobilenet.MobileNetV2(num_classes=num_classes, width_mult=1.0) | ||
self.norm = L2Norm(96, 20) | ||
self.extras = nn.ModuleList(extras) | ||
|
||
self.loc = nn.ModuleList(head[0]) | ||
self.conf = nn.ModuleList(head[1]) | ||
|
||
if phase == 'test': | ||
self.softmax = nn.Softmax(dim=-1) | ||
self.detect = Detect(num_classes, 0, 200, 0.01, 0.45) | ||
|
||
def forward(self, x): | ||
"""Applies network layers and ops on input image(s) x. | ||
Args: | ||
x: input image or batch of images. Shape: [batch,3,300,300]. | ||
Return: | ||
Depending on phase: | ||
test: | ||
Variable(tensor) of output class label predictions, | ||
confidence score, and corresponding location predictions for | ||
each object detected. Shape: [batch,topk,7] | ||
train: | ||
list of concat outputs from: | ||
1: confidence layers, Shape: [batch*num_priors,num_classes] | ||
2: localization layers, Shape: [batch,num_priors*4] | ||
3: priorbox layers, Shape: [2,num_priors*4] | ||
""" | ||
sources = list() | ||
loc = list() | ||
conf = list() | ||
|
||
_, endpoints = self.backbone(x) | ||
sources.append(self.norm(endpoints[0])) | ||
sources.append(endpoints[1]) | ||
x = endpoints[1] | ||
|
||
# apply extra layers and cache source layer outputs | ||
for k, v in enumerate(self.extras): | ||
x = F.relu(v(x), inplace=True) | ||
#x = v(x) | ||
#if k % 2 == 1: | ||
sources.append(x) | ||
|
||
# apply multibox head to source layers | ||
for (x, l, c) in zip(sources, self.loc, self.conf): | ||
loc.append(l(x).permute(0, 2, 3, 1).contiguous()) | ||
conf.append(c(x).permute(0, 2, 3, 1).contiguous()) | ||
|
||
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) | ||
conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) | ||
if self.phase == "test": | ||
output = self.detect( | ||
loc.view(loc.size(0), -1, 4), # loc preds | ||
self.softmax(conf.view(conf.size(0), -1, | ||
self.num_classes)), # conf preds | ||
self.priors.type(type(x.data)) # default boxes | ||
) | ||
else: | ||
output = ( | ||
loc.view(loc.size(0), -1, 4), | ||
conf.view(conf.size(0), -1, self.num_classes), | ||
self.priors | ||
) | ||
return output | ||
|
||
def load_weights(self, base_file): | ||
other, ext = os.path.splitext(base_file) | ||
if ext == '.pkl' or '.pth': | ||
print('Loading weights into state dict...') | ||
self.load_state_dict(torch.load(base_file, | ||
map_location=lambda storage, loc: storage)) | ||
print('Finished!') | ||
else: | ||
print('Sorry only .pth and .pkl files supported.') | ||
|
||
|
||
'''def add_extras(cfg, i, batch_norm=False): | ||
# Extra layers added to VGG for feature scaling | ||
layers = [] | ||
in_channels = i | ||
flag = False | ||
for k, v in enumerate(cfg): | ||
if in_channels != 'S': | ||
if v == 'S': | ||
layers += [nn.Conv2d(in_channels, cfg[k + 1], | ||
kernel_size=(1, 3)[flag], stride=2, padding=1)] | ||
else: | ||
layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] | ||
flag = not flag | ||
in_channels = v | ||
return layers''' | ||
|
||
def add_extras(cfg): | ||
layers = [] | ||
block = mobilenet.InvertedResidual | ||
|
||
layers.append(block(320, 512, 2, 512.0/320.0)) | ||
layers.append(block(512, 256, 2, 0.5)) | ||
layers.append(block(256, 256, 2, 1)) | ||
layers.append(block(256, 128, 2, 0.5)) | ||
|
||
return layers | ||
|
||
def multibox(extra_layers, cfg, num_classes): | ||
loc_layers = [] | ||
conf_layers = [] | ||
mobilenet_channels = [96, 320] | ||
for k, channel in enumerate(mobilenet_channels): | ||
loc_layers += [nn.Conv2d(channel, | ||
cfg[k] * 4, kernel_size=3, padding=1)] | ||
conf_layers += [nn.Conv2d(channel, | ||
cfg[k] * num_classes, kernel_size=3, padding=1)] | ||
out_channels = [0, 0, 512, 256, 256, 128] | ||
for k, v in enumerate(extra_layers, 2): | ||
loc_layers += [nn.Conv2d(out_channels[k], cfg[k] | ||
* 4, kernel_size=3, padding=1)] | ||
conf_layers += [nn.Conv2d(out_channels[k], cfg[k] | ||
* num_classes, kernel_size=3, padding=1)] | ||
return extra_layers, (loc_layers, conf_layers) | ||
|
||
|
||
extras = { | ||
'300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], | ||
'512': [], | ||
} | ||
mbox = { | ||
'300': [6, 6, 6, 6, 4, 4], # number of boxes per feature map location | ||
'512': [], | ||
} | ||
|
||
|
||
def build_ssd_mobilenet(phase, size=300, num_classes=21): | ||
if phase != "test" and phase != "train": | ||
print("ERROR: Phase: " + phase + " not recognized") | ||
return | ||
if size != 300: | ||
print("ERROR: You specified size " + repr(size) + ". However, " + | ||
"currently only SSD300 (size=300) is supported!") | ||
return | ||
extras_, head_ = multibox(add_extras(extras[str(size)]), | ||
mbox[str(size)], num_classes) | ||
return SSDMobileNetV2(phase, size, extras_, head_, num_classes) |
Oops, something went wrong.