In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image

# Expected input size (channels, height, width) = [3, 385, 1085]

In [37]:
def swish(x):
    """Swish activation: ``x * sigmoid(x)``, applied element-wise.

    Args:
        x: Input tensor.

    Returns:
        Tensor with the same shape as ``x``.
    """
    # torch.sigmoid is evaluated in a numerically stable way. The previous
    # form, x / (1 + exp(-x)), overflowed exp(-x) to inf for large negative
    # inputs, which made the backward pass compute 0 * inf = NaN.
    return x * torch.sigmoid(x)

# Custom 2d average pooling. Used directly prior to the fully connected layer(s)
def avg_pool(x, dims, keep_dims=False):
    """Average ``x`` over the given dimensions.

    Args:
        x: Floating-point input tensor.
        dims: Tuple with the two dimensions to pool over, e.g. ``(2, 3)``.
        keep_dims: If True, the pooled dimensions are kept with size 1 so the
            output has the same number of dimensions as the input.

    Returns:
        Tensor holding the mean over ``dims``.
    """
    # The earlier implementation used torch.sum, so the result grew with the
    # size of the pooled dimensions instead of being an average. keepdim also
    # replaces the manual unsqueeze chain, which only worked when dims were
    # listed in ascending order.
    return torch.mean(x, dim=dims, keepdim=keep_dims)

In [46]:
# MobileNet-inspired architecture (uses standard convolutions, not depthwise-separable ones)
class SingNET(nn.Module):
    """MobileNet-inspired convolutional classifier with 5 output units.

    The network is a strided 3x3 stem convolution followed by nine stages.
    Each stage is a 3x3 convolution (no padding) and a 1x1 convolution, both
    followed by batch normalisation and ReLU. The spatial dimensions of the
    final feature map are then pooled away and a linear layer maps the 1024
    channels to 5 outputs.

    NOTE(review): the 3x3 convolutions are ordinary (non-grouped) and
    unpadded, and the 512-channel stage named ``layer7_12`` is a single stage
    applied once. A canonical MobileNet uses depthwise 3x3 convolutions and
    repeats that stage -- confirm whether this difference is intended. It is
    left untouched here so parameter names and shapes stay compatible with
    any saved checkpoints.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()

        # Stages with stride=2 roughly halve the spatial resolution.
        self.layer1 = self._make_stage(32, 64, stride=1)
        self.layer2 = self._make_stage(64, 128, stride=2)
        self.layer3 = self._make_stage(128, 128, stride=1)
        self.layer4 = self._make_stage(128, 256, stride=2)
        self.layer5 = self._make_stage(256, 256, stride=1)
        self.layer6 = self._make_stage(256, 512, stride=2)
        self.layer7_12 = self._make_stage(512, 512, stride=1)
        self.layer13 = self._make_stage(512, 1024, stride=2)
        self.layer14 = self._make_stage(1024, 1024, stride=2)

        self.fc = nn.Linear(1024, 5)

    @staticmethod
    def _make_stage(in_channels, out_channels, stride):
        """Build one 3x3-conv -> BN -> ReLU -> 1x1-conv -> BN -> ReLU stage.

        The module order matches the hand-written stacks this replaces, so
        ``state_dict`` keys (e.g. ``layer1.0.weight``) are unchanged.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride),
            nn.BatchNorm2d(in_channels),
            nn.ReLU(),

            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape (batch, 3, height, width). The input must be
                large enough to survive the unpadded and strided convolutions.

        Returns:
            Tensor of shape (batch, 5) produced by the final linear layer.
        """
        out = self.bn1(self.conv1(x))
        out = self.relu(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7_12(out)
        out = self.layer13(out)
        out = self.layer14(out)

        # Collapse the spatial dimensions (2 and 3) so that only
        # (batch, channels) remains for the linear layer.
        out = avg_pool(out, (2, 3))
        out = self.fc(out)

        # The result was previously computed but never returned, so calling
        # the model always produced None.
        return out
        
        

In [42]:
# Load one spectrogram image and turn it into a batched float tensor.
SPECTROGRAM_PATH = "data/costa_rica/spectrogram/Broad-billed_Motmot_105_0.jpg"

# Force RGB so the tensor always has the 3 channels the network's first
# convolution expects (a grayscale or CMYK JPEG would otherwise give 1 or 4),
# and close the file handle as soon as the pixel data has been read.
with Image.open(SPECTROGRAM_PATH) as image_file:
    img = image_file.convert("RGB")

transform = transforms.Compose([
    transforms.ToTensor()
])

# unsqueeze(0) adds a leading batch dimension of size 1.
img_tensor = transform(img).unsqueeze(0)

In [43]:
# Sanity check: show the dimensions of the batched input tensor.
img_tensor.shape

torch.Size([1, 3, 385, 1085])

In [47]:
# Smoke test: build the network and push the sample spectrogram through it.
net = SingNET()

# Call the module itself rather than .forward() so that nn.Module.__call__
# runs any registered hooks around the forward pass.
net(img_tensor)