In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

##### Input size = [3, 271, 781]

In [2]:
def swish(x):
    return x/(1+torch.exp(-x))

# Custom 2d average pooling. Used directly prior to the fully connected layer(s)
#
# dims: Tuple representing the two dimensions to pool.
# keep_dims: Boolean determining if output should preserve number of input dimensions.
def avg_pool(x, dims, keep_dims=False):
    summed_tensor = torch.sum(x, dim=dims)
    if keep_dims:
        reshaped_tensor = summed_tensor.unsqueeze(dim=dims[0]).unsqueeze(dim=dims[1])
        return reshaped_tensor
    return summed_tensor

In [45]:
# MobileNET arch
class SingNET(nn.Module):
    def __init__(self):
        super(SingNET, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, stride=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            nn.Conv2d(32, 32, kernel_size=1, stride=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, stride=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            
            nn.Conv2d(32, 64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            
            nn.Conv2d(64, 64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            
            nn.Conv2d(64, 64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            
            nn.Conv2d(64, 128, kernel_size=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer6 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            
            nn.Conv2d(128, 128, kernel_size=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer7_12 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            
            nn.Conv2d(128, 128, kernel_size=1, stride=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer13 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=2),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            
            nn.Conv2d(128, 256, kernel_size=1, stride=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        
        self.layer14 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            
            nn.Conv2d(256, 512, kernel_size=1, stride=1),
            nn.BatchNorm2d(512),
            nn.ReLU()
        )
        
        self.fc = nn.Linear(512, 5)
        
        
        
    def forward(self, x):
        out = self.bn1(self.conv1(x))
        out = self.relu(out)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7_12(out)
        out = self.layer7_12(out)
        out = self.layer7_12(out)
        out = self.layer7_12(out)
        out = self.layer7_12(out)
        out = self.layer7_12(out)
        out = self.layer13(out)
        out = self.layer14(out)
        
        out = avg_pool(out, (2,3))
        out = self.fc(out)
        return out
        
        

In [42]:
img = Image.open("data/costa_rica/train_dn/Yellow-throated_Toucan/Yellow-throated_Toucan_146_0dn.jpg")
transform = transforms.Compose([
    transforms.ToTensor()
])

img_tensor = transform(img).unsqueeze(0)

In [43]:
img_tensor.size()

torch.Size([1, 3, 271, 781])

In [44]:
net = SingNET()

net.forward(img_tensor)

torch.Size([1, 512, 3, 19])


tensor([[-7.2369, 38.8524,  1.7380,  1.2628, 22.6226]],
       grad_fn=<AddmmBackward0>)

In [38]:
num_params = sum(p.numel() for p in net.parameters())
print(num_params)

1387589


In [39]:
def train(net, epochs=100, batch_size=8, lr=0.005, L2_reg=0.01, saved_filepath="saved_models/test_model.pt"):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Device in use: {device}")
    if device == "cuda":
        torch.cuda.empty_cache()
        #torch.cuda.max_memory_allocated(max_split_size_mb=1024)
    net.to(device)
    print("==> Starting Data Preparation...")
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
    ])
    train_set = ImageFolder("data/costa_rica/train", transform=data_transform)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)
    val_set = ImageFolder("data/costa_rica/validation", transform=data_transform)
    val_loader = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=1)
    
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=L2_reg)
    #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[int(epochs*0.5), int(epochs*0.75)], gamma=0.1)
    best_acc = 0
    
    print("==> Training Initiated...")
    for epoch in range(0, epochs):
        print(f"Epoch {epoch}/{epochs}")
        net.train()
        train_loss = 0
        correct = 0
        total = 0
        
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = net.forward(inputs)
            loss = loss_function(outputs, targets)
            loss.backward()
            
            optimizer.step()
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            
        print(f"Train Loss: {round(train_loss,3)} | Train Acc: {round(correct/total,3)}")
        #scheduler.step()
        
        net.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(val_loader):
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = net.forward(inputs)
                loss = loss_function(outputs, targets)
                
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()
        val_acc = correct/total
        
        print(f"Val Loss:   {round(val_loss,3)} | Val Acc:   {round(val_acc,3)}")
        
        if val_acc > best_acc:
            best_acc = val_acc
            print("Saving Model...")
            torch.save(net.state_dict(), saved_filepath)
    
    print("=========> Training Complete <=========")
    print(f"Best Validation Accuracy: {best_acc}")
            
            
    

In [40]:
net = SingNET()
train(net, epochs=100, batch_size=8, lr=0.005, L2_reg=0.03, saved_filepath="saved_models/reg_03_dn.pt")

Device in use: cuda
==> Starting Data Preparation...
==> Training Initiated...
Epoch 0/100
Train Loss: 1038.337 | Train Acc: 0.414
Val Loss:   373.306 | Val Acc:   0.481
Saving Model...
Epoch 1/100
Train Loss: 430.989 | Train Acc: 0.518
Val Loss:   416.491 | Val Acc:   0.475
Epoch 2/100
Train Loss: 283.709 | Train Acc: 0.536
Val Loss:   219.16 | Val Acc:   0.463
Epoch 3/100
Train Loss: 206.582 | Train Acc: 0.592
Val Loss:   177.366 | Val Acc:   0.569
Saving Model...
Epoch 4/100
Train Loss: 178.44 | Train Acc: 0.657
Val Loss:   255.481 | Val Acc:   0.619
Saving Model...
Epoch 5/100
Train Loss: 161.45 | Train Acc: 0.696
Val Loss:   160.586 | Val Acc:   0.562
Epoch 6/100
Train Loss: 177.501 | Train Acc: 0.656
Val Loss:   158.193 | Val Acc:   0.688
Saving Model...
Epoch 7/100
Train Loss: 163.371 | Train Acc: 0.677
Val Loss:   175.733 | Val Acc:   0.65
Epoch 8/100
Train Loss: 153.917 | Train Acc: 0.705
Val Loss:   142.373 | Val Acc:   0.738
Saving Model...
Epoch 9/100
Train Loss: 150.089 | 

KeyboardInterrupt: 