In [32]:
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import glob
import os
import random
from PIL import Image
from tqdm import tqdm

In [33]:
class Transformer():
    """Callable preprocessing pipeline producing a normalized image tensor.

    The steps are, in order: a random resized crop (``scale=(0.5, 1.0)``),
    a random horizontal flip, conversion to a tensor, and per-channel
    normalization.

    NOTE(review): this notebook builds the 'val' dataset with the same
    randomized pipeline as 'train'.
    """

    def __init__(self, size, mean, std):
        """Assemble the transform pipeline.

        Args:
            size: Output size handed to ``RandomResizedCrop``.
            mean: Per-channel means handed to ``Normalize``.
            std: Per-channel standard deviations handed to ``Normalize``.
        """
        steps = [
            transforms.RandomResizedCrop(size, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        self.transform = transforms.Compose(steps)

    def __call__(self, img):
        """Run ``img`` through the composed pipeline and return the result."""
        transformed = self.transform(img)
        return transformed

In [34]:
class HymenopteraDataset(torch.utils.data.Dataset):
    """Image dataset over ``<root>/<phase>/<class_dir>/*.jpg``.

    Each item is ``(transformed_image, label)``, where ``label`` is 0 when the
    image's parent directory is named ``ants`` and 1 otherwise.
    """

    def __init__(self, transformer, phase, root='./data/hymenoptera_data'):
        """Collect the image paths for one split.

        Args:
            transformer: Callable applied to each opened image.
            phase: Sub-directory of ``root`` to read (the notebook uses
                'train' and 'val').
            root: Dataset root directory. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected.
        """
        # Without recursive=True, glob treats '**' like '*', so the original
        # pattern matched exactly one directory level; '*' states that plainly.
        pattern = os.path.join(root, phase, '*', '*.jpg')
        # glob order depends on the filesystem; sort so indices are stable.
        self.file_list = sorted(glob.glob(pattern))
        self.transformer = transformer

    def __len__(self):
        """Return the number of image files found for this split."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Load, transform and label the image at ``index``.

        Returns:
            Tuple ``(transformed_image, label)`` with ``label`` in {0, 1}.
        """
        img_path = self.file_list[index]
        # Close the file handle promptly, and force 3 channels so that
        # grayscale/CMYK/RGBA files do not break the 3-channel Normalize step.
        with Image.open(img_path) as img:
            rgb_img = img.convert('RGB')
        return self.transformer(rgb_img), self._label_from_path(img_path)

    @staticmethod
    def _label_from_path(img_path):
        """Map an image path to its label using the parent directory name."""
        # os.path handles the platform's separator, unlike split('/').
        class_dir = os.path.basename(os.path.dirname(img_path))
        return 0 if class_dir == 'ants' else 1

In [35]:
# Size argument for the random resized crop applied to every image.
size = 224
# Per-channel normalization statistics, ordered to match the image channels.
# NOTE(review): these look like the usual ImageNet values that pretrained
# torchvision models expect — confirm.
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

In [36]:
# Build one dataset per split; each split gets its own Transformer instance.
train_dataset, val_dataset = (
    HymenopteraDataset(Transformer(size, mean, std), split)
    for split in ('train', 'val')
)

In [37]:
batch_size = 32
# Shuffle only the training split so each epoch sees a different batch order.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation order has no effect on learning; keeping it fixed makes per-batch
# validation output comparable between runs.
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [38]:
# Instantiate torchvision's VGG16 with pretrained=True.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — check the installed version before changing this call.
model = torchvision.models.vgg16(pretrained=True)
# Bare expression so the notebook displays the module structure.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [39]:
# Replace the final classifier layer with a fresh 4096 -> 2 linear head,
# one output per dataset label (0 and 1). The bias is enabled by default.
model.classifier[6] = nn.Linear(4096, 2)

In [40]:
# Train only the replaced head: its weight and bias keep gradients, every
# other parameter of the model is frozen.
param_name_to_update = ['classifier.6.weight', 'classifier.6.bias']
param_to_update = []
for name, param in model.named_parameters():
    trainable = name in param_name_to_update
    param.requires_grad = trainable
    if trainable:
        param_to_update.append(param)
# Display the parameters that will be handed to the optimizer.
param_to_update

[Parameter containing:
 tensor([[-0.0059,  0.0110, -0.0134,  ..., -0.0090, -0.0046,  0.0031],
         [ 0.0114,  0.0006, -0.0043,  ..., -0.0094, -0.0103, -0.0088]],
        requires_grad=True),
 Parameter containing:
 tensor([-0.0084,  0.0092], requires_grad=True)]

In [41]:
# Adam updates only the parameters collected in `param_to_update`.
optimizer = torch.optim.Adam(params=param_to_update, lr=1e-3)

In [46]:
# Cross-entropy over the model's raw 2-way outputs and integer class labels.
criterion = nn.CrossEntropyLoss()

In [42]:
# Look up the loader for a phase by name inside the training loop.
dataloader_dict = dict(train=train_dataloader, val=val_dataloader)

In [47]:
# Run one epoch: a training pass over 'train', then an evaluation pass over 'val'.
for epoch in range(1, 2):
    for phase in ['train', 'val']:
        is_train = phase == 'train'
        # nn.Module.train(mode) has to be *called*. The previous bare
        # `model.train` was a no-op, so once model.eval() had run the model was
        # never switched back to training mode (mode-dependent layers such as
        # dropout would stay in eval behaviour for later training passes).
        model.train(is_train)
        for inputs, labels in dataloader_dict[phase]:
            # Clear gradients accumulated by the previous step.
            optimizer.zero_grad()
            # Track gradients only while training; validation runs without autograd.
            with torch.set_grad_enabled(is_train):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                if is_train:
                    loss.backward()
                    optimizer.step()
            # Per-batch loss for both phases.
            print(loss.item())

0.6769014000892639
0.5217145085334778
0.40347787737846375
0.29061099886894226
0.1954514980316162
0.21213524043560028
0.29314523935317993
0.2170630544424057
0.14757533371448517
0.15566052496433258
0.09762852638959885
0.1305747777223587
0.18618687987327576


In [44]:
# Display the model outputs left in `outputs` by the last batch the loop processed.
# NOTE(review): relies on a loop variable leaking into the notebook namespace,
# so the value shown depends on which run of the loop cell executed most recently.
outputs

tensor([[-0.4279,  0.6499],
        [ 0.3155, -0.4879],
        [ 0.2264,  0.9292],
        [-0.2829, -0.1526],
        [ 0.0232,  0.6556],
        [-0.6916,  0.5403],
        [ 0.4781,  0.3159],
        [ 1.6673,  0.3508],
        [ 0.4930,  0.7440],
        [-0.1169,  1.0106],
        [ 0.4786,  0.2946],
        [ 0.5993,  0.2022],
        [ 0.8030,  0.5003],
        [ 0.2679,  0.1262],
        [-0.7878,  0.5535],
        [ 0.0423,  0.7180],
        [ 0.2794,  0.7567],
        [ 0.0873,  0.6222],
        [ 1.2246,  1.0483],
        [ 0.4617,  1.4134],
        [ 0.3000, -0.0237],
        [ 0.1900,  0.6673],
        [-0.7841,  1.4829],
        [ 0.8830,  0.0101],
        [-0.2396,  0.7908],
        [-0.1861,  1.0922],
        [-0.0040,  1.2910],
        [-0.1528,  1.9742],
        [ 0.9849,  0.6708],
        [ 0.4597, -0.1098],
        [-0.2933,  1.0022],
        [-1.1290,  0.9574]], grad_fn=<AddmmBackward0>)