In [1]:
import os
import json
import numpy as np
import torch
from torchvision import models
from torchvision.models import ResNet18_Weights
from torchvision import transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torch.optim as optim
from copy import deepcopy

In [2]:
device = torch.device('cuda')
model = models.resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
model.eval()

for param in model.parameters():
    param.requires_grad = False

In [3]:
sequential_layer = torch.nn.Sequential(
    torch.nn.Linear(model.fc.in_features, 128),
    torch.nn.ReLU(),
    torch.nn.Dropout(.2),
    torch.nn.Linear(128, 10),
    torch.nn.LogSoftmax(dim=1)
)

model.fc = sequential_layer
model = model.to('cuda')

In [4]:
data_path = './dataset'
train_data_path = os.path.join(data_path, 'train')
test_data_path = os.path.join(data_path, 'test')

In [5]:
train_classes = dict()
test_classes = dict()

for path in sorted(os.listdir(train_data_path)):
    train_classes.setdefault(len(train_classes), path)
    
for path in sorted(os.listdir(test_data_path)):
    test_classes.setdefault(len(test_classes), path)

In [6]:
with open('index_to_name.json', 'w') as fp:
    json.dump(train_classes, fp)

In [7]:
train_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = ImageFolder(root=train_data_path, transform=train_transform)
val_dataset = ImageFolder(root=train_data_path, transform=val_transform)
test_dataset = ImageFolder(root=test_data_path, transform=val_transform)

In [8]:
val_size = .2

num_train = len(train_dataset)
indices = list(range(num_train))

split = int(np.floor(val_size * num_train))

np.random.seed(69)
np.random.shuffle(indices)

train_idx, val_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [9]:
train_loader = DataLoader(train_dataset, batch_size=10, num_workers=0, sampler=train_sampler)
val_loader = DataLoader(val_dataset, batch_size=10, num_workers=0, sampler=val_sampler)
test_loader = DataLoader(test_dataset, batch_size=10, num_workers=0, shuffle=False)

In [10]:
loaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset), 'test': len(test_dataset)}

In [11]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=.9)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=.1)

In [12]:
%time

for epoch in range(1, 16):
    best_acc = .0
    print(f"\nEpoch {epoch}/{15}\n{'='*25}")
    for phase in ['train', 'val']:
        running_loss = .0
        running_corrects = .0
        if phase == 'train': model.train()
        if phase == 'val': model.eval()
        for inputs, labels in loaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        if phase == 'train': scheduler.step()
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_weights = deepcopy(model.state_dict())
        print(f"Loss ({phase}): {epoch_loss}, Acc ({phase}): {epoch_acc}")

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.53 µs

Epoch 1/15
Loss (train): 1.4367195877432823, Acc (train): 0.32575
Loss (val): 0.24233110800385474, Acc (val): 0.124375

Epoch 2/15
Loss (train): 1.0188224367052316, Acc (train): 0.46775
Loss (val): 0.20282511811703444, Acc (val): 0.13625

Epoch 3/15
Loss (train): 0.9267179149016738, Acc (train): 0.494375
Loss (val): 0.19389681428670882, Acc (val): 0.139625

Epoch 4/15
Loss (train): 0.8713768003508449, Acc (train): 0.512375
Loss (val): 0.18633067898452282, Acc (val): 0.141875

Epoch 5/15
Loss (train): 0.8411770219914615, Acc (train): 0.517875
Loss (val): 0.18585949849337338, Acc (val): 0.139875

Epoch 6/15
Loss (train): 0.8033427166193724, Acc (train): 0.535875
Loss (val): 0.1807949926517904, Acc (val): 0.14275000000000002

Epoch 7/15
Loss (train): 0.79221504740417, Acc (train): 0.530625
Loss (val): 0.18437833601608872, Acc (val): 0.14175000000000001

Epoch 8/15
Loss (train): 0.724449780806899, Acc (train): 0.557875
Loss (

In [13]:
torch.save(best_model_weights, 'desserts_resnet18.pth')

In [14]:
%%time

for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels)
    
loss = running_loss / dataset_sizes['test']
acc = running_corrects.double() / dataset_sizes['test']

CPU times: user 1min 1s, sys: 1.98 s, total: 1min 3s
Wall time: 10.5 s


In [15]:
print(f"Test Loss: {epoch_loss}, Test Accuracy: {epoch_acc}")

Test Loss: 0.17541085144504906, Test Accuracy: 0.143875


In [16]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  