In [3]:
data_path = '../dataset'

In [1]:
import os

In [4]:
for path in os.listdir(data_path):
    if os.path.isdir(os.path.join(data_path, path)):
        print(os.path.join(data_path, path))

../dataset\sanity
../dataset\test
../dataset\train


In [5]:
train_data_path = os.path.join(data_path, 'train')
test_data_path = os.path.join(data_path, 'test')

In [6]:
train_classes = dict()

for path in sorted(os.listdir(train_data_path)):
    if os.path.isdir(os.path.join(train_data_path, path)):
        train_classes.setdefault(len(train_classes), path)
        
train_classes

{0: 'chicken_curry',
 1: 'chicken_wings',
 2: 'fried_rice',
 3: 'grilled_salmon',
 4: 'hamburger',
 5: 'ice_cream',
 6: 'pizza',
 7: 'ramen',
 8: 'steak',
 9: 'sushi'}

In [8]:
import json

with open('../model/index_to_name.json', 'w') as fp:
    json.dump(train_classes, fp)

In [9]:
test_classes = dict()

for path in sorted(os.listdir(test_data_path)):
    if os.path.isdir(os.path.join(test_data_path, path)):
        test_classes.setdefault(len(test_classes), path)
        
test_classes

{0: 'chicken_curry',
 1: 'chicken_wings',
 2: 'fried_rice',
 3: 'grilled_salmon',
 4: 'hamburger',
 5: 'ice_cream',
 6: 'pizza',
 7: 'ramen',
 8: 'steak',
 9: 'sushi'}

In [10]:
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [11]:
torch.cuda.get_device_name(0)

AssertionError: Torch not compiled with CUDA enabled

In [10]:
torch.cuda.empty_cache()

In [12]:
from torchvision import transforms as T

In [13]:
train_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [14]:
val_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [15]:
test_transform = T.Compose([
    T.Resize((256,256)),
    T.CenterCrop((224,224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [16]:
from torchvision.datasets import ImageFolder

In [17]:
train_dataset = ImageFolder(
    root=train_data_path,
    transform=train_transform
)

In [18]:
train_dataset

Dataset ImageFolder
    Number of datapoints: 750
    Root location: ../dataset\train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [19]:
val_dataset = ImageFolder(
    root=train_data_path,
    transform=val_transform
)

In [20]:
val_dataset

Dataset ImageFolder
    Number of datapoints: 750
    Root location: ../dataset\train
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [21]:
test_dataset = ImageFolder(
    root=test_data_path,
    transform=val_transform
)

In [22]:
test_dataset

Dataset ImageFolder
    Number of datapoints: 2500
    Root location: ../dataset\test
    StandardTransform
Transform: Compose(
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

In [23]:
import numpy as np

In [24]:
# Reference: https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb

val_size = .2

num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(val_size * num_train))

random_seed = 42 # FYI: https://en.wikipedia.org/wiki/Phrases_from_The_Hitchhiker%27s_Guide_to_the_Galaxy#On_the_Internet_and_in_software

np.random.seed(random_seed)
np.random.shuffle(indices)

In [25]:
from torch.utils.data.sampler import SubsetRandomSampler

In [28]:
train_idx, val_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

<torch.utils.data.sampler.SubsetRandomSampler object at 0x0000018F71DF8BE0>


In [29]:
from torch.utils.data import DataLoader

In [30]:
BATCH_SIZE = 16

In [31]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    sampler=train_sampler
)

In [32]:
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    sampler=val_sampler
)

In [33]:
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=0,
    shuffle=False
)

In [34]:
from torchvision import models

In [35]:
model = models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Ameno/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100.0%


In [36]:
for param in model.parameters():
    param.requires_grad = False

In [37]:
import torch.nn as nn

In [38]:
n_inputs = model.fc.in_features
n_outputs = 10

In [39]:
sequential_layers = nn.Sequential(
    nn.Linear(n_inputs, 128),
    nn.ReLU(),
    nn.Dropout(.2),
    nn.Linear(128, n_outputs),
    nn.LogSoftmax(dim=1)
)

In [40]:
# sequential_layers = nn.Linear(n_inputs, n_outputs)

In [41]:
model.fc = sequential_layers

In [42]:
model = model.to('cuda')

AssertionError: Torch not compiled with CUDA enabled

In [43]:
import torch.optim as optim

In [44]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=.9)

scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=.1)

In [45]:
loaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader
}

In [46]:
dataset_sizes = {
    'train': len(train_dataset),
    'val': len(val_dataset),
    'test': len(test_dataset)
}

In [47]:
from copy import deepcopy

In [48]:
%%time

EPOCHS = 15

for epoch in range(1, EPOCHS+1):
    best_acc = .0
    print(f"\nEpoch {epoch}/{EPOCHS}\n{'='*25}")
    for phase in ['train', 'val']:
        running_loss = .0
        running_corrects = .0
        if phase == 'train': model.train()
        if phase == 'val': model.eval()
        for inputs, labels in loaders[phase]:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels)
        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        if phase == 'train': scheduler.step()
        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_weights = deepcopy(model.state_dict())
        print(f"Loss ({phase}): {epoch_loss}, Acc ({phase}): {epoch_acc}")


Epoch 1/15
Loss (train): 1.840763053894043, Acc (train): 0.09466666666666666
Loss (val): 0.444404998143514, Acc (val): 0.06266666666666666

Epoch 2/15
Loss (train): 1.7466209869384766, Acc (train): 0.21066666666666667
Loss (val): 0.42186331113179526, Acc (val): 0.08266666666666667

Epoch 3/15
Loss (train): 1.6496122360229493, Acc (train): 0.324
Loss (val): 0.39374852561950685, Acc (val): 0.09733333333333333

Epoch 4/15
Loss (train): 1.5360486653645833, Acc (train): 0.37066666666666664
Loss (val): 0.36659874725341796, Acc (val): 0.116

Epoch 5/15
Loss (train): 1.422447914123535, Acc (train): 0.428
Loss (val): 0.34065721861521403, Acc (val): 0.11866666666666667

Epoch 6/15
Loss (train): 1.3079138272603352, Acc (train): 0.456
Loss (val): 0.3107647196451823, Acc (val): 0.12666666666666668

Epoch 7/15
Loss (train): 1.2117537587483724, Acc (train): 0.47333333333333333
Loss (val): 0.2859145215352376, Acc (val): 0.12

Epoch 8/15
Loss (train): 1.124444714864095, Acc (train): 0.5133333333333333

In [49]:
torch.save(best_model_weights, '../model/foodnet_resnet18.pth')

In [50]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [51]:
%%time

for inputs, labels in test_loader:
    inputs, labels = inputs.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

    running_loss += loss.item() * inputs.size(0)
    running_corrects += torch.sum(preds == labels)
    
loss = running_loss / dataset_sizes['test']
acc = running_corrects.double() / dataset_sizes['test']

CPU times: total: 10min 26s
Wall time: 1min 49s


In [49]:
print(f"Test Loss: {epoch_loss}, Test Accuracy: {epoch_acc}")

Test Loss: 1.371259614944458, Test Accuracy: 0.648


---

In [52]:
import torch.nn as nn

from torchvision.models.resnet import ResNet, BasicBlock


class ImageClassifier(ResNet):
    def __init__(self):
        super(ImageClassifier, self).__init__(BasicBlock, [2,2,2,2], num_classes=10)

        self.fc = nn.Sequential(
            nn.Linear(512 * BasicBlock.expansion, 128),
            nn.ReLU(),
            nn.Dropout(.2),
            nn.Linear(128, 10),
            nn.LogSoftmax(dim=1)
        )

In [53]:
mod = ImageClassifier()

In [54]:
mod.load_state_dict(torch.load("../model/foodnet_resnet18.pth"))

<All keys matched successfully>

In [55]:
mod.eval()

ImageClassifier(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace