In [42]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms

# [Similar and small, retrain FC layer](https://towardsdatascience.com/a-practical-example-in-transfer-learning-with-pytorch-846bb835f2db)
### ~~Camera Data Collection~~
### Prepare the dataset and dataloader

In [43]:
dataset = datasets.ImageFolder(
    'thumb-dataset',
    transforms.Compose([
        transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [len(dataset) - 50, 50]
)

torchvision.datasets.ImageFolder -> list -> DataLoader -> Tensor

In [44]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4
)

In [41]:
for batch in train_loader:
    print(batch)

[tensor([[[[ 0.9817,  0.9988,  0.9988,  ...,  0.9988,  0.9988,  1.0159],
          [ 1.0331,  1.0159,  0.9988,  ...,  1.0159,  0.9988,  0.9988],
          [ 1.0502,  1.0502,  1.0159,  ...,  1.0331,  0.9988,  0.9988],
          ...,
          [ 0.8104,  0.7762,  0.6906,  ...,  0.6563,  0.6049,  0.5536],
          [ 0.7248,  0.7248,  0.6734,  ...,  0.6221,  0.5707,  0.5364],
          [ 0.6392,  0.6392,  0.6221,  ...,  0.5536,  0.5193,  0.5022]],

         [[ 1.2556,  1.2556,  1.2556,  ...,  1.2556,  1.2556,  1.2731],
          [ 1.2906,  1.2731,  1.2556,  ...,  1.2731,  1.2556,  1.2556],
          [ 1.3081,  1.3081,  1.2731,  ...,  1.2906,  1.2556,  1.2556],
          ...,
          [ 1.0805,  1.0455,  0.9930,  ...,  0.9055,  0.8704,  0.8354],
          [ 0.9930,  1.0105,  0.9755,  ...,  0.8704,  0.8354,  0.8179],
          [ 0.8880,  0.9230,  0.9055,  ...,  0.8179,  0.8004,  0.7654]],

         [[ 1.4025,  1.4025,  1.4025,  ...,  1.4548,  1.4548,  1.4722],
          [ 1.4374,  1.4200, 

### Prepare the mdoel

In [37]:
# torchvision package provides a collection of pre-trained models
import torchvision.models as models
model = models.alexnet(pretrained=True)
'''
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)
'''
# replace the final layer with a new, untrained layer with only two outputs.
model.classifier[6] = torch.nn.Linear(
    model.classifier[6].in_features, 2
)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


In [38]:
NUM_EPOCHS = 30
BEST_MODEL_PATH = 'thumb-model/best_model.pth'
best_accuracy = 0.0

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

#% labels defined in ImageFolder
for epoch in range(NUM_EPOCHS):
    
    for images, labels in iter(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        
        print(images)
        print(images.shape)
        print(labels)
        print(labels.shape)
        
        loss = F.cross_entropy(outputs, labels)
        loss.backward()
        optimizer.step()
    
    test_error_count = 0.0
    for images, labels in iter(test_loader):
        outputs = model(images)
        
        test_error_count += float(torch.sum(torch.abs(labels - outputs.argmax(1))))
    
    test_accuracy = 1.0 - float(test_error_count) / float(len(test_dataset))
    print('%d: %f' % (epoch, test_accuracy))
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_accuracy = test_accuracy

tensor([[[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1008, -2.1008, -2.1008,  ..., -2.1179, -2.1179, -2.1179]],

         [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044, -

In [40]:
import cv2
import numpy as np
from numpy import asarray

mean = 255.0 * np.array([0.485, 0.456, 0.406])
stdev = 255.0 * np.array([0.229, 0.224, 0.225])

normalize = torchvision.transforms.Normalize(mean, stdev)

def preprocess(camera_value):
    global normalize
    x = camera_value
    x = asarray(x)
    x = x.transpose((2, 0, 1))
    x = torch.from_numpy(x).float()
    x = normalize(x)
    return x

model = torchvision.models.alexnet(pretrained=False)
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, 2)
model.load_state_dict(torch.load('thumb-model/best_model.pth'))

pic = "thumb-dataset/thumbsup_dir/susana_tu05_png.rf.4082812213e45c9cb15f72bcc440accf.jpg"
image_thumbs = cv2.imread(pic, 1)

from PIL import Image
image_thumbs = Image.fromarray(image_thumbs)
image_thumbs = torchvision.transforms.Resize((224, 224))(image_thumbs)

x = preprocess(image_thumbs)
x = x.unsqueeze(0) # https://zhuanlan.zhihu.com/p/445836965
print(x.shape)
y = model(x)
y = F.softmax(y, dim=1)
y
# Prediction Done

torch.Size([1, 3, 224, 224])


tensor([[3.1419e-06, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)

![](./md-img/image-20221009185928164.png)