In [1]:
from torchvision import models
import torch.nn as nn 
import torch
import torchvision.transforms as trn
from pathlib import Path
import torchvision
from torchsummary import summary

num_classes = 4

device = "cuda" 

model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
model.maxpool = nn.Identity()
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

summary(model,input_size = (3, 64, 64))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
          Identity-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]           4,096
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
           Conv2d-11          [-1, 256, 32, 32]          16,384
      BatchNorm2d-12          [-1, 256, 32, 32]             512
           Conv2d-13          [-1, 256, 32, 32]          16,384
      BatchNorm2d-14          [-1, 256,

In [2]:
checkpoint_path = "../models/resnet50_2.pth"
checkpoint = torch.load(checkpoint_path, map_location=device)

model.load_state_dict(checkpoint)

<All keys matched successfully>

In [3]:
IMAGES_PATH = "../data/raw/train"

# from competition page
transform = trn.Compose([
        trn.Resize((64, 64)),
        #trn.ToTensor(),
        trn.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
from PIL import Image

images = []
labels = []

# sort like in vscode
all_paths = list(Path(IMAGES_PATH).glob("*"))
all_paths = sorted(list(Path(IMAGES_PATH).glob("*")),key=lambda x: int(x.stem.split("_")[0]))



for image in all_paths[:150]:
    raw_image = torchvision.io.decode_image(image).float()
    images.append(raw_image)
    labels.append(int(image.stem.split("_")[1]))

batch = torch.stack(images)
batch = transform(batch) # ??? transform or not? results are almost the same
output = model(batch.to(device))

predicted_classes = torch.argmax(output,dim=1)

print(labels,predicted_classes)

accuracy = (torch.tensor(labels, device=device) == predicted_classes).float().mean().item()
print(f"Accuracy: {accuracy * 100:.2f}%")

[0, 2, 2, 1, 1, 2, 0, 1, 1, 1, 1, 3, 2, 3, 0, 1, 1, 0, 0, 1, 0, 1, 3, 1, 2, 1, 2, 2, 1, 0, 2, 1, 0, 0, 0, 1, 3, 0, 1, 0, 1, 1, 0, 2, 1, 1, 3, 0, 2, 2, 3, 1, 3, 2, 3, 1, 0, 3, 0, 2, 2, 3, 2, 0, 1, 1, 1, 2, 2, 2, 0, 0, 2, 2, 2, 3, 1, 0, 3, 3, 3, 1, 0, 1, 3, 0, 0, 0, 1, 0, 2, 2, 2, 1, 0, 0, 0, 0, 3, 1, 3, 0, 2, 0, 1, 2, 1, 1, 3, 3, 1, 2, 3, 2, 3, 1, 2, 2, 1, 1, 3, 1, 2, 2, 0, 2, 1, 0, 1, 3, 1, 2, 2, 0, 3, 0, 2, 1, 2, 2, 1, 2, 3, 0, 2, 1, 3, 1, 0, 3] tensor([1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
        3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 2, 3, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1,
        1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 3, 3, 1, 1, 2, 3, 1, 1, 1, 1, 3, 1, 1, 1,
        3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 3, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 3, 1, 0, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1], device='cuda:0')
Accuracy: 30.67%
