In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# https://www.kaggle.com/code/sombersomni/finetuned-resnet-classifier-in-pytorch-97

In [4]:
# Dataset root folder; the CSV inside it lists every image with its class and split.
root_path = 'kaggle/10BigCats'
wildcats_data = pd.read_csv(root_path + '/WILDCATS.CSV')
# Preview only the first five rows.
wildcats_data.head(5)

Unnamed: 0,class id,filepaths,labels,data set,scientific name
0,0,train/AFRICAN LEOPARD/001.jpg,AFRICAN LEOPARD,train,Panthera pardus pardus
1,0,train/AFRICAN LEOPARD/002.jpg,AFRICAN LEOPARD,train,Panthera pardus pardus
2,0,train/AFRICAN LEOPARD/003.jpg,AFRICAN LEOPARD,train,Panthera pardus pardus
3,0,train/AFRICAN LEOPARD/004.jpg,AFRICAN LEOPARD,train,Panthera pardus pardus
4,0,train/AFRICAN LEOPARD/005.jpg,AFRICAN LEOPARD,train,Panthera pardus pardus


In [5]:
import torch
# Seed PyTorch's global random number generator.
torch.manual_seed(888)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [6]:
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from PIL import Image
import os

class WildCatDataset(Dataset):
    """Map-style dataset that loads one image per row of an index DataFrame.

    Every item is a dict with four keys:
        'targets': the row's 'class id' value,
        'inputs' : the float image tensor after normalisation (and the optional
                   random horizontal flip),
        'images' : a numpy copy of the un-normalised image tensor with axes 0
                   and 2 swapped,
        'names'  : the row's 'labels' value.

    Args:
        data: DataFrame providing the 'class id', 'filepaths' and 'labels' columns.
        transform: optional callable applied to the rotated image tensor before
            normalisation.
        data_augmentation: when True, a random horizontal flip is appended after
            the normalisation step (it affects 'inputs' only, not 'images').
        root_dir: folder the 'filepaths' entries are relative to. When None
            (the default) the module-level ``root_path`` is looked up each time
            an item is read, which is the original behaviour.
    """

    def __init__(self, data, transform=None, data_augmentation=False, root_dir=None):
        # Drop the incoming index so rows can be addressed as 0..len-1.
        self.data = data.reset_index(drop=True)
        self.transform = transform
        self.root_dir = root_dir
        self.initial_transform = transforms.Compose([
            transforms.PILToTensor(),
        ])
        # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] are the channel
        # statistics computed from the ImageNet training set.
        # NOTE(review): PILToTensor does not rescale pixel values, so these
        # statistics are applied to 0-255 floats rather than 0-1 floats. Left
        # unchanged because the saved checkpoint was presumably trained with
        # exactly this preprocessing - confirm before changing.
        input_transforms = [
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ]
        if data_augmentation:
            input_transforms.append(transforms.RandomHorizontalFlip())

        self.input_transform = transforms.Compose(input_transforms)

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.data)

    def _resolve_path(self, file_path):
        """Join ``file_path`` onto the dataset root folder."""
        base_dir = self.root_dir if self.root_dir is not None else root_path
        return os.path.join(base_dir, file_path)

    def __getitem__(self, idx):
        """Load, rotate, optionally transform and normalise the image at row ``idx``."""
        # Class number
        class_id = self.data.loc[idx, 'class id']
        # File location, relative to the dataset root
        file_path = self.data.loc[idx, 'filepaths']
        name = self.data.loc[idx, 'labels']
        # Open the image and turn it into a tensor
        with Image.open(self._resolve_path(file_path)) as img:
            # Force three channels so grayscale / RGBA / palette files do not
            # break the 3-channel normalisation below; a no-op for RGB files.
            img = self.initial_transform(img.convert('RGB'))
            # Rotate the image by 90 degrees. Original author's note: the images
            # are all initially turned 90 degrees counter-clockwise.
            # NOTE(review): this rotation also changes what the model sees, so
            # it must stay in sync with how the checkpoint was trained.
            img = transforms.functional.rotate(img, 90)

        if self.transform:
            img = self.transform(img)
        # Normalisation
        img_input = self.input_transform(img.float())
        # Swap axes 0 and 2 for the numpy copy returned under 'images'.
        img = img.transpose(2, 0).detach().numpy()
        sample = {'targets': class_id, 'inputs': img_input, 'images': img, 'names': name}
        return sample



In [7]:
# Slice the index into its three subsets using the 'data set' column.
train_data, test_data, valid_data = (
    wildcats_data[wildcats_data['data set'] == split_name]
    for split_name in ('train', 'test', 'valid')
)
# Row count of each subset, in (train, test, valid) order.
tuple(len(subset) for subset in (train_data, test_data, valid_data))

(2339, 50, 50)

In [8]:
from torch.utils.data import DataLoader

batch_size = 64
# Evaluation loaders: no augmentation and a fixed sample order.
test_dataloader, valid_dataloader = (
    DataLoader(WildCatDataset(subset), batch_size=batch_size, shuffle=False)
    for subset in (test_data, valid_data)
)

In [11]:
from torchvision import models
class Model(torch.nn.Module):
    """ResNet-101 backbone followed by a small fully connected classifier head.

    Args:
        num_classes: width of the final linear layer (number of output scores).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Pretrained ResNet-101. Its parameters are deliberately left trainable:
        # the original author reported better results when fine-tuning the
        # whole network than when freezing the backbone.
        backbone = models.resnet101(weights=models.ResNet101_Weights.DEFAULT)

        # Keep every child module of the backbone except the last one.
        backbone_children = list(backbone.children())
        self.net = torch.nn.Sequential(*backbone_children[:-1])

        # Classifier head: Linear -> BatchNorm -> ReLU -> Linear.
        width = 2048
        self.fcs = torch.nn.Sequential(
            torch.nn.Linear(width, width),
            torch.nn.BatchNorm1d(width),
            torch.nn.ReLU(),
            torch.nn.Linear(width, num_classes),
        )

    def forward(self, x):
        """Run the backbone, flatten per sample, then apply the classifier head."""
        features = self.net(x)
        # Collapse everything after the batch dimension into one axis.
        flattened = features.view(features.size(0), -1)
        return self.fcs(flattened)

In [12]:
def evaluate(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to evaluation mode for the duration of the call so
    that layers such as BatchNorm and Dropout use their inference behaviour,
    and is put back into whatever mode it was in before returning.

    Args:
        model: a ``torch.nn.Module`` mapping a batch of inputs to per-class scores.
        dataloader: iterable of batches; each batch is a mapping with an
            'inputs' tensor and a 'targets' tensor.

    Returns:
        None. The accuracy is printed; with no samples it is reported as nan
        instead of raising ZeroDivisionError.
    """
    was_training = model.training
    model.eval()
    total, correct = 0, 0
    try:
        with torch.inference_mode():
            for data in dataloader:
                # Get the inputs and move them to the device
                inputs = data['inputs'].float().to(device)
                targets = data['targets'].long().to(device)

                # Forward pass; the predicted class is the highest-scoring one
                outputs = model(inputs)
                predicted = outputs.argmax(dim=1)

                # Record the accuracy
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
    finally:
        # Restore the caller's train/eval mode even if a batch failed.
        model.train(was_training)

    # Print the accuracy
    accuracy = 100 * correct / total if total else float('nan')
    print('Accuracy of the model on the %d images: %.2f %%' % (total, accuracy))

In [13]:
# .unique() collects the distinct class ids; their count is the number of classes.
num_classes = len(wildcats_data['class id'].unique())
model = Model(num_classes).to(device)
# map_location lets a checkpoint that was saved on a GPU load on whichever
# device was selected above, including a CPU-only machine.
model.load_state_dict(torch.load('checkpoints/10BigCats_v01.pth', map_location=device))
# Modules start in training mode; switch to evaluation mode so BatchNorm uses
# its stored running statistics instead of per-batch statistics.
model.eval()

# Evaluate test data
print('Test Case')
evaluate(model, test_dataloader)

print('Valid Case')
# Evaluate valid data
evaluate(model, valid_dataloader)

Test Case
Accuracy of the model on the 50 images: 98.00 %
Valid Case
Accuracy of the model on the 50 images: 98.00 %
