# Caltech101 分类

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
import os
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.preprocessing import LabelEncoder
from six.moves import urllib
import matplotlib.pyplot as plt
import numpy as np
import math

这个数据集的下载链接位于 Google Drive，无法直接通过 torchvision.datasets 下载；可手动下载并解压到指定文件夹。

In [None]:
# Preprocessing applied to every sample: force three colour channels, resize
# to the 224x224 input used below, convert to a tensor and normalise.
transform = transforms.Compose(
    [
        transforms.Lambda(lambda pil_img: pil_img.convert("RGB")),
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        # Mean/std of 0.5 per channel; images_show() relies on these values
        # when it undoes the normalisation with `img / 2 + 0.5`.
        # The ImageNet statistics (mean 0.485/0.456/0.406,
        # std 0.229/0.224/0.225) were tried here and left disabled.
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Load Caltech101 from a local copy under ./data.
# download=False: nothing is fetched; set it to True to download instead.
caltech101_dataset = torchvision.datasets.Caltech101(
    transform=transform,
    root='./data',
    download=False,
)

print(f'总数据集大小：{len(caltech101_dataset)}')

# Sizes of the train and test subsets: 80% for training, the remainder
# (about 20%) for testing.
train_size = int(0.8 * len(caltech101_dataset))
test_size = len(caltech101_dataset) - train_size
print(f"train_size: {train_size}, test_size: {test_size}")

# Seed the split so the same images end up in the test subset on every run.
# Without a fixed generator, a checkpoint restored in a later session could be
# evaluated on images it was trained on.
train_data, test_data = torch.utils.data.random_split(
    caltech101_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

batch_size = 8

# Data loaders: the training loader reshuffles every epoch, the test loader
# keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = torch.utils.data.DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Pull a single training batch; `images` and `labels` are reused by the
# visualisation cell further down.
images, labels = next(iter(train_loader))

# Shape of one preprocessed image.
print(images[0].numpy().shape)


In [None]:
# Display the dataset's list of class names as the cell output.
caltech101_dataset.categories

In [None]:
def images_show(imgs, labs=None):
    """Draw a batch of normalised image tensors on a two-row grid.

    Args:
        imgs: Sized iterable of image tensors in (C, H, W) layout whose values
            were normalised with mean 0.5 and std 0.5; the normalisation is
            undone before plotting.
        labs: Optional sequence of class indices, one per image, looked up in
            ``caltech101_dataset.categories`` to build the subplot titles.
            ``None`` or an empty sequence draws the images without titles.
    """
    n_cols = math.ceil(len(imgs) / 2)
    # Decide once whether titles are wanted; avoids comparing tensors or
    # arrays against a list literal.
    has_labels = labs is not None and len(labs) > 0

    for i, img in enumerate(imgs):
        img = img / 2 + 0.5  # undo Normalize(0.5, 0.5): back to [0, 1]
        plt.subplot(2, n_cols, i + 1)
        if has_labels:
            plt.title(caltech101_dataset.categories[int(labs[i])])
        # matplotlib expects (H, W, C); tensors are (C, H, W).
        plt.imshow(np.transpose(img.numpy(), (1, 2, 0)))

    # Render after all subplots have been drawn.
    plt.show()

In [None]:
# Plot the current `images` batch with its class names as titles (when the
# cells run in order this is the training batch fetched after the loaders
# were created).
images_show(images,labels)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import pretrainedmodels

# the resnet34 model
class ResNet34(nn.Module):
    """ResNet-34 feature extractor followed by dropout and a linear classifier.

    Args:
        pretrained: When exactly ``True``, the backbone is created with
            ``pretrained='imagenet'``; any other value creates it with
            ``pretrained=None``.
        num_classes: Number of output logits. Defaults to the number of
            categories in the notebook-level ``caltech101_dataset``.
    """

    def __init__(self, pretrained, num_classes=None):
        super().__init__()
        weights = 'imagenet' if pretrained is True else None
        self.model = pretrainedmodels.__dict__['resnet34'](pretrained=weights)

        if num_classes is None:
            num_classes = len(caltech101_dataset.categories)

        # Replacement classification head fed by the 512 pooled features.
        self.l0 = nn.Linear(512, num_classes)
        # The head operates on a flat (batch, 512) tensor, so plain
        # element-wise dropout is the right layer; Dropout2d is meant for
        # 3-D/4-D feature maps and warns on 2-D input.
        self.dropout = nn.Dropout(0.4)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch."""
        # Only the batch size is needed; channels/height/width are ignored.
        batch = x.shape[0]
        x = self.model.features(x)
        # Global average pooling, then flatten to (batch, features).
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        x = self.dropout(x)
        return self.l0(x)
        
# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network with ImageNet-pretrained weights and move it to `device`.
model = ResNet34(pretrained=True)
model = model.to(device)
print(model)

In [None]:
from torchsummary import summary
# Layer-by-layer summary of the model for a single 3x224x224 input.
# NOTE(review): if the installed `summary` prints its own table and returns
# None, the outer print() adds a trailing "None" line — confirm against the
# installed package.
print(summary(model, input_size=(3, 224, 224)))

In [None]:
import torch.optim as optim
# Classification loss computed on the logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters (backbone and new head) with a small
# learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
# Training loop: fine-tune `model` on `train_loader`, reporting the mean loss
# of every block of 100 mini-batches.

epochs = 20  # number of passes over the training set

for epoch in range(epochs):
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(train_loader):
        # Move the batch to the device the model lives on.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report once every 100 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')

# 保存权重

In [None]:

#PATH = './caltech101_net.pth'
#torch.save(model.state_dict(), PATH)

In [None]:
# Take the first batch from the test loader; `images` and `labels` are reused
# by the prediction cell below.
dataiter = iter(test_loader)
images, labels = next(dataiter)

# Plot the batch with its ground-truth class names as titles.
images_show(images,labels)
# Disabled leftover from the referenced CIFAR-10 tutorial; `classes` is not
# defined in this notebook.
#print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(4)))

# Test

In [None]:
# Restore a trained network from disk for evaluation on the CPU.
# The checkpoint file must already exist at PATH.
device = "cpu"
PATH = './caltech101_net.pth'

# pretrained=False: the ImageNet weights are not needed because every
# parameter is overwritten by the checkpoint.
net = ResNet34(False)

# Remap stored tensors to the CPU when evaluating there; otherwise load them
# as they were saved.
net.load_state_dict(
    torch.load(PATH, map_location='cpu') if device == "cpu" else torch.load(PATH)
)

# Switch to inference mode (this disables dropout).
net.eval()

In [None]:
# Run the restored network on the current test batch and, for every sample,
# plot the raw class scores (left) next to the image (right).
net.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    outputs = net(images)
print(images.shape)

# Index of the highest score per sample is the predicted class.
_, predicted = torch.max(outputs, 1)

print(predicted)

# savefig() does not create missing directories.
os.makedirs('./fig', exist_ok=True)

# Iterate over the actual batch size and score count instead of assuming
# 8 samples and 101 classes.
for j in range(len(images)):
    plt.subplot(1, 2, 1)
    for i, score in enumerate(outputs[j].tolist()):
        plt.bar(i, score)
    plt.title(caltech101_dataset.categories[predicted[j]])
    plt.subplot(1, 2, 2)

    img = images[j] / 2 + 0.5  # undo Normalize(0.5, 0.5)
    npimg = img.numpy()
    # matplotlib expects (H, W, C); tensors are (C, H, W).
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.title(caltech101_dataset.categories[labels[j]])
    plt.savefig(f'./fig/{j}.svg')
    plt.show()
    

In [None]:
# Overall top-1 accuracy of `net` on the whole test loader.
correct = 0
total = 0
# Evaluation only, so no gradients are needed.
with torch.no_grad():
    for images, labels in test_loader:
        # Run the batch through the network.
        outputs = net(images)
        # The class with the highest score is the prediction.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the real number of evaluated images and avoid dividing by zero when
# the loader is empty.
if total:
    print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')
else:
    print('No test images were evaluated.')

In [None]:
# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in caltech101_dataset.categories}
total_pred = {classname: 0 for classname in caltech101_dataset.categories}

# again no gradients needed
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[caltech101_dataset.categories[label]] += 1
            total_pred[caltech101_dataset.categories[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

参考：

https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

https://colab.research.google.com/github/ashishpatel26/Awesome-Pytorch-Tutorials/blob/main/17.Pytorch%20Transfer%20learning%20with%20Caltech101.ipynb
