In [1]:
import numpy as np
import pandas as pd
import io
import scipy.io as scio
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
datafile = 'cars_annos.mat'
# data = scio.loadmat(datafile)

In [5]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'annotations', 'class_names'])

In [35]:
im = Image.open(data['annotations'][0, 1][0][0]).convert('RGB')
im.show()


In [30]:

a = []
for i in range(3):
    anno = []
    for j in range(1, 5):
        anno.append(int(data['annotations'][0, i][j]))
    a.append(anno)
a

[[112, 7, 853, 717], [48, 24, 441, 202], [7, 4, 277, 180]]

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import os
import time

class CarDataset(Dataset):
    def __init__(self, datafile, transform=None, train=False):
        data = scio.loadmat(datafile)
        self.transform = transform
        self.annotations = []
        self.classes = []
        self.img_paths = []
        for i in range(data['annotations'].shape[1]):
            anno = []
            if train and int(data['annotations'][0, i][6]) == 0:
                for j in range(1, 5):
                    anno.append(int(data['annotations'][0, i][j]))
                self.annotations.append(anno)
                self.classes.append(int(data['annotations'][0, i][5]))
                self.img_paths.append(data['annotations'][0, i][0][0])
            elif not train and int(data['annotations'][0, i][6]):
                for j in range(1, 5):
                    anno.append(int(data['annotations'][0, i][j]))
                self.annotations.append(anno)
                self.classes.append(int(data['annotations'][0, i][5]))
                self.img_paths.append(data['annotations'][0, i][0][0])
                 
        
    def __getitem__(self, index):
        img_path = self.img_paths[index]
        label = self.classes[index]
        annotation = self.annotations[index]
        img = Image.open(img_path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
            label = int(label)
        return img, label-1
    
    def __len__(self):
        return len(self.classes)

In [56]:
from torchvision import transforms
img_transform = transforms.Compose([
            transforms.Resize((256,256),interpolation=3),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
train_dataset = CarDataset(datafile, transform=img_transform, train=True)
test_dataset = CarDataset(datafile, transform=img_transform)

In [59]:
from torchvision import models
EPOCH = 50
batch_size = 8
num_class = 196
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True )
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
# 替换 fc 符合自己的分类
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_class) 

#损失函数:这里用交叉熵
criterion = nn.CrossEntropyLoss()
#优化器 这里用SGD
optimizer = optim.Adam(model.parameters(), lr=1e-3)
#device : GPU or CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# 训练
for epoch in range(EPOCH):
    start_time = time.time()
    running_loss = 0.0
    running_corrects = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # 前向传播
        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)
        # 计算损失函数
        loss = criterion(outputs, labels)
        # 清空上一轮梯度
        optimizer.zero_grad()
        # 反向传播
        loss.backward()
        # 参数更新
        optimizer.step()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data).to(torch.float32)
    batch_loss = running_loss / ((i+1)*batch_size)
    batch_acc = running_corrects / ((i+1)*batch_size)
    print('{} Epoch [{}]  Loss: {:.4f} Acc: {:.4f} Time: {:.4f}s'. \
                          format('train', epoch, batch_loss, batch_acc, time.time()-start_time))


#保存训练模型
file_name = 'car_resnet.pkl'
torch.save(model, file_name)
print(file_name+' saved')

train Epoch [0]  Loss: 0.6676 Acc: 0.0052 Time: 520.2124s
train Epoch [1]  Loss: 0.6553 Acc: 0.0092 Time: 517.7003s
train Epoch [2]  Loss: 0.6506 Acc: 0.0102 Time: 518.8676s
train Epoch [3]  Loss: 0.6498 Acc: 0.0114 Time: 518.5370s
train Epoch [4]  Loss: 0.6478 Acc: 0.0120 Time: 521.7381s
train Epoch [5]  Loss: 0.6447 Acc: 0.0103 Time: 519.4752s
train Epoch [6]  Loss: 0.6436 Acc: 0.0118 Time: 519.1273s
train Epoch [7]  Loss: 0.6433 Acc: 0.0135 Time: 522.7507s
train Epoch [8]  Loss: 0.6398 Acc: 0.0141 Time: 517.7616s
train Epoch [9]  Loss: 0.6376 Acc: 0.0146 Time: 503.0382s
train Epoch [10]  Loss: 0.6344 Acc: 0.0177 Time: 433.6416s
train Epoch [11]  Loss: 0.6319 Acc: 0.0196 Time: 433.4290s
train Epoch [12]  Loss: 0.6257 Acc: 0.0260 Time: 433.1971s
train Epoch [13]  Loss: 0.6177 Acc: 0.0269 Time: 433.7028s
train Epoch [14]  Loss: 0.6095 Acc: 0.0357 Time: 432.7685s
train Epoch [15]  Loss: 0.5997 Acc: 0.0415 Time: 431.9605s
train Epoch [16]  Loss: 0.5871 Acc: 0.0519 Time: 433.0289s
train E

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


car_resnet.pkl saved


In [12]:
from torchvision import transforms
from torchvision import models
img_transform = transforms.Compose([
            transforms.Resize((256,256),interpolation=3),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
test_dataset = CarDataset(datafile, transform=img_transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False)

file_name = 'car_resnet.pkl'
# 测试
model = torch.load(file_name)
model.eval()
model.cuda()
correct, total = 0, 0

for data in test_loader:
    images, labels = data
    images, labels = images.cuda(), labels.cuda()
    # 前向传播
    out = model(images)
    _, predicted = torch.max(out.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

#输出识别准确率
print('10000测试图像 准确率:{:.4f}%'.format(100.0 * correct / total)) 

10000测试图像 准确率:54.5206%


In [8]:
predicted

tensor([178], device='cuda:0')

In [9]:
labels

tensor([196], device='cuda:0')

In [10]:
out

tensor([[-82.7535, -73.4430, -62.4700, -57.8358, -64.8521, -59.4653, -65.2462,
         -56.6374, -61.5434, -58.9353, -57.5317, -65.7276, -67.1602, -68.9190,
         -60.6845, -63.8614, -76.4340, -73.7415, -64.8479, -61.3080, -69.1059,
         -66.1126, -65.4822, -65.4555, -63.8386, -64.8420, -66.2069, -60.6348,
         -63.1836, -61.8758, -65.8972, -62.3835, -70.1772, -64.2736, -60.4333,
         -58.7898, -60.3093, -66.7532, -63.4566, -61.0303, -66.6297, -62.4869,
         -66.9926, -60.7368, -65.1228, -59.4861, -62.0199, -63.9951, -73.0693,
         -62.7425, -71.1742, -59.2207, -62.3429, -74.8161, -74.3066, -65.6202,
         -67.2411, -58.1937, -67.4876, -63.3684, -64.6068, -64.8132, -76.5681,
         -57.9894, -68.7152, -75.5912, -69.9301, -65.9906, -66.0114, -80.0260,
         -78.3138, -68.1342, -67.5819, -68.4198, -79.0545, -75.5072, -69.9324,
         -69.9396, -74.8502, -69.7273, -69.2101, -66.4308, -68.9045, -70.0837,
         -72.5530, -72.1565, -73.2755, -70.4809, -65

In [11]:
out.shape

torch.Size([1, 197])

下一步提高精度的方向：

+ 增加epoch
+ 根据提供的anno 进一步分割出 car的位置
+ 类别是196，labels应该整体减1
+ 图像增强

需要确定是过拟合还是欠拟合，需要划分traindataset和valdataset 看loss

