In [124]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import os, sys, glob, shutil, json

from PIL import Image
import numpy as np


In [125]:
class CVNewDataset(Dataset):
    def __init__(self,img_path,img_label,transform = None):
        self.img_path = img_path
        self.img_label = img_label
        if transform is not None:
            self.transform = transform
        else:    
            self.transform = None
        
        
    def __getitem__(self,index):
        img = Image.open(self.img_path[index]).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        
        # 原始SVHN中类别10为数字0
        lbl = np.array(self.img_label[index], dtype=np.int)
        lbl = list(lbl)  + (5 - len(lbl)) * [10]
        
        return img, torch.from_numpy(np.array(lbl[:5]))

    def __len__(self):
        return len(self.img_path)

train_path = glob.glob('./mchar_train/*.png')
train_path.sort()
train_json = json.load(open('./mchar_train.json'))
train_label = [train_json[x]['label'] for x in train_json]

train_loader = torch.utils.data.DataLoader(
        CVNewDataset(train_path, train_label,
                   transforms.Compose([
                       transforms.Resize((64, 128)),
                       transforms.ColorJitter(0.3, 0.3, 0.2),
                       transforms.RandomRotation(5),
                       transforms.ToTensor(),
                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])), 
    batch_size=10, # 每批样本个数
    shuffle=False, # 是否打乱顺序
    num_workers=10, # 读取的线程个数
)

val_path = glob.glob('./mchar_val/*.png')
val_path.sort()
val_json = json.load(open('./mchar_val.json'))
val_label = [val_json[x]['label'] for x in val_json]

test_loader = torch.utils.data.DataLoader(
        CVNewDataset(val_path, val_label,
                   transforms.Compose([
                       transforms.Resize((64, 128)),
                       transforms.ColorJitter(0.3, 0.3, 0.2),
                       transforms.RandomRotation(5),
                       transforms.ToTensor(),
                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])), 
    batch_size=10, # 每批样本个数
    shuffle=False, # 是否打乱顺序
    num_workers=10, # 读取的线程个数
)


        

In [126]:
test = CVNewDataset(train_path,train_label)
print(test[0])

(<PIL.Image.Image image mode=RGB size=741x350 at 0x7FEBA30F4C10>, tensor([ 1,  9, 10, 10, 10]))


In [127]:
print(test.__len__())

30000


In [198]:
for i, (a,b) in enumerate(train_loader):
    print('i:',i)
#     print('data:',a.shape)
    print('label:',b[0])
    
    break

i: 0
label: tensor([ 1,  9, 10, 10, 10])


In [226]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # CNN提取特征模块
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(),  
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2)),
            nn.ReLU(), 
            nn.MaxPool2d(2),
        )
        # 
        self.fc1 = nn.Linear(32*3*7, 11)
        self.fc2 = nn.Linear(32*3*7, 11)
        self.fc3 = nn.Linear(32*3*7, 11)
        self.fc4 = nn.Linear(32*3*7, 11)
        self.fc5 = nn.Linear(32*3*7, 11)
        self.fc6 = nn.Linear(32*3*7, 11)
    
    def forward(self, img):        
        feat = self.cnn(img)
#         print(feat.shape)
        feat = feat.view(feat.size(0), -1)
#         print("第二次：",feat.shape)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6

In [257]:
# criterion = nn.CrossEntropyLoss()
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    model.train()
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        
        c0,c1,c2,c3,c4,c5 = model(data)
#         print(c0.shape)
#         print('data:',target.shape)
#         break
#         loss = criterion(c0,data[1][:,0])+criterion(c1,data[1][:,1])+criterion(c2,data[1][:,2])+\
#                criterion(c3,data[1][:,3])+criterion(c4,data[1][:,4])+criterion(c5,data[1][:,5])
#         loss /=6
        loss = F.cross_entropy(c0,target)+F.cross_entropy(c1,target)+F.cross_entropy(c2,target)+\
               F.cross_entropy(c3,target)+F.cross_entropy(c4,target)+F.cross_entropy(c5,target)
    
        loss /=6
        
        
        
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset), 
                100. * batch_idx / len(train_loader), loss.item()
            ))

In [258]:
# def test(model, device, test_loader):
#     model.eval()
#     test_loss = 0
#     correct = 0
#     with torch.no_grad():
#         for data, target in test_loader:
#             data, target = data.to(device), target.to(device)
            
#             c0,c1,c2,c3,c4,c5 = model(data[0])
#             loss = optimizer(c0,data[1][:,0])+optimizer(c1,data[1][:,1])+optimizer(c2,data[1][:,2])+\
#                    optimizer(c3,data[1][:,3])+optimizer(c4,data[1][:,4])+optimizer(c5,data[1][:,5])
#             loss /=6
            
#             test_loss += loss.item() # sum up batch loss
            
#             correct += pred.eq(target.view_as(pred)).sum().item()

#     test_loss /= len(test_loader.dataset)

#     print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
#         test_loss, correct, len(test_loader.dataset),
#         100. * correct / len(test_loader.dataset)))

In [259]:
torch.manual_seed(53113)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

lr = 0.005
momentum = 0.9
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr,momentum=momentum)

epochs = 2
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer,criterion, epoch)
#     test(model, device, test_loader)

save_model = True
if (save_model):
    torch.save(model.state_dict(),"cv_cnn.pt")

RuntimeError: 1D target tensor expected, multi-target not supported