In [1]:
import os
import sys
import glob
import shutil
import json
import cv2
import numpy as np
from PIL import Image
import torch
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms

In [64]:
class SVHNDataset(Dataset):
    def __init__(self, img_path, img_label, transform=None):
        self.img_path = img_path
        self.img_label = img_label 
        if transform is not None:
            self.transform = transform
        else:
            self.transform = None

    def __getitem__(self, index):
        img = Image.open(self.img_path[index]).convert('RGB')

        if self.transform is not None:
            img = self.transform(img)
        
        # 原始SVHN中类别10为数字0
        lbl = np.array(self.img_label[index], dtype=np.int32)
        lbl = list(lbl)  + (6 - len(lbl)) * [10]
        
        return img, torch.from_numpy(np.array(lbl[:6])).long()

    def __len__(self):
        return len(self.img_path)

In [65]:
train_path = glob.glob('./data/mchar_train/*.png')
train_path.sort()
train_json = json.load(open('./data/mchar_train.json'))
train_label = [train_json[x]['label'] for x in train_json]

In [66]:
train_loader = torch.utils.data.DataLoader(
        SVHNDataset(train_path, train_label,
                   transforms.Compose([
                       transforms.Resize((64, 128)),
                       transforms.ColorJitter(0.3, 0.3, 0.2),
                       transforms.RandomRotation(5),
                       transforms.ToTensor(),
                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])), 
    batch_size=10, # 每批样本个数
    shuffle=False, # 是否打乱顺序
    #num_workers=10, # 读取的线程个数
)

In [67]:
import torch
torch.manual_seed(1)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.dataset import Dataset

In [68]:
class SVNH_MODEL1(nn.Module):
    def __init__(self):
        super(SVNH_MODEL1, self).__init__()
        self.cnn = nn.Sequential(nn.Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2)),
        nn.ReLU(),
        nn.MaxPool2d(2),)
        self.fc1 = nn.Linear(32*3*7, 11)
        self.fc2 = nn.Linear(32*3*7, 11)
        self.fc3 = nn.Linear(32*3*7, 11)
        self.fc4 = nn.Linear(32*3*7, 11)
        self.fc5 = nn.Linear(32*3*7, 11)
        self.fc6 = nn.Linear(32*3*7, 11)
    def forward(self, img):
        feat = self.cnn(img)
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6
model = SVNH_MODEL1()

In [87]:
from tqdm import tqdm
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), 0.005)

loss_plot, c0_plot = [], []
# 迭代10个Epoch
for epoch in range(10):
    for data in tqdm(train_loader):
        c0, c1, c2, c3, c4, c5 = model(data[0])
        # print(c5.shape)
        # print(data[1].shape)
        # print(data[1][:,4])
        loss = criterion(c0, data[1][:, 0]) + \
                criterion(c1, data[1][:, 1]) + \
                criterion(c2, data[1][:, 2]) + \
                criterion(c3, data[1][:, 3]) + \
                criterion(c4, data[1][:, 4]) + \
                criterion(c5, data[1][:, 5])
        loss /= 6
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_plot.append(loss.item())
        c0_plot.append((c0.argmax(1) == data[1][:, 0]).sum().item()*1.0 / c0.shape[0])
        

  0%|          | 0/3000 [00:00<?, ?it/s]


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [70]:
val_path = glob.glob('./data/mchar_val/*.png')
val_path.sort()
val_json = json.load(open('./data/mchar_val.json'))
val_label = [val_json[x]['label'] for x in val_json] 

In [71]:
train_dataset = SVHNDataset(train_path, train_label,
                   transforms.Compose([
                       transforms.Resize((64, 128)),
                       transforms.ColorJitter(0.3, 0.3, 0.2),
                       transforms.RandomRotation(5),
                       transforms.ToTensor(),
                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]))

In [72]:
val_dataset = SVHNDataset(val_path, val_label,
                   transforms.Compose([
                       transforms.Resize((64, 128)),
                       transforms.ColorJitter(0.3, 0.3, 0.2),
                       transforms.RandomRotation(5),
                       transforms.ToTensor(),
                       transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]))

In [88]:
def train(train_loader, model, criterion, optimizer, epoch):
    # 切换模型为训练模式
    model.train()

    for i, (input, target) in tqdm(enumerate(train_loader)):
        c0, c1, c2, c3, c4, c5 = model(data[0].cuda())
        loss = criterion(c0, data[1][:, 0].cuda()) + \
                criterion(c1, data[1][:, 1].cuda()) + \
                criterion(c2, data[1][:, 2].cuda()) + \
                criterion(c3, data[1][:, 3].cuda()) + \
                criterion(c4, data[1][:, 4].cuda()) + \
                criterion(c5, data[1][:, 5].cuda())
        loss /= 6
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [91]:
def validate(val_loader, model, criterion):
    # 切换模型为预测模型
    model.eval()
    val_loss = []

    # 不记录模型梯度信息
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            c0, c1, c2, c3, c4, c5 = model(data[0].cuda())
            loss = criterion(c0, data[1][:, 0].cuda()) + \
                    criterion(c1, data[1][:, 1].cuda()) + \
                    criterion(c2, data[1][:, 2].cuda()) + \
                    criterion(c3, data[1][:, 3].cuda()) + \
                    criterion(c4, data[1][:, 4].cuda()) + \
                    criterion(c5, data[1][:, 5].cuda())
            loss /= 6
            val_loss.append(loss.item())
    return np.mean(val_loss)

In [92]:
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from tqdm import tqdm

class SVHN_Model1(nn.Module):
    def __init__(self):
        super(SVHN_Model1, self).__init__()
                
        model_conv = models.resnet18(pretrained=True)
        model_conv.avgpool = nn.AdaptiveAvgPool2d(1)
        model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        self.cnn = model_conv
        
        self.fc1 = nn.Linear(512, 11)
        self.fc2 = nn.Linear(512, 11)
        self.fc3 = nn.Linear(512, 11)
        self.fc4 = nn.Linear(512, 11)
        self.fc5 = nn.Linear(512, 11)
        self.fc6 = nn.Linear(512, 11)
    
    def forward(self, img):        
        feat = self.cnn(img)
        # print(feat.shape)
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6
    
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=10, 
    shuffle=True, 
    # num_workers=-1, 
)
    
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=10, 
    shuffle=False, 
    # num_workers=10, 
)

model = SVHN_Model1().cuda()
criterion = nn.CrossEntropyLoss (size_average=False)
optimizer = torch.optim.Adam(model.parameters(), 0.001)
best_loss = 1000.0
for epoch in range(20):
    print('Epoch: ', epoch)

    train(train_loader, model, criterion, optimizer, epoch)
    val_loss = validate(val_loader, model, criterion)
    print(val_loss)
    
    # 记录下验证集精度
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), './model.pt')

Epoch:  0


3000it [04:01, 12.44it/s]


4.579612868838012e-05
Epoch:  1


3000it [04:06, 12.19it/s]


8.404251275351271e-06
Epoch:  2


3000it [04:00, 12.45it/s]


1.8874802663049195e-06
Epoch:  3


3000it [03:59, 12.52it/s]


4.1723248500602494e-07
Epoch:  4


3000it [04:00, 12.46it/s]


5.9604641222676946e-08
Epoch:  5


3000it [04:01, 12.40it/s]


0.0
Epoch:  6


3000it [04:01, 12.42it/s]


0.0
Epoch:  7


3000it [04:04, 12.27it/s]


0.0
Epoch:  8


3000it [04:04, 12.29it/s]


0.0
Epoch:  9


3000it [04:03, 12.34it/s]


0.0
Epoch:  10


3000it [04:05, 12.20it/s]


0.0
Epoch:  11


3000it [04:04, 12.27it/s]


0.0
Epoch:  12


3000it [04:04, 12.29it/s]


0.0
Epoch:  13


2881it [03:52, 12.39it/s]


KeyboardInterrupt: 