In [1]:
from torch.utils.data import Dataset
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.cross_validation import train_test_split
from pretrainedmodels.models import bninception
from imgaug import augmenters as iaa

import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import time

import matplotlib.pyplot as plt
from PIL import Image
import pickle

import matplotlib.pyplot as plt # plt 用于显示图片
import matplotlib.image as mpimg # mpimg 用于读取图片
import numpy as np
import copy

from preprocess import *

img_w, img_h = 64, 64
random_seed = 4050
config_batch_size = 4
class_n = (9 + 10 + 26)
output_n = 9
num_epochs = 100
feature_extract = True
use_pretrained=True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

char_to_index = {"深":0, "秦":1, "京":2, "海":3, "成":4, "南":5, "杭":6, "苏":7, "松":8}
print(char_to_index)



{'深': 0, '秦': 1, '京': 2, '海': 3, '成': 4, '南': 5, '杭': 6, '苏': 7, '松': 8}


In [2]:
class CarIdDataset(Dataset):
    def __init__(self, data_list, mode, weight = 229, height = 229):
        self.data_list = data_list
        self.mode =mode
        self.weight = weight
        self.height = height

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self,index):
        img_path, label = self.data_list[index]["image_path"], self.data_list[index]["label"]
        img = np.array(Image.open(img_path))
        
        h, w, _ = img.shape
        M = cv2.getAffineTransform(self.data_list[index]["pts"][0], self.data_list[index]["pts"][1])
        img_dst = cv2.warpAffine(img, M, (w, h))
        
        #print("================================")
        char_img_list = []
        for [x, y] in self.data_list[index]["char_segmentation"]:
            char_img = cv2.resize(img_dst[:, x:y, :], (img_w, img_h), interpolation=cv2.INTER_CUBIC)
            augment_img = iaa.SomeOf(2, [
                iaa.Affine(rotate=(-30, 30), shear=(-16, 16), translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}),
                iaa.GaussianBlur(sigma=(0.0, 3.0)),
                iaa.AdditiveGaussianNoise(scale=0.5*255),
                iaa.Add((-40, 40), per_channel=0.5),
                iaa.Sharpen(alpha=0.5),
                iaa.CropAndPad(percent=(-0.25, 0.25)),
            ])
            if self.mode == "train_val":
                char_img = augment_img.augment_image(char_img)
            char_img = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])(char_img)
            #print("type(char_img) = ", type(char_img))
            #print("char_img.shape = ", char_img.shape)
            char_img_list.append(char_img)
        img = torch.stack(char_img_list,0)
        #img = torch.cat(inputs=char_img_list, dimension=0)
        #print("img.shape = ", img.shape)
        
        #img = cv2.resize(img,(self.weight, self.height))
        #img = transforms.Compose([])(img)
        #print(img.shape)
        y = np.zeros((output_n, class_n))
        for i in range(len(label)):
            y[i, label[i]] = 1
        
        return img, y

In [3]:
label_file = "./data/train-data-label.txt"
image_file = "./data/train-data"
model_file = "./model"
data_list = []
with open(label_file, 'r') as file_to_read:
    while True:
        lines = file_to_read.readline().strip() # 整行读取数据
        if not lines:
            break
        lines = lines.split(",  ")
        image_path = os.path.join(image_file, lines[1])
        label = [];
        label.append(char_to_index[lines[0][0]])
        for i in range(1, len(lines[0])):
            if '0' <= lines[0][i] and lines[0][i] <= '9':
                label.append(9 + ord(lines[0][i]) - ord('0'))
            else:
                label.append(9 + 10 + ord(lines[0][i]) - ord('A'))
        data_list.append({"image_path": image_path, "label":label})

In [4]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                inputs = torch.cat([inputs[i] for i in range(inputs.shape[0])], 0)
                labels = torch.cat([labels[i] for i in range(labels.shape[0])], 0)
                #print("type(inputs) = ", type(inputs))
                #print("type(labels) = ", type(labels))
                #print("inputs.shape = ", inputs.shape)
                #print("labels.shape = ", labels.shape)
                
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    outputs = model(inputs).double()
                    outputs = outputs.squeeze()
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)
                    _, y = torch.max(labels, 1)
                    #print("preds = ", preds)
                    #print("labels = ", labels)
                    running_corrects += torch.sum(preds == y)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                #running_corrects += torch.sum(preds == labels.data)

            #print(len(dataloaders[phase].dataset))
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                model_filename = model_file + os.sep + str(epoch) + "checkpoint.pth.tar"
                #torch.save({"state_dict":best_model_wts}, model_filename)
            if phase == 'val':
                val_acc_history.append(epoch_acc)
            model_filename = model_file + os.sep + "last_checkpoint.pth.tar"
            torch.save({"state_dict":model.state_dict()}, model_filename)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    best_model_filename = model_file + os.sep + "best_checkpoint.pth.tar"
    torch.save({"state_dict":best_model_wts}, best_model_filename)
    final_model_filename = model_file + os.sep + "final_checkpoint.pth.tar"
    torch.save({"state_dict":model.state_dict()}, final_model_filename)
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [5]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [7]:
#data_list = data_list[:100]
"""
for count in range(len(data_list)):
    data = data_list[count]
    print("count = ", count, "data = ", data)
    pts1, pts2, char_segmentation = preprocess(data["image_path"])
    data["pts"] = [pts1, pts2]
    data["char_segmentation"] = char_segmentation
    
    data_pkl_path = data["image_path"].replace("jpg", "pkl").replace("train-data", "preprocess")
    output = open(data_pkl_path, 'wb')
    pickle.dump(data, output)
    output.close()
"""

'\nfor count in range(len(data_list)):\n    data = data_list[count]\n    print("count = ", count, "data = ", data)\n    pts1, pts2, char_segmentation = preprocess(data["image_path"])\n    data["pts"] = [pts1, pts2]\n    data["char_segmentation"] = char_segmentation\n    \n    data_pkl_path = data["image_path"].replace("jpg", "pkl").replace("train-data", "preprocess")\n    output = open(data_pkl_path, \'wb\')\n    pickle.dump(data, output)\n    output.close()\n'

In [13]:
"""
import pickle

preprocess_data_list = []
for data in data_list:
    data_pkl_path = data["image_path"].replace("jpg", "pkl").replace("train-data", "preprocess")
    pkl_file = open(data_pkl_path, 'rb')
    pkl_data = pickle.load(pkl_file)
    preprocess_data_list.append(pkl_data)

output = open('./data.pkl', 'wb')
pickle.dump(preprocess_data_list, output)
output.close()
"""

In [None]:
"""
for data in data_list:
    flag = 0
    for [x, y] in data["char_segmentation"]:
        if x == y:
            flag = 1
    if flag == 1:
        print(data["char_segmentation"])
        pts1, pts2, char_segmentation = preprocess(data["image_path"])
        data["pts"] = [pts1, pts2]
        data["char_segmentation"] = char_segmentation
        print(data["char_segmentation"])
output = open('./data.pkl', 'wb')
pickle.dump(data_list, output)
output.close()
"""

In [6]:
import pickle

pkl_file = open('./data.pkl', 'rb')

data_list = pickle.load(pkl_file)
print(len(data_list))

4000


In [7]:
config_batch_size = 32
train_data_list, val_data_list, _, _ = train_test_split(data_list, data_list, test_size=0.2, random_state=random_seed)
train_gen = CarIdDataset(train_data_list, "train")
train_loader = DataLoader(train_gen,batch_size=config_batch_size,shuffle=True,pin_memory=True,num_workers=2)

val_gen = CarIdDataset(val_data_list, "val")
val_loader = DataLoader(val_gen,batch_size=config_batch_size,shuffle=False,pin_memory=True,num_workers=2)
dataloaders_dict = {"train":train_loader, "val":val_loader}

In [8]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
            torch.nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
            torch.nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            torch.nn.ReLU(),
            torch.nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            torch.nn.ReLU(),
            torch.nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            torch.nn.ReLU(),
            #torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
          )
        self.classify = nn.Sequential(
                nn.BatchNorm1d(256),
                nn.Dropout(0.5),
                nn.Linear(256, 45),
            )

    def forward(self, x):
        x = self.features(x)
        adaptiveAvgPoolWidth = x.shape[2]
        x = F.avg_pool2d(x, kernel_size=adaptiveAvgPoolWidth)
        x = x.view(x.size(0), -1)
        x = self.classify(x)
        return x

In [12]:
def get_net():
#    alexnet_model = models.alexnet(pretrained=True)
#    alexnet_dict = alexnet_model.state_dict().copy()

    model = Net()
    model_dict = model.state_dict().copy()
    return model
"""
    alexnet_list = list (alexnet_model.state_dict().keys() )
    model_list = list(model.state_dict().keys() )
    for i in range(len(model_list)):
        if model_list[i][:8] == "features":
            model_dict[model_list[i]] = alexnet_dict[alexnet_list[i]]
    model.load_state_dict(model_dict)
"""

'\n    alexnet_list = list (alexnet_model.state_dict().keys() )\n    model_list = list(model.state_dict().keys() )\n    for i in range(len(model_list)):\n        if model_list[i][:8] == "features":\n            model_dict[model_list[i]] = alexnet_dict[alexnet_list[i]]\n    model.load_state_dict(model_dict)\n'

In [13]:
"""
model_ft = models.alexnet(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,class_n)
model_ft.to(device)
print(model_ft)
model_ft = torch.nn.Sequential(
    torch.nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(2, 2)),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1)),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(),
    torch.nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(),
    torch.nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False),
    torch.nn.Dropout(p=0.5),
    torch.nn.Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
    torch.nn.ReLU(),
    torch.nn.Conv2d(128, class_n, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
)
"""
model_ft = get_net()
#last_model_filename = model_file + os.sep + "last_checkpoint.pth.tar"
#last_model = torch.load(last_model_filename)
#model_ft.load_state_dict(last_model["state_dict"])
model_ft.to(device)

params_to_update = model_ft.parameters()
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.BCEWithLogitsLoss().to(device)

model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=500)

Epoch 0/499
----------
train Loss: 6.2208 Acc: 0.2753
val Loss: 6.1504 Acc: 0.4213

Epoch 1/499
----------
train Loss: 6.1351 Acc: 0.3969
val Loss: 6.0807 Acc: 0.7700

Epoch 2/499
----------
train Loss: 6.0657 Acc: 0.5672
val Loss: 6.0100 Acc: 1.0837

Epoch 3/499
----------
train Loss: 6.0021 Acc: 0.7775
val Loss: 5.9659 Acc: 1.4975

Epoch 4/499
----------
train Loss: 5.9417 Acc: 0.9731
val Loss: 5.9049 Acc: 1.8075

Epoch 5/499
----------
train Loss: 5.8808 Acc: 1.1847
val Loss: 5.8488 Acc: 2.0925

Epoch 6/499
----------
train Loss: 5.8220 Acc: 1.3909
val Loss: 5.7853 Acc: 2.3600

Epoch 7/499
----------
train Loss: 5.7621 Acc: 1.5309
val Loss: 5.7257 Acc: 2.6012

Epoch 8/499
----------
train Loss: 5.7010 Acc: 1.7259
val Loss: 5.6625 Acc: 2.7700

Epoch 9/499
----------
train Loss: 5.6396 Acc: 1.9019
val Loss: 5.5977 Acc: 2.9475

Epoch 10/499
----------
train Loss: 5.5770 Acc: 2.0741
val Loss: 5.5360 Acc: 3.0737

Epoch 11/499
----------
train Loss: 5.5116 Acc: 2.1963
val Loss: 5.4611 Acc

KeyboardInterrupt: 

In [10]:
a = torch.zeros(2, 3)
print(a)
b = torch.zeros(2, 3)
print(b)
c = torch.stack((a,b),0)
print(c.shape)
print(c[0])

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
torch.Size([2, 2, 3])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
