图片尺寸的统计：  
max of hs:  395  
max of ws:  304  
max of hs * ws:  98040  
min of hs:  9  
min of ws:  8  
min of hs * ws:  90  
avr of hs:  46.761114568208534  
avr of ws:  41.47341492083247  
avr of hs * ws:  2743.73172924013  

In [1]:
import numpy as np
import torch
import json
import random
import imageio
from torch import nn
from torch.nn import init
from collections import OrderedDict
import cv2
import time

In [2]:
label_to_idx = {
    'i2':0, 'i4':1, 'i5':2, 'io':3, 'ip':4, 
    'p11':5, 'p23':6, 'p26':7, 'p5':8, 'pl30':9, 
    'pl40':10, 'pl5':11, 'pl50':12, 'pl60':13, 'pl80':14, 
    'pn':15, 'pne':16, 'po':17, 'w57':18
}

idx_to_label = [
    'i2', 'i4', 'i5', 'io', 'ip', 'p11', 'p23', 'p26', 'p5', 'pl30', 
    'pl40', 'pl5', 'pl50', 'pl60', 'pl80', 'pn', 'pne', 'po', 'w57'
]

# 初始化训练数据
def init_train_data():
    start = time.time()
    with open("train.json", "r") as f:
    # "train-Copy1.json为手工精简版目录，仅用于验证程序正确性"
    # with open("train-Copy1.json", "r") as f:
        dic = json.loads(f.read())
    names = list(dic)
    labels = list(dic.values())
    num_examples = len(names)
    r = random.random
    random.seed(0)
    random.shuffle(names, random = r)
    random.seed(0)
    random.shuffle(labels, random = r) # 随机打乱，用seed保证打乱顺序相同
    features = []
    idx_labels = []
    for i in range(num_examples):
        name = names[i]
        label = labels[i]
        path = "Train\\" + label + "\\" + name
        img = imageio.imread(path)
        features.append(np.array(img[:, :, 0 : 3]))
        idx_labels.append(label_to_idx[label])
    print("time %.2f sce" % (time.time() - start))
    return features, idx_labels
# features 是一个 list，元素为np.array，形状为[宽 * 高 * 3（维度）]

# 读取训练数据
def train_data_iter(batch_size, features, labels):
    num_examples = len(features)
    for i in range(0, num_examples, batch_size):
        j = min(i + batch_size, num_examples)
        yield features[i: j], labels[i: j]
        
features, labels = init_train_data()

time 123.92 sce


In [3]:
# 标准尺寸
width, height = 128, 128

# 定义线性层
num_inputs, num_outputs = width * height * 3, 19
batch_size = 256

# 设备
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x):
        return x.view(x.shape[0], -1)

net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),
        ('linear', nn.Linear(num_inputs, num_outputs))
    ])
)
init.normal_(net.linear.weight, mean = 0, std = 0.01)
init.constant_(net.linear.bias, val = 0)
net.cuda()

# 损失函数
loss = nn.CrossEntropyLoss()

# 优化器
optimizer = torch.optim.SGD(net.parameters(), lr = 0.1)

In [4]:
# 迭代
epoch_num = 50
for epoch in range(epoch_num):
    start = time.time()
    train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
    for X, y in train_data_iter(batch_size, features, labels):
        
        # 获取数据，y为tensor，形状为[batch_size]
        y = torch.tensor(y, dtype = torch.long, device = device)
        
        # 获取数据，X为tensor，形状为[bath_size * width * hight * 3(dim)]
        images = []
        for image in X:
            image = cv2.resize(image, (width, height), interpolation = cv2.INTER_AREA) # 用cv2转换大小
            image = torch.tensor(image, dtype = torch.float, device = device)
            images.append(image)
        X = torch.stack(images)
        
        # 前向运算和损失
        y_hat = net(X)
        l = loss(y_hat, y).sum()

        # 梯度清零
        optimizer.zero_grad()

        # 后向梯度
        l.backward()
        optimizer.step()

        # 统计
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim = 1) == y).sum().item()
        n += y.shape[0]
    print('epoch %d, loss, %f, train acc %.3f, time %.2f sec' % (epoch + 1, train_l_sum / n, train_acc_sum / n, time.time() - start))

epoch 1, loss, 39036.411859, train acc 0.299, time 4.01 sec
epoch 2, loss, 16611.166589, train acc 0.514, time 3.66 sec
epoch 3, loss, 9802.772350, train acc 0.601, time 3.64 sec
epoch 4, loss, 7596.515600, train acc 0.642, time 3.65 sec
epoch 5, loss, 6440.480364, train acc 0.670, time 3.67 sec
epoch 6, loss, 5149.567629, train acc 0.691, time 3.69 sec
epoch 7, loss, 4226.163573, train acc 0.718, time 3.60 sec
epoch 8, loss, 4261.432301, train acc 0.732, time 3.71 sec
epoch 9, loss, 2507.055948, train acc 0.773, time 3.77 sec
epoch 10, loss, 2569.345997, train acc 0.775, time 3.62 sec
epoch 11, loss, 2767.790599, train acc 0.773, time 3.67 sec
epoch 12, loss, 2343.684251, train acc 0.781, time 3.68 sec
epoch 13, loss, 2077.276749, train acc 0.797, time 3.60 sec
epoch 14, loss, 2533.277769, train acc 0.778, time 3.61 sec
epoch 15, loss, 2187.843094, train acc 0.796, time 3.82 sec
epoch 16, loss, 1622.127712, train acc 0.814, time 4.30 sec
epoch 17, loss, 2166.893199, train acc 0.801, t

In [6]:
# 用模型生成test文件，生成的文件名默认为test1.json
def init_test_file(net, outname = "test1.json"):
    with open("test.json", "r") as f:
        dic = json.loads(f.read())
    names = list(dic)
    num_examples = len(names)
    for name in names:
        path = "Test\\" + name
        img = imageio.imread(path)
        image = np.array(img[:, :, 0 : 3])
        image = cv2.resize(image, (width, height), interpolation = cv2.INTER_AREA)
        image = torch.tensor(image, dtype = torch.float, device = device)
        image = image.view(1, width, height, 3)
        dic[name] = idx_to_label[torch.argmax(net(image).view(19))]
    json_str = json.dumps(dic)
    with open(outname, "w") as f:
        f.write(json_str)
init_test_file(net)