In [1]:
import os
from skimage import io
import torchvision.datasets.mnist as mnist

# read the emnist dataset 
train_set = (
mnist.read_image_file('./dataset/emnist-mnist-train-images-idx3-ubyte/emnist-mnist-train-images-idx3-ubyte'),
mnist.read_label_file('./dataset/emnist-mnist-train-labels-idx1-ubyte/emnist-mnist-train-labels-idx1-ubyte')
)
test_set = (
mnist.read_image_file('./dataset/emnist-mnist-train-images-idx3-ubyte/emnist-mnist-train-images-idx3-ubyte'),
mnist.read_label_file('./dataset/emnist-mnist-train-labels-idx1-ubyte/emnist-mnist-train-labels-idx1-ubyte')
)
#They are in 'tensor' type.

print(train_set[0][1].shape)



torch.Size([28, 28])


In [2]:
def convert_to_img(train=True):
    
    if train:  # 如果是训练数据
        f = open('train.txt', 'w')
        data_path ='train/' # 好像可以删去左边的/
        # 如果不存在就新建
        if not os.path.exists(data_path):
            os.makedirs(data_path)
            # enumerate将可遍历对象 组合成索引 可加参数start=2 索引从2开始
        for i, (img, label) in enumerate(zip(train_set[0], train_set[1])):
            img_path = data_path+str(i)+'.jpg'
            # 保存图片
            io.imsave(img_path, img.numpy())
            # 保存标号文件路径和标号
            f.write(img_path + ' ' + str(label.item()) + '\n')

        f.close()
    else:
        f = open('test.txt', 'w')
        data_path ='test/'
        if not os.path.exists(data_path):
            os.makedirs(data_path)
        for i, (img, label) in enumerate(zip(test_set[0], test_set[1])):
            img_path = data_path + str(i) + '.jpg'
            io.imsave(img_path, img.numpy())
            f.write(img_path + ' ' + str(label.item()) + '\n')
        f.close()


if os.path.exists('train'): #如果目录不存在就返回False
    print("Pictures(train) already converted.")

else:
    print("Building training set...")
    convert_to_img(True)
    print("叮！图像转换完成！赞美太阳！")

if os.path.exists('test'):  # 如果目录不存在就返回False
    print("Pictures(test) already converted.")
else:
    print("Building test set...")
    convert_to_img(False)
    print("叮！图像转换完成！赞美太阳！")


Pictures(train) already converted.
Pictures(test) already converted.


In [3]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
def default_loader(path):
    return Image.open(path).convert('RGB')


class MyDataset(Dataset):
    # txt是路径和文件名
    def __init__(self, txt, transform=transforms.ToTensor(), target_transform=None, loader=default_loader):
        fh = open(txt, 'r')  # 只读打开
        imgs = []
        for line in fh:
            line = line.strip('\n')  # 删除 回车
            line = line.rstrip()  # 删除 右侧 空格
            words = line.split()  # 分割：就两列，0列是路径 1列是标号

            imgs.append((words[0], int(words[1]))) # (address,label)
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader  # 是个函数

    # train_loader里面的
    def __getitem__(self, index):
        fn, label = self.imgs[index]   # fn是完整路径 label是标号
        img = self.loader(fn)  # 调用上面的default_loader(path) 按照路径读取图片
        if self.transform is not None:
            img = self.transform(img)  # 将图片转换成FloatTensor类型
        return img, label

    def __len__(self):
        return len(self.imgs)

In [4]:
print("Reading train_data...")
train_data = MyDataset(txt='train.txt', transform=transforms.ToTensor())
# from torch.utils.data import Dataset, DataLoader 下面的函数在这里
train_loader = DataLoader(dataset=train_data, batch_size=50, shuffle=True)
print("Reading test_data...")
test_data = MyDataset(txt='test.txt', transform=transforms.ToTensor())
test_loader = DataLoader(dataset=test_data, batch_size=50,shuffle=False)

Reading train_data...
Reading test_data...


In [5]:
import torch.nn
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Sequential(  # (1,28,28)
            torch.nn.Conv2d(3, 16, 5,1,2),  # (16,28,28)
            # 想要con2d卷积出来的图片尺寸没有变化, padding=(kernel_size-1)/2
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2)  # (16,14,14)
        )
        self.conv2 = torch.nn.Sequential(
           torch.nn.Conv2d(16, 64, 3), # (64,12,12)
           torch.nn.ReLU(),
           torch.nn.MaxPool2d(2) # (64,6,6)
       )
        self.conv3 = torch.nn.Sequential(
           torch.nn.Conv2d(64, 64, 3),# (64,4,4)
           torch.nn.ReLU(),
           torch.nn.MaxPool2d(2)# (64,2,2)
       )
        self.fc = torch.nn.Linear(256,10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)  # 将（batch，64,2,2）展平为（batch，256）
        x = self.fc(x)
        return x

In [8]:
import torch.optim as optim
model = Net()
# construct loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [9]:
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0

In [12]:
import torch
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100*correct/total))

In [None]:
if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()

[1,   300] loss: 0.107
[1,   600] loss: 0.098
[1,   900] loss: 0.086
[1,  1200] loss: 0.078
accuracy on test set: 97 % 
[2,   300] loss: 0.071
