This time we use the Fashion-MNIST dataset to test softmax regression.

In [66]:
import onnx
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

# ToTensor converts each PIL image into a float32 tensor.
trans = transforms.ToTensor()
# Training and test splits of Fashion-MNIST, stored under ../data (download=True fetches them if needed).
mnist_train = torchvision.datasets.FashionMNIST(root='../data',train=True,transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(root='../data',train=False,transform=trans, download=True)

In [53]:
len(mnist_train), len(mnist_test)
# Each example is an (image, label) pair: the image is a 1 x 28 x 28 tensor and the label is one class index.

(60000, 10000)

In [67]:
def get_fashion_mnist_labels(labels):
    """Return the text labels for a sequence of Fashion-MNIST class indices.

    :param labels: iterable of class indices (anything ``int()`` accepts)
    :return: list with the text name of each class, in the same order
    """
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names

# Number of examples per mini-batch used by the data loaders below.
batch_size = 256

def get_dataloader_workers():
    """Return the number of worker processes the data loaders should use."""
    num_workers = 8
    return num_workers

# Shuffled mini-batch iterator over the training split.
train_iter = data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=get_dataloader_workers(),
)

In [68]:
def load_data_fashion_mnist(batch_size, resize=None):
    """Build the Fashion-MNIST training and test data loaders.

    :param batch_size: number of examples per mini-batch for both loaders
    :param resize: optional size; when truthy, images go through
        ``transforms.Resize(resize)`` before being converted to tensors
    :return: ``(train_loader, test_loader)``; the training loader shuffles,
        the test loader does not
    """
    # Resize (if requested) has to run before ToTensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    train_set = torchvision.datasets.FashionMNIST(
        root='../data', train=True, transform=pipeline, download=True)
    test_set = torchvision.datasets.FashionMNIST(
        root='../data', train=False, transform=pipeline, download=True)

    train_loader = data.DataLoader(
        train_set, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = data.DataLoader(
        test_set, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)


In [58]:
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)

num_inputs = 784   # each 28 x 28 x 1 image flattened into one row
num_outputs = 10   # number of classes

# Model parameters: small random weights and zero biases, both tracked by autograd.
W = torch.normal(
    0, 0.01,
    size=(num_inputs, num_outputs),
    requires_grad=True,
)
b = torch.zeros(num_outputs, requires_grad=True)

print(b)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)


In [80]:
## softmax function
from IPython import display
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from d2l import sgd
class Accumulator:  #@save
    """Keep running sums over ``n`` quantities."""

    # Note on add(): zip pairs each stored total with the argument at the same
    # position and stops at the shorter of the two sequences.

    def __init__(self, n):
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument (converted with ``float``) to the matching total."""
        updated = []
        for total, value in zip(self.data, args):
            updated.append(total + float(value))
        self.data = updated

    def reset(self):
        """Set every total back to 0.0, keeping the current length."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]
# class Animator:  #@save
#     """在动画中绘制数据。"""
#     def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
#                  ylim=None, xscale='linear', yscale='linear',
#                  fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
#                  figsize=(3.5, 2.5)):
#         # 增量地绘制多条线
#         if legend is None:
#             legend = []
#         d2l.use_svg_display()
#         self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
#         if nrows * ncols == 1:
#             self.axes = [self.axes, ]
#         # 使用lambda函数捕获参数
#         self.config_axes = lambda: d2l.set_axes(
#             self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
#         self.X, self.Y, self.fmts = None, None, fmts
#
#     def add(self, x, y):
#         # 向图表中添加多个数据点
#         if not hasattr(y, "__len__"):
#             y = [y]
#         n = len(y)
#         if not hasattr(x, "__len__"):
#             x = [x] * n
#         if not self.X:
#             self.X = [[] for _ in range(n)]
#         if not self.Y:
#             self.Y = [[] for _ in range(n)]
#         for i, (a, b) in enumerate(zip(x, y)):
#             if a is not None and b is not None:
#                 self.X[i].append(a)
#                 self.Y[i].append(b)
#         self.axes[0].cla()
#         for x, y, fmt in zip(self.X, self.Y, self.fmts):
#             self.axes[0].plot(x, y, fmt)
#         self.config_axes()
#         display.display(self.fig)
#         display.clear_output(wait=True)

def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    :param X: tensor of scores, one example per row
    :return: tensor of the same shape whose rows are non-negative and sum to 1
    """
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps torch.exp from overflowing to inf (which would turn the
    # division below into inf / inf = NaN) for large scores.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)
    return X_exp / X_exp.sum(1, keepdim=True)  # normalise each row by its sum

def sgd(params, lr, batch_size): #@save
    """
    Apply one mini-batch SGD step to every tensor in params, in place.

    :param params: iterable of tensors whose ``.grad`` has been populated
    :param lr: learning rate
    :param batch_size: divisor applied to the gradient
    :return: None
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            scaled_grad = lr * p.grad / batch_size
            p -= scaled_grad
            # Clear the gradient so the next backward() starts from zero.
            p.grad.zero_()

def net(X):
    """Softmax-regression forward pass using the module-level W and b.

    :param X: batch of inputs; reshaped so each row has ``W.shape[0]`` values
    :return: the softmax of ``X_flat @ W + b``
    """
    flat = X.reshape(-1, W.shape[0])  # one row per example
    scores = torch.matmul(flat, W) + b
    return softmax(scores)

def corss_entropy(y_hat,y):
    '''
    Per-example cross-entropy loss.

    :param y_hat: predicted probabilities, one row per example and one
        column per class, e.g.
        [
            [0.1,0.2,0.3,...,0.8],
            [0.1,0.2,0.3,...,0.8],
            [0.1,0.2,0.3,...,0.8]
        ]
    :param y: index of the correct class for each row of y_hat
    :return: 1-D tensor holding -log(probability of the correct class)
    '''
    rows = range(len(y_hat))
    # Pick, for every row, the probability assigned to its true class.
    true_class_prob = y_hat[rows, y]
    return -torch.log(true_class_prob)

def accuracy(y_hat,y):
    '''
    Count the correct predictions in a batch.

    :param y_hat: either per-class scores/probabilities with one row per
        example, or a 1-D tensor of already-predicted class indices
    :param y: the index of the correct class for each example
    :return: the number of predictions that match y, as a float
    '''
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        # Scores were given: the predicted class is the column with the
        # largest value in each row.
        y_hat = y_hat.argmax(axis=1)
    # The comparison has to run for every input. It used to sit inside the
    # `if`, so passing class indices directly made the function return None.
    cmp = y_hat.type(y.dtype) == y  # boolean vector, True where correct
    return float(cmp.type(y.dtype).sum())

def evaluate_accuracy(net,data_iter): #@save
    """
    Compute the accuracy of net over every batch in data_iter.

    :param net: callable model; put into eval mode when it is a torch.nn.Module
    :param data_iter: iterable yielding (X, y) batches
    :return: correct predictions divided by the number of labels seen
    :raises ZeroDivisionError: if data_iter yields no examples
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)  # [correct predictions, labels seen]
    # Evaluation needs no gradients; without no_grad every forward pass would
    # still be recorded by autograd for parameters that require grad.
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]

def train_epoch(net, train_iter, loss, updater):
    """
    Train net for one pass over train_iter.

    :param net: callable model; put into train mode when it is a torch.nn.Module
    :param train_iter: iterable yielding (X, y) mini-batches
    :param loss: callable ``loss(y_hat, y)``
    :param updater: a torch.optim.Optimizer, or a callable taking the batch size
    :return: (summed loss / examples seen, correct predictions / examples seen)
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    # Running totals: [loss, correct predictions, examples seen]
    totals = Accumulator(3)
    uses_optimizer = isinstance(updater, torch.optim.Optimizer)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        n_examples = y.numel()
        if uses_optimizer:
            # Built-in optimizer: clear old gradients, backpropagate, step.
            # float(l) and l.backward() are called directly, so l must be a
            # scalar here; scaling by len(y) presumably undoes a batch mean.
            updater.zero_grad()
            l.backward()
            updater.step()
            totals.add(float(l) * len(y), accuracy(y_hat, y), n_examples)
        else:
            # Custom updater: sum the loss before backpropagating and hand
            # the batch size to the updater.
            l.sum().backward()
            updater(X.shape[0])
            totals.add(float(l.sum()), accuracy(y_hat, y), n_examples)
    avg_loss = totals[0] / totals[2]
    train_acc = totals[1] / totals[2]
    return avg_loss, train_acc

def train(net, train_iter, test_iter, loss, num_epochs, updater):
    """
    Train net for num_epochs epochs, printing metrics after each one.

    :param net: model passed on to train_epoch and evaluate_accuracy
    :param train_iter: iterable of training batches
    :param test_iter: iterable of test batches
    :param loss: loss callable passed on to train_epoch
    :param num_epochs: number of passes over train_iter
    :param updater: optimizer or update callable passed on to train_epoch
    :return: None
    """
    for _ in range(num_epochs):
        train_loss, train_acc = train_epoch(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        print('train: loss:{},acc:{}'.format(train_loss, train_acc))
        print('test: acc:{}'.format(test_acc))
lr = 0.1  # learning rate used by the hand-written updater
def updater(batch_size):
    """Run one sgd step on the module-level W and b with learning rate lr."""
    params = [W, b]
    return sgd(params, lr, batch_size)


In [85]:
# Train the hand-written model. W and b are module-level tensors that sgd
# updates in place, so re-running this cell continues from the current
# parameters instead of starting over.
num_epoch = 100
train(net,train_iter,test_iter,corss_entropy,num_epoch,updater)


train: loss:0.3876960289001465,acc:0.8661
test: acc:0.8429
train: loss:0.38620220266977945,acc:0.8669833333333333
test: acc:0.8365
train: loss:0.3866526940027873,acc:0.8664333333333334
test: acc:0.84
train: loss:0.3862461729685466,acc:0.8664666666666667
test: acc:0.8391
train: loss:0.38529468275705975,acc:0.8672333333333333
test: acc:0.8411
train: loss:0.38535523897806806,acc:0.8668
test: acc:0.8399
train: loss:0.38497399520874026,acc:0.8668
test: acc:0.844
train: loss:0.38466697476704914,acc:0.8672
test: acc:0.8454
train: loss:0.38428556276957193,acc:0.8670833333333333
test: acc:0.8383
train: loss:0.3841831718126933,acc:0.8668833333333333
test: acc:0.8424
train: loss:0.3837581865310669,acc:0.8671333333333333
test: acc:0.8379
train: loss:0.3835071088155111,acc:0.86845
test: acc:0.8437
train: loss:0.3838327527999878,acc:0.8678333333333333
test: acc:0.8437
train: loss:0.38338331985473634,acc:0.8678833333333333
test: acc:0.8426
train: loss:0.3826045313199361,acc:0.86775
test: acc:0.844
tr

In [87]:
from torch import nn
## Train the same model using PyTorch's built-in modules.
# NOTE: this rebinds `net`, replacing the hand-written net(X) function defined earlier.
# nn.Flatten turns each input image into a vector before the 784 -> 10 linear layer.
net = nn.Sequential(nn.Flatten(),nn.Linear(784,10))

def init_weight(m):
    """Re-draw the weight of an nn.Linear module from a normal distribution with std 0.01.

    Modules of any other type are left untouched.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, std=0.01)
# Module.apply calls init_weight on net itself and, recursively, on each of its submodules.
net.apply(init_weight)

# nn.CrossEntropyLoss is given the raw linear outputs; net contains no softmax layer.
loss = nn.CrossEntropyLoss()

trainer = torch.optim.SGD(net.parameters(),lr = 0.1)

num_epoch = 10
train(net,train_iter,test_iter,loss,num_epoch,trainer)



train: loss:0.7870307423909505,acc:0.7473333333333333
test: acc:0.7871
train: loss:0.5693782514413198,acc:0.8141
test: acc:0.8107
train: loss:0.525407086722056,acc:0.8263
test: acc:0.8048
train: loss:0.5017995771249135,acc:0.8323
test: acc:0.8255
train: loss:0.48496064836184183,acc:0.8375166666666667
test: acc:0.8276
train: loss:0.4731929989973704,acc:0.8402
test: acc:0.8248
train: loss:0.4648489554087321,acc:0.84205
test: acc:0.8305
train: loss:0.4579583891073863,acc:0.8449333333333333
test: acc:0.8322
train: loss:0.4522715295950572,acc:0.8467333333333333
test: acc:0.8311
train: loss:0.4467018487453461,acc:0.8480166666666666
test: acc:0.8349
