# MNIST recognition. 
1. define a three-layer fully connected neural network.  
2. Add activation function in a class.  
    nn.ReLU(True)
3. Implement batch norm for the first two layers.   
    nn.BatchNorm1d(n_hidden1)
4. Train the model.

In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import mnist
import numpy as np

In [2]:
batch_size = 64
learning_rate = 1e-2
epoches = 20

## torchvision.transforms对数据进行预处理。  
1. transforms.ToTensor(): 将图片转换成pytorch中的Tensor，并自动将图片标准化。Tensor的范围：0——1

2. transforms.Normalize(parameter1, parameter2)： parameter1:均值；parameter2：方差。 减去均值，除以方差

In [3]:
data_tf1 = transforms.Compose([transforms.ToTensor(), transforms.Normalize([0.5],[0.5])])

In [4]:
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5 # 标准化，这个技巧之后会讲到
    x = x.reshape((-1,)) # 拉平
    x = torch.from_numpy(x)
    return x

In [5]:
x_train = mnist.MNIST(root='./data', train=True, transform=data_tf, download=True)
x_test = mnist.MNIST(root='./data', train=False, transform=data_tf, download=True)

# x_train_loader = DataLoader(x_train, batch_size=batch_size, shuffle=True)
# x_test_loader = DataLoader(x_test, batch_size=batch_size, shuffle=False)

In [6]:
x_t = mnist.MNIST(root='./data', train=True, download=True)
tt, tta = x_t[0]
print(tt)  #tt.size:(28, 28)
tta

<PIL.Image.Image image mode=L size=28x28 at 0x11AB68D10>


5

In [7]:
a_data, a_label = x_train[0]
print(a_data.shape)
a_label

torch.Size([784])


5

### 这里读入的数据是PIL.Image.Image格式，将其转换为numpy array  
(28, 28) is the shape the image.   

In [8]:
import numpy as np
a_data = np.array(a_data, dtype='float32')
print(a_data.shape)

(784,)


In [9]:
x_train_loader = DataLoader(x_train, batch_size=64, shuffle=True)
x_test_loader = DataLoader(x_test, batch_size=128, shuffle=False)

In [10]:
a, a_label = next(iter(x_train_loader))
print(a_label)
print(a.shape)

tensor([8, 7, 6, 3, 3, 7, 4, 2, 0, 9, 0, 6, 9, 5, 3, 8, 2, 7, 3, 4, 0, 2, 5, 1,
        2, 5, 2, 7, 6, 4, 4, 2, 8, 0, 7, 0, 2, 8, 4, 5, 8, 0, 3, 4, 4, 8, 7, 8,
        3, 8, 5, 2, 2, 3, 1, 4, 7, 0, 2, 1, 4, 2, 4, 0])
torch.Size([64, 784])


In [18]:
# Define the model: three layers fully connected neural network
class ThreeLayersConnected(nn.Module):
    def __init__(self, in_dim, hidden1, hidden2, hidden3, out_dim):
        super(ThreeLayersConnected, self).__init__()
        self.layer1 = nn.Linear(in_dim, hidden1)
        self.layer2 = nn.Linear(hidden1, hidden2)
        self.layer3 = nn.Linear(hidden2, hidden3)
        self.layer4 = nn.Linear(hidden3, out_dim)
        
    def forwad(x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        out = self.layer4(x)
        return out

# Define the activation function: ReLU
class Activation(nn.Module):
    def __init__(self, in_dim, hidden1, hidden2, out_dim):
        super(Activation, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, hidden1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(hidden1, hidden2), nn.ReLU(True))
        self.layer3 = nn.Sequential(nn.Linear(hidden2, out_dim))
    
    def forward(x):
        out1 = self.layer1(x)
        out2 = self.layer2(out1)
        out = self.layer3(out2)
        return out
    
# ADD batch norm in the neural network
class Batch_Net(nn.Module):
    def __init__(self, in_dim, hidden1, hidden2, out_dim):
        super(Activation, self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, hidden1), nn.BatchNorm1d(hidden1), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(hidden1, hidden2), nn.BatchNorm1d(hidden2), nn.ReLU(True))
        self.layer3 = nn.Sequential(nn.Linear(hidden2, out_dim))
    
    def forward(x):
        out1 = self.layer1(x)
        out2 = self.layer2(out1)
        out = self.layer3(out2)
        return out

In [19]:
simple_model = ThreeLayersConnected(784, 400, 200, 100, 10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(simple_model.parameters(), lr=1e-3)

In [20]:
simple_model

ThreeLayersConnected(
  (layer1): Linear(in_features=784, out_features=400, bias=True)
  (layer2): Linear(in_features=400, out_features=200, bias=True)
  (layer3): Linear(in_features=200, out_features=100, bias=True)
  (layer4): Linear(in_features=100, out_features=10, bias=True)
)

In [21]:
import pixiedust

In [23]:
#%%pixie_debugger
# Train

for e in range(20):
    simple_model.train()
    for im, label in x_train_loader:
        im = im.view(im.size(0), -1)
        im = Variable(im)
        label = Variable(label)
        
        #forward pass
        out = simple_model(im)
        loss = criterion(out, label)
        print(loss)
        #backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
       
    # simple_model.eval()
    

NotImplementedError: 

In [38]:
for im, lable in x_test_loader:
        im = Variable(im)
        label = Variable(label)
        out = simple_model(im)
        loss = criterion(out, label)
        print(loss.item())

NotImplementedError: 