# Двухслойная нейронная сеть

Задача:
- Реализовать слои Sigmoid, Dense, Softmax, LogLoss с использованием тензоров PyTorch
- Реализовать двухслойную нейронную сеть для распознавания цифр MNIST
- Обучить сеть. Обученная модель должна побить baseline на kaggle. 


In [1]:
import torch
from torch import FloatTensor
import time

In [2]:
import math
import numpy

In [3]:
class Sigmoid:
    """Element-wise logistic sigmoid activation layer.

    The layer has no trainable parameters; ``backward`` accepts ``lr`` only
    so that it can be called with the same arguments as the other layers.
    """

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` computed element-wise.

        The input is kept in ``self.x`` and the activation in ``self.out``
        so that ``backward`` can reuse them.
        """
        self.x = x
        self.out = 1. / (1 + torch.exp(-x))
        return self.out

    def backward(self, dz, lr):
        """Propagate the upstream gradient ``dz`` through the sigmoid.

        Returns ``dz * s * (1 - s)`` where ``s`` is the activation produced
        by the most recent ``forward`` call. ``lr`` is unused. The local
        derivative ``s * (1 - s)`` is also stored in ``self.lp``.
        """
        # Reuse the cached activation instead of running the forward pass
        # (and its exp) a second time.
        self.lp = self.out * (1 - self.out)
        return dz * self.lp

In [4]:
class Dense:
    """Fully connected layer computing ``w @ x + th`` for one input vector.

    ``w`` has shape ``(out_size, in_size)`` and ``th`` has shape
    ``(out_size,)``. ``backward`` performs the gradient-descent update of
    both parameters itself.
    """

    def __init__(self, in_size, out_size):
        """Initialise weights and bias with uniform values in ``[0, 1e-4)``."""
        self.w = torch.rand(out_size, in_size) / 10000
        self.th = torch.rand(out_size) / 10000

    def forward(self, x):
        """Return ``w @ x + th``; the float-converted input is kept in ``self.x``."""
        # Convert once here so backward does not have to convert again.
        self.x = x.float()
        return self.w @ self.x + self.th

    def backward(self, dz, lr):
        """Update the parameters and return the gradient w.r.t. the input.

        ``dz`` is the gradient of the loss w.r.t. this layer's output and
        ``lr`` is the learning rate (a plain number).
        """
        # The input gradient must use the weights from *before* the update.
        ret = torch.t(self.w) @ dz
        # dL/dw is the outer product dz x^T, so this is w <- w - lr * dz x^T.
        # The keyword form replaces the deprecated positional
        # addr(beta, mat, alpha, vec1, vec2) overload.
        self.w = torch.addr(self.w, dz, self.x, alpha=-lr)
        self.th -= lr * dz
        return ret

In [5]:
class Softmax:
    """Softmax layer that normalises a vector of logits into probabilities.

    The layer has no trainable parameters; ``backward`` accepts ``lr`` only
    so that it can be called with the same arguments as the other layers.
    """

    def forward(self, x):
        """Return ``exp(x) / sum(exp(x))``, computed in a numerically stable way.

        The input is kept in ``self.x`` and the result in ``self.out``.
        """
        self.x = x
        # Subtracting the maximum leaves the result mathematically unchanged
        # but prevents exp() from overflowing to inf (which produced nan).
        shift = torch.max(x) if x.numel() else 0
        exps = torch.exp(x - shift)
        self.out = exps / torch.sum(exps)
        return self.out

    def backward(self, dz, lr):
        """Return ``dz @ J`` where ``J`` is the softmax Jacobian.

        ``J[i, j] = s[i] * (delta_ij - s[j])`` with ``s`` the output of the
        most recent ``forward`` call. ``lr`` is unused.
        """
        sm = self.out
        identity = torch.eye(sm.size()[0], dtype=sm.dtype, device=sm.device)
        # Broadcasting builds the full Jacobian matrix from the output vector.
        df = sm * torch.t(identity - sm)
        return torch.matmul(dz, df)


In [6]:
class LogLoss:
    """Cross-entropy (log) loss for a single sample with an integer class label."""

    def forward(self, y_true, y_hat):
        """Return ``-log(y_hat[y_true])``.

        ``y_true`` is the class index (anything ``int()`` accepts) and
        ``y_hat`` is the vector of predicted probabilities. The one-hot
        encoded label is kept in ``self.y_true`` and the prediction in
        ``self.y_hat`` for use by ``backward``.
        """
        target = int(y_true)
        # Size the one-hot vector from the prediction instead of a fixed 10.
        y = torch.zeros_like(y_hat)
        y[target] = 1
        self.y_true = y
        self.y_hat = y_hat
        # Only the target entry contributes to the loss. Taking the log of
        # just that entry avoids 0 * log(0) = nan when a non-target
        # probability has underflowed to exactly zero.
        return -torch.log(y_hat[target])

    def backward(self, dz, lr=0.001):
        """Return ``dz * dL/dy_hat``, i.e. ``-dz * y_true / y_hat``.

        ``lr`` is unused. Entries where the one-hot label is zero get a zero
        gradient directly, which avoids 0 / 0 = nan for zero probabilities.
        """
        grad = torch.zeros_like(self.y_hat)
        mask = self.y_true != 0
        grad[mask] = -self.y_true[mask] / self.y_hat[mask]
        return dz * grad

In [30]:
class ReLu:
    """Rectified linear unit activation layer: ``max(0, x)`` element-wise.

    The layer has no trainable parameters; ``backward`` accepts ``lr`` only
    so that it can be called with the same arguments as the other layers.
    """

    def forward(self, x):
        """Return ``x`` with negative entries replaced by zero; keeps ``x`` in ``self.x``."""
        self.x = x
        # torch.max(0, x) is not a valid call (a Python number cannot be the
        # first argument); clamp is the element-wise equivalent.
        return torch.clamp(x, min=0)

    def backward(self, dz, lr=0.1):
        """Return ``dz`` with entries zeroed where the forward input was negative.

        A new tensor is returned, so the caller's ``dz`` is left untouched.
        ``lr`` is unused.
        """
        return torch.where(self.x < 0, torch.zeros_like(dz), dz)

In [7]:
class Net():
    """Two-layer perceptron: Dense -> Sigmoid -> Dense -> Softmax."""

    def __init__(self, in_size, hidden_lay_size, out_size):
        """Create the four layers for the given input, hidden and output sizes."""
        self.d1 = Dense(in_size, hidden_lay_size)
        self.sg = Sigmoid()
        self.d2 = Dense(hidden_lay_size, out_size)
        self.s = Softmax()

    def forward(self, x):
        """Run ``x`` through the layers in order and return the final output."""
        self.x = x
        out = x
        for layer in (self.d1, self.sg, self.d2, self.s):
            out = layer.forward(out)
        return out

    def backward(self, dz, lr):
        """Back-propagate ``dz`` through the layers in reverse order.

        Every layer receives the learning rate ``lr``. Returns the gradient
        produced by the first layer.
        """
        grad = dz
        for layer in (self.s, self.d2, self.sg, self.d1):
            grad = layer.backward(grad, lr)
        return grad

# Загружаем датасет

In [8]:
# Load MNIST and split it 70/30 into train and test parts.
# fetch_mldata has been removed from scikit-learn; fetch_openml is its
# documented replacement.
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# as_frame=False requests NumPy arrays rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
# NOTE(review): OpenML is expected to deliver the labels as strings, hence
# the cast to float before building tensors - confirm against the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    mnist['data'] / 255,  # scale the pixel values by 1/255
    mnist['target'].astype('float32'),
    test_size=0.3,
)
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
Y_train = torch.FloatTensor(y_train)
Y_test = torch.FloatTensor(y_test)

# Обучаем сеть на CPU

In [9]:
# Model, loss and learning rate for the CPU training run:
# 784 input features, 100 hidden units, 10 output classes.
lr = 0.01
loss = LogLoss()
net = Net(in_size=784, hidden_lay_size=100, out_size=10)

def train(net, loss, lr, n_epoch, X=None, Y=None):
    """Train ``net`` with per-sample stochastic gradient descent.

    Args:
        net: model exposing ``forward(x)`` and ``backward(dz, lr)``.
        loss: loss object exposing ``forward(y_true, y_hat)`` and
            ``backward(dz, lr)``.
        lr: learning rate passed to every ``backward`` call.
        n_epoch: number of passes over the data.
        X: samples, indexed along the first dimension; defaults to the
            module-level ``X_train``.
        Y: labels aligned with ``X``; defaults to the module-level
            ``Y_train``.

    Returns:
        A list with one float per epoch: the loss summed over all samples.
    """
    if X is None:
        X = X_train
    if Y is None:
        Y = Y_train

    L = []
    for epoch in range(n_epoch):
        L_acc = 0.
        # Visit the samples in a fresh random order every epoch.
        arr = numpy.arange(X.size(0))
        numpy.random.shuffle(arr)
        for i in arr:
            y_h = net.forward(X[i])
            # Accumulate a plain float so the history does not hold tensors.
            L_acc += float(loss.forward(Y[i], y_h))

            # Seed back-propagation with an upstream gradient of 1.
            dz = loss.backward(1, lr)
            net.backward(dz, lr)
        L.append(L_acc)
    return L



In [10]:
# Run 10 epochs of training and report the wall-clock duration.
since = time.time()
L = train(net, loss, 0.01, 10)
time_elapsed = time.time() - since
minutes, seconds = divmod(time_elapsed, 60)
print('Training complete in {:.0f}m {:.0f}s \n'.format(minutes, seconds))

Training complete in 193m 47s 



In [11]:
import matplotlib.pylab as plt

# Plot the per-epoch accumulated loss to inspect the learning curve.
plt.plot(L)
plt.show()

# Eyeball the network output for the first two training samples to check
# that training actually worked.
first_prediction = net.forward(X_train[0])
second_prediction = net.forward(X_train[1])
print(first_prediction, second_prediction)

<matplotlib.figure.Figure at 0x7f9726003b00>


 4.1824e-07
 5.5800e-07
 8.4162e-05
 3.7316e-07
 4.5450e-06
 2.0764e-05
 9.9988e-01
 2.3911e-09
 9.0491e-06
 1.3578e-07
[torch.FloatTensor of size 10]
 
 1.1335e-07
 6.5384e-08
 8.1896e-07
 2.1154e-07
 9.9950e-01
 2.5675e-05
 1.6937e-06
 1.8505e-04
 4.8644e-05
 2.4193e-04
[torch.FloatTensor of size 10]



# Сохраняем результат для kaggle

https://www.kaggle.com/c/track-nn-2018-spring-1

In [12]:
import pickle

In [13]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as fh:
        # SECURITY: pickle.load can execute arbitrary code; only use it on
        # files from a trusted source.
        return pickle.load(fh)


# NOTE(review): this rebinds the name `train`, which other cells of this
# notebook use for the training function.
train = _load_pickle('./Загрузки/mnist_train.pkl')
test = _load_pickle('./Загрузки/mnist_test.pkl')

In [102]:
# Build float tensors from the Kaggle training set; the samples are scaled
# by 1/255, the labels are used as they are.
X1_train = torch.FloatTensor(train['data']) / 255
Y1_train = torch.FloatTensor(train['target'])

In [104]:
# Fresh model, loss and learning rate for training on the Kaggle data:
# 784 input features, 100 hidden units, 10 output classes.
lr = 0.01
loss = LogLoss()
net = Net(in_size=784, hidden_lay_size=100, out_size=10)


def train(net, loss, lr, n_epoch, X=None, Y=None):
    """Train ``net`` with per-sample stochastic gradient descent.

    Args:
        net: model exposing ``forward(x)`` and ``backward(dz, lr)``.
        loss: loss object exposing ``forward(y_true, y_hat)`` and
            ``backward(dz, lr)``.
        lr: learning rate passed to every ``backward`` call.
        n_epoch: number of passes over the data.
        X: samples, indexed along the first dimension; defaults to the
            module-level ``X1_train``.
        Y: labels aligned with ``X``; defaults to the module-level
            ``Y1_train``.

    Returns:
        A list with one float per epoch: the loss summed over all samples.
    """
    if X is None:
        X = X1_train
    if Y is None:
        Y = Y1_train

    L = []
    for epoch in range(n_epoch):
        L_acc = 0.
        # Visit the samples in a fresh random order every epoch.
        arr = numpy.arange(X.size(0))
        numpy.random.shuffle(arr)
        for i in arr:
            y_h = net.forward(X[i])
            # Accumulate a plain float so the history does not hold tensors.
            L_acc += float(loss.forward(Y[i], y_h))

            # Seed back-propagation with an upstream gradient of 1.
            dz = loss.backward(1, lr)
            net.backward(dz, lr)
        L.append(L_acc)
    return L



In [105]:
# Run 5 epochs of training and report the wall-clock duration.
since = time.time()
L = train(net, loss, lr, 5)
time_elapsed = time.time() - since
minutes, seconds = divmod(time_elapsed, 60)
print('Training complete in {:.0f}m {:.0f}s \n'.format(minutes, seconds))

Training complete in 49m 16s 



In [14]:
# Build a float tensor from the Kaggle test samples, scaled by 1/255.
X1_test = torch.FloatTensor(test['data']) / 255

In [15]:
# Write the submission file: a header line, then one row per test sample
# holding the sample index followed by the network outputs (columns p0..p9).
with open('./masha_test.csv', 'wt') as f:
    f.write('id,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9\n')
    for i, sample in enumerate(X1_test):
        v = net.forward(sample)
        fields = [str(i)]
        fields.extend(str(p) for p in v.tolist())
        f.write(','.join(fields) + '\n')