## Лабораторная работа по курсу "Искусственный интеллект"
## Многослойный персептрон

| Студент | Иоффе |
|---------|--------|
| Группа  | М8О-114М-21      |

Для начала, скачаем датасет MNIST. Используйте `wget` или `curl`, либо скачайте вручную [по ссылке](https://raw.githubusercontent.com/shwars/NeuroWorkshop/master/Data/MNIST/mnist.pkl.gz).

In [1]:
#!wget https://raw.githubusercontent.com/shwars/NeuroWorkshop/master/Data/MNIST/mnist.pkl.gz
!curl -o mnist.pkl.gz https://raw.githubusercontent.com/shwars/NeuroWorkshop/master/Data/MNIST/mnist.pkl.gz
!gzip -d mnist.pkl.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0  9.9M    0 15081    0     0  58793      0  0:02:56 --:--:--  0:02:56 58910
100  9.9M  100  9.9M    0     0  14.5M      0 --:--:-- --:--:-- --:--:-- 14.5M
"gzip" �� ���� ����७��� ��� ���譥�
��������, �ᯮ��塞�� �ணࠬ��� ��� ������ 䠩���.


Теперь загружаем датасет:

In [3]:
import pickle
with open('mnist.pkl','rb') as f:
    MNIST = pickle.load(f)

In [4]:
labels = MNIST['Train']['Labels']
data = MNIST['Train']['Features']

Смотрим на то, какие данные получились:

In [9]:
data.shape

(42000, 784)

Используйте Scikit Learn для разбиения данных на обучающую и тестовую выборку

In [14]:
import numpy as np
from sklearn.model_selection import train_test_split

data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.33, random_state=42)

In [15]:
data_train.shape

(28140, 784)

In [16]:
class Linear:
    def __init__(self,nin,nout):
        self.W = np.random.normal(0, 1.0/np.sqrt(nin), (nout, nin))
        self.b = np.zeros((1,nout))
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        
    def forward(self, x):
        self.x=x
        return np.dot(x, self.W.T) + self.b
    
    def backward(self, dz):
        dx = np.dot(dz, self.W)
        dW = np.dot(dz.T, self.x)
        db = dz.sum(axis=0)
        self.dW = dW
        self.db = db
        return dx
    
    def update(self,lr):
        self.W -= lr*self.dW
        self.b -= lr*self.db

In [17]:
class Softmax:
    def forward(self,z):
        self.z = z
        zmax = z.max(axis=1,keepdims=True)
        expz = np.exp(z-zmax)
        Z = expz.sum(axis=1,keepdims=True)
        return expz / Z
    def backward(self,dp):
        p = self.forward(self.z)
        pdp = p * dp
        return pdp - p * pdp.sum(axis=1, keepdims=True)
    
class CrossEntropyLoss:
    def forward(self,p,y):
        self.p = p
        self.y = y
        p_of_y = p[np.arange(len(y)), y]
        log_prob = np.log(p_of_y)
        return -log_prob.mean()
    def backward(self,loss):
        dlog_softmax = np.zeros_like(self.p)
        dlog_softmax[np.arange(len(self.y)), self.y] -= 1.0/len(self.y)
        return dlog_softmax / self.p

In [119]:
#Y = 1 / 1+e -z
class Sigmoid:
    def forward(self,z):
        z = 1 / (1 + np.exp(-z))
        self.z = z
        return z
    def backward(self,dz):
        return self.z*(1-self.z)*dz


class Tanh:
    def forward(self,x):
        y = np.tanh(x)
        self.y = y
        return y
    def backward(self,dy):
        return (1.0-self.y**2)*dy

In [115]:
class Net:
    def __init__(self):
        self.layers = []
    
    def add(self,l):
        self.layers.append(l)
        
    def forward(self,x):
        for l in self.layers:
            x = l.forward(x)
        return x
    
    def backward(self,z):
        for l in self.layers[::-1]:
            z = l.backward(z)
        return z
    
    def update(self,lr):
        for l in self.layers:
            if 'update' in l.__dir__():
                l.update(lr)

    def train_epoch(self, train_x, train_labels, loss=CrossEntropyLoss(), batch_size=4, lr=1e-5):
        for i in range(0,len(train_x),batch_size):
            xb = train_x[i:i+batch_size]
            yb = train_labels[i:i+batch_size]

            p = self.forward(xb)
            l = loss.forward(p,yb)
            dp = loss.backward(l)
            dx = self.backward(dp)
            net.update(lr)


    def get_loss_acc(self,x,y,loss=CrossEntropyLoss()):
        p = net.forward(x)
        l = loss.forward(p,y)
        pred = np.argmax(p,axis=1)
        acc = (pred==y).mean()
        return l,acc

In [144]:
net = Net()
net.add(Linear(784,400))
net.add(Tanh())
net.add(Linear(400,10))
net.add(Softmax())
loss = CrossEntropyLoss()

print("Initial loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))

net.train_epoch(data_train,labels_train, loss=CrossEntropyLoss(), batch_size=4, lr=1e-3)
        
print("Final loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))
print("Test loss={}, accuracy={}: ".format(*net.get_loss_acc(data_test,labels_test)))

Initial loss=2.5940558928416704, accuracy=0.12391613361762616: 
Final loss=0.3913572785320046, accuracy=0.8918265813788202: 
Test loss=0.3994715421631222, accuracy=0.8891774891774892: 


In [154]:
net = Net()
net.add(Linear(784,400))
net.add(Sigmoid())
net.add(Linear(400,10))
net.add(Softmax())
loss = CrossEntropyLoss()

print("Initial loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))

net.train_epoch(data_train,labels_train, loss=CrossEntropyLoss(), batch_size=4, lr=2e-3)
        
print("Final loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))
print("Test loss={}, accuracy={}: ".format(*net.get_loss_acc(data_test,labels_test)))

Initial loss=2.605618184471634, accuracy=0.08407960199004975: 
Final loss=0.36651972726620624, accuracy=0.9038024164889836: 
Test loss=0.37828628913391, accuracy=0.8999278499278499: 


In [168]:
net = Net()
net.add(Linear(784,300))
net.add(Sigmoid())
net.add(Linear(300,50))
net.add(Sigmoid())
net.add(Linear(50,10))
net.add(Softmax())
loss = CrossEntropyLoss()

print("Initial loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))

net.train_epoch(data_train,labels_train, loss=CrossEntropyLoss(), batch_size=4, lr=2e-3)
        
print("Final loss={}, accuracy={}: ".format(*net.get_loss_acc(data_train,labels_train)))
print("Test loss={}, accuracy={}: ".format(*net.get_loss_acc(data_test,labels_test)))

Initial loss=2.3729930040918283, accuracy=0.11560056858564322: 
Final loss=0.8697533388151167, accuracy=0.8501776830135039: 
Test loss=0.8763696958277388, accuracy=0.850937950937951: 


In [160]:
def load_mnist(path, kind='train'):
    import os
    import gzip
    import numpy as np

    """Load MNIST data from `path`"""
    labels_path = os.path.join(path,
                               '%s-labels-idx1-ubyte.gz'
                               % kind)
    images_path = os.path.join(path,
                               '%s-images-idx3-ubyte.gz'
                               % kind)

    with gzip.open(labels_path, 'rb') as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8,
                               offset=8)

    with gzip.open(images_path, 'rb') as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape(len(labels), 784)

    return images, labels
    

In [164]:
X_train, y_train = load_mnist('', kind='train')


In [166]:
X_train.shape

(60000, 784)

In [167]:
data_fashion_train, data_fashion_test, labels_fashion_train, labels_fashion_test = train_test_split(X_train, y_train, test_size=0.33, random_state=42)

In [171]:
net = Net()
net.add(Linear(784,400))
net.add(Sigmoid())
net.add(Linear(400,10))
net.add(Softmax())
loss = CrossEntropyLoss()

print("Initial loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_train,labels_fashion_train)))

net.train_epoch(data_fashion_train,labels_fashion_train, loss=CrossEntropyLoss(), batch_size=4, lr=1e-4)
        
print("Final loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_train,labels_fashion_train)))
print("Test loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_test,labels_fashion_test)))

Initial loss=2.3756211549568684, accuracy=0.12343283582089552: 
Final loss=0.9085345290498681, accuracy=0.7518407960199005: 
Test loss=0.9139401958875485, accuracy=0.7476767676767677: 


In [184]:
net = Net()
net.add(Linear(784,400))
net.add(Sigmoid())
net.add(Linear(400,100))
net.add(Sigmoid())
net.add(Linear(100,10))
net.add(Softmax())
loss = CrossEntropyLoss()

print("Initial loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_train,labels_fashion_train)))

net.train_epoch(data_fashion_train,labels_fashion_train, loss=CrossEntropyLoss(), batch_size=4, lr=1e-6)
        
print("Final loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_train,labels_fashion_train)))
print("Test loss={}, accuracy={}: ".format(*net.get_loss_acc(data_fashion_test,labels_fashion_test)))

Initial loss=2.394045004628358, accuracy=0.07074626865671642: 
Final loss=2.383507142703886, accuracy=0.07370646766169155: 
Test loss=2.3826876993071107, accuracy=0.07090909090909091: 
