In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
%run framework.ipynb

# Useful functions

In [11]:
def SGD_Momentum(params, gradients, velocity, lr=1e-3, gamma = .9):
    """Apply one SGD-with-momentum step to every parameter.

    For each index ``k`` in ``range(len(params))`` the update is::

        velocity[k] = gamma * velocity[k] + lr * gradients[k]
        params[k]  -= velocity[k]

    Args:
        params: Indexable collection of parameters; each entry is updated
            through augmented assignment (in place for NumPy arrays).
        gradients: Gradients, indexed the same way as ``params``.
        velocity: Mutable container holding the running velocity for each
            index; every entry is rebound to the newly computed value.
        lr: Learning rate applied to the current gradient.
        gamma: Momentum coefficient applied to the previous velocity.

    Returns:
        None. ``params`` and ``velocity`` are modified as a side effect.
    """
    for idx in range(len(params)):
        step = gamma * velocity[idx] + lr * gradients[idx]
        velocity[idx] = step
        params[idx] -= step

In [12]:
def splitter(X, y, shuffle = False):
    """Split samples and labels into train and test parts in a 2:1 ratio.

    The first ``n // 3`` positions of the (optionally shuffled) index order
    form the test set; the remaining positions form the training set.

    Args:
        X: Array of samples, indexable by an integer index array along axis 0.
        y: Array of labels; ``y.shape[0]`` defines the number of samples ``n``.
        shuffle: When truthy, the sample order is permuted with
            ``np.random.shuffle`` (global NumPy RNG) before splitting.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.
    """
    n = y.shape[0]
    indices = np.arange(n)
    if shuffle:
        np.random.shuffle(indices)

    # Slice the index array first so that X and y are fancy-indexed only once
    # per subset, instead of building the full permuted copy twice for each.
    n_test = n // 3
    test_idx = indices[:n_test]
    train_idx = indices[n_test:]
    return X[train_idx], y[train_idx], X[test_idx], y[test_idx]

In [13]:
def one_hot_encode(y, num_classes = 10):
    """Convert integer class labels into a one-hot matrix.

    Args:
        y: Integer class labels with the sample axis first (normally a 1-D
            array of length ``m``).
        num_classes: Number of columns in the result. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        Float array of shape ``(m, num_classes)`` that is zero everywhere
        except for a 1 at ``[i, y[i]]`` in every row ``i``.

    Raises:
        IndexError: If a label does not fit into ``num_classes`` columns.
    """
    labels = np.asarray(y)
    m = labels.shape[0]
    onehot = np.zeros((m, num_classes))
    if m == 0:
        return onehot

    # Row indices shaped to broadcast against `labels`, so that a single
    # vectorised assignment sets onehot[i, labels[i]] for every sample.
    rows = np.arange(m).reshape((m,) + (1,) * (labels.ndim - 1))
    onehot[rows, labels] = 1
    return onehot

In [14]:
def loader(X, y, batch_size):
    """Yield shuffled mini-batches that together cover the data once.

    A fresh random order is drawn with ``np.random.shuffle`` (global NumPy
    RNG) each time the generator is started. The last batch is shorter when
    the number of samples is not a multiple of ``batch_size``.

    Args:
        X: Array of samples, indexable by an integer index array.
        y: Array of targets; ``y.shape[0]`` defines the number of samples.
        batch_size: Maximum number of samples per yielded batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected with the same indices.
    """
    total = y.shape[0]
    order = np.arange(total)
    np.random.shuffle(order)

    for lo in range(0, total, batch_size):
        # Slicing clamps at the end of `order`, so the final batch is
        # simply whatever remains.
        chosen = order[lo:lo + batch_size]
        yield X[chosen], y[chosen]

In [15]:
def accuracy(X, y):
    """Return the fraction of samples the global ``model`` classifies correctly.

    The predicted class of a sample is the argmax of ``model.forward(X)``
    along axis 1.

    Args:
        X: Input batch passed unchanged to ``model.forward``.
        y: Targets, either class indices or an array with more than one
            dimension (e.g. one-hot rows), which is reduced with argmax
            along axis 1 before comparison.

    Returns:
        Number of matching predictions divided by ``len(y)``.
    """
    total = len(y)
    predicted = np.argmax(model.forward(X), axis = 1)
    targets = np.argmax(y, axis = 1) if len(y.shape) > 1 else y
    return np.sum(predicted == targets) / total

# MNIST

In [18]:
import mnist
size = 60000
X = mnist.train_images()[:size] / 255.0  # Load the digit images and normalise by dividing by 255.0
y = mnist.train_labels()[:size]  # Load the matching labels
# Flatten every image into a single feature vector. The row count is taken
# from the data itself, so the reshape still works when the slice above
# returned fewer than `size` images.
X = X.reshape(X.shape[0], -1)
X_train, y_train, X_test, y_test = splitter(X, y, shuffle=True)  # Split into training and test subsets
y_train = one_hot_encode(y_train)  # One-hot encode the training labels; y_test is left un-encoded

URLError: <urlopen error [Errno 11001] getaddrinfo failed>

In [None]:
# Classifier built from the framework's layers; the Linear sizes chain as
# 784 -> 400 -> 200 -> 10.
model = Sequential(
    Linear(784, 400),
    BatchNorm(400, 2),
    Dropout(0.5),
    LeakyReLU(),
    Linear(400, 200),
    BatchNorm(200, 2),
    Dropout(0.5),
    LeakyReLU(),
    Linear(200, 10),
    # NOTE(review): a Sigmoid output feeding CrossEntropy is unusual for a
    # multi-class problem - confirm what input CrossEntropy expects.
    Sigmoid(),
)

criterion = CrossEntropy()

# One zero-initialised velocity buffer per parameter, keyed by the
# parameter's position, which is how SGD_Momentum indexes it.
# parameters() is queried once instead of on every loop iteration.
velocity = {k: np.zeros_like(p) for k, p in enumerate(model.parameters())}

In [None]:
# Number of full passes over the training data.
epochs = 10
# Number of samples per mini-batch requested from loader().
batch_size = 64
# Step size handed to SGD_Momentum as `lr`.
learning_rate = 1e-5

In [None]:
# Mean loss of every mini-batch, in training order.
history = []
model.train()
for i in range(epochs):
    for x, y_true in loader(X_train, y_train, batch_size):

        # Forward pass; the loss is averaged over the batch.
        y_pred = model.forward(x)
        loss = criterion.forward(y_pred, y_true)
        loss = np.mean(loss)

        # Backward pass: loss gradient w.r.t. the predictions, then through
        # the model.
        # NOTE(review): gradients are never reset explicitly here - confirm
        # that model.backward overwrites them rather than accumulating.
        grad = criterion.backward(y_pred, y_true)
        model.backward(x, grad)

        SGD_Momentum(model.parameters(),
            model.grad_parameters(),
            velocity,
            learning_rate)

        history.append(loss)

    # Measure accuracy in evaluation mode so that training-only behaviour
    # (e.g. Dropout) does not distort the reported number, then switch back
    # to training mode for the next epoch.
    model.eval()
    print(accuracy(X_train, y_train))
    model.train()

plt.title("Training loss")
plt.xlabel("iteration")
plt.ylabel("loss")
plt.plot(history, 'g')
plt.show()

In [None]:
# Switch the model to evaluation mode, then report accuracy on the test split.
model.eval()
test_accuracy = accuracy(X_test, y_test)
print(test_accuracy)