In [19]:
import torch
import torch.nn as nn
import numpy as np

In [20]:
path = "../Data/"

In [21]:
X_train = np.load(path + 'X_train.npy')/255
X_test = np.load(path + 'X_test.npy')/255
y_train = np.load(path + 'y_train.npy')
y_test = np.load(path + 'y_test.npy')

In [22]:
X_train = torch.tensor(X_train, dtype=torch.float32).view(-1, 28*28)
y_train = torch.tensor(y_train, dtype=torch.long).view(-1)
X_test = torch.tensor(X_test, dtype=torch.float32).view(-1, 28*28)
y_test = torch.tensor(y_test, dtype=torch.long).view(-1)

In [23]:
X_train.shape, y_train.shape

(torch.Size([60000, 784]), torch.Size([60000]))

### 784 -> 64 -> 64 -> 32 -> 32 -> 10

In [29]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 64)
        self.fc5 = nn.Linear(64, 10)
        
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)        
        return x

In [37]:
model = Net()

In [31]:
list(model.parameters())[0] #W1

Parameter containing:
tensor([[-0.0246,  0.0353, -0.0206,  ...,  0.0109,  0.0299, -0.0251],
        [-0.0146,  0.0079, -0.0103,  ..., -0.0240,  0.0184, -0.0046],
        [ 0.0150,  0.0288, -0.0321,  ...,  0.0217,  0.0222, -0.0119],
        ...,
        [ 0.0190, -0.0106,  0.0195,  ..., -0.0340,  0.0050, -0.0235],
        [ 0.0349, -0.0054, -0.0232,  ...,  0.0032,  0.0301,  0.0026],
        [-0.0028,  0.0304, -0.0205,  ..., -0.0121,  0.0298,  0.0099]],
       requires_grad=True)

In [32]:
model

Net(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
)

In [43]:
np.random.permutation(torch.arange(10))

array([8, 5, 3, 2, 9, 1, 6, 0, 7, 4], dtype=int64)

In [46]:
def get_batch_index(m, b): #m data size, b batch size
    indexes = np.random.permutation(torch.arange(m))    
    for i in range(0, m, b):
        yield indexes[i:i+b]

In [47]:
for ind in get_batch_index(100, 16):
    print(ind)

[58 98 85 46 25 24 36 17 15 97 14 77 68 81 73  4]
[91 59 61 78 99  0 35 41 10 75 51 19 60 93 39 67]
[ 7 65 45 70 33  8 32 86 28  1 88 26 56 18 95 54]
[12 40 42 80 89 38  9 23 90 57 27 82 21 22 50 31]
[55 44 69  2 13 34 37  6 20 94 87 66 76 11 47 74]
[30 92 96 72 48 71 84 62  3 63 29 64 49  5 79 83]
[16 53 52 43]


In [48]:
learning_rate = 0.001
epochs = 2
batch_size = 64
m = X_train.shape[0]
test_size = X_test.shape[0]

crossentropy = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
    epoch_loss = 0
    epoch_acc = 0
    
    for batch_i in get_batch_index(m, batch_size):
        X_batch = X_train[batch_i]
        y_batch = y_train[batch_i]
        
        optimizer.zero_grad()

        out = model(X_batch)
        
        loss = crossentropy(out, y_batch)
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
        predictions = torch.argmax(out, 1)
        epoch_acc += torch.sum(predictions == y_batch).item()
    
    test_loss = 0
    test_acc = 0
    for batch_i in range(0, test_size, batch_size):
        X_batch = X_test[batch_i:batch_i+batch_size]
        y_batch = y_test[batch_i:batch_i+batch_size]
        
        out = model(X_batch)
        
        loss = crossentropy(out, y_batch)
        test_loss += loss.item()
        predictions = torch.argmax(out, 1)
        test_acc += torch.sum(predictions == y_batch).item()
        
    print("Train Loss:", epoch_loss/(m/batch_size), "Train Acc:", epoch_acc/m)
    print("Test Loss:", test_loss/(test_size/batch_size), "Test Acc:", test_acc/test_size)
    print("="*20)

Train Loss: 0.3000472282648087 Train Acc: 0.8896833333333334
Test Loss: 0.33963531460762025 Test Acc: 0.8758
Train Loss: 0.27873550991217294 Train Acc: 0.8968833333333334
Test Loss: 0.36269821648597717 Test Acc: 0.8731
