In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

## Let's get the data, define the model, and set up the training code

In [2]:
# One tensor-conversion transform shared by both dataset splits.
to_tensor = transforms.ToTensor()

# MNIST train / test splits, stored under "./" (download=True is passed for both).
mnist_train = datasets.MNIST("./", train=True, transform=to_tensor, download=True)
mnist_test = datasets.MNIST("./", train=False, transform=to_tensor, download=True)

# Batches of 128 samples; only the training split is shuffled.
train_loader = DataLoader(mnist_train, batch_size=128, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=128, shuffle=False)

print(f"Training images {len(train_loader.dataset)}, Test images {len(test_loader.dataset)}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST\raw\train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST\raw\train-labels-idx1-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST\raw\t10k-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST\raw\t10k-labels-idx1-ubyte.gz


0it [00:00, ?it/s]

Extracting ./MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST\raw
Processing...
Done!

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)



Training images 60000, Test images 10000


In [3]:
class mnist_model(nn.Module):
  """Small MNIST classifier: one strided convolution followed by two linear layers.

  Layers:
    layer1: Conv2d(1 -> 5 channels, 2x2 kernel, stride 2, no padding)
    layer2: Linear(980 -> 100)
    layer3: Linear(100 -> 10)
  ReLU follows layer1 and layer2; layer3's output is returned without activation.
  """

  # Flattened size of layer1's output for a 1x28x28 input: 5 channels * 14 * 14.
  FLAT_FEATURES = 980

  def __init__(self):
    super().__init__()
    self.layer1 = nn.Conv2d(1, 5, kernel_size=2, stride=2, padding=0)
    self.layer2 = nn.Linear(self.FLAT_FEATURES, 100, bias=True)
    self.layer3 = nn.Linear(100, 10, bias=True)
    self.act = nn.ReLU()

  def forward(self, x):
    """Return only the final-layer output for the batch ``x``.

    Delegates to ``output`` so the layer pipeline is defined in exactly one place.
    """
    return self.output(x)[2]

  def output(self, x):
    """Run the network and return the output of every stage.

    Args:
      x: image batch accepted by ``layer1`` (the notebook feeds (N, 1, 28, 28)).

    Returns:
      Tuple ``(out1, out2, out3)``:
        out1: ReLU(layer1(x)) reshaped to (-1, 980)
        out2: ReLU(layer2(out1))
        out3: layer3(out2)
    """
    out1 = self.act(self.layer1(x))
    # Collapse channel and spatial dimensions so the linear layer can consume them.
    out1 = out1.view(-1, self.FLAT_FEATURES)
    out2 = self.act(self.layer2(out1))
    out3 = self.layer3(out2)
    return out1, out2, out3

In [4]:
# Hyper-parameters for the run.
epochs = 15
lr = 0.1

# Instantiate the network on the GPU and show its layer summary.
model = mnist_model().cuda()
print(model)

# Cross-entropy loss, plain SGD, and a cosine learning-rate schedule whose
# period (T_max) equals the number of epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)
lrs = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

mnist_model(
  (layer1): Conv2d(1, 5, kernel_size=(2, 2), stride=(2, 2))
  (layer2): Linear(in_features=980, out_features=100, bias=True)
  (layer3): Linear(in_features=100, out_features=10, bias=True)
  (act): ReLU()
)


## Training

In [5]:
def get_acc(model, loader):
  """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

  Args:
    model: ``nn.Module`` mapping an image batch to per-class scores
      (the predicted class is the argmax over the last dimension).
    loader: iterable yielding ``(img, label)`` batches.

  Returns:
    ``100 * correct / total`` as a float.

  Raises:
    ZeroDivisionError: if ``loader`` yields no samples.
  """
  # Run on whichever device the model lives on instead of assuming CUDA.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  correct = 0
  total = 0
  # Evaluate in eval mode and put the caller's train/eval mode back afterwards.
  was_training = model.training
  model.eval()
  try:
    # No autograd graph is needed for evaluation; skipping it saves memory and time.
    with torch.no_grad():
      for img, label in loader:
        pred = torch.argmax(model(img.to(device)), -1).cpu()
        correct += torch.sum(pred == label.cpu()).item()
        total += len(img)
  finally:
    model.train(was_training)
  return 100*correct/total

In [6]:
for e in range(epochs):
  # Report the learning rate in effect for this epoch.
  print("lr", optimizer.param_groups[0]["lr"])
  for img, label in train_loader:
    # Clear stale gradients, then forward, backward, and apply the SGD step.
    optimizer.zero_grad()
    out = model(img.cuda())
    loss = criterion(out, label.cuda())
    loss.backward()
    optimizer.step()
  # Advance the learning-rate schedule once per epoch.
  lrs.step()
  # Accuracy over the full training set and the full test set after this epoch.
  train_acc = get_acc(model, train_loader)
  test_acc = get_acc(model, test_loader)
  print(f"Epoch {e}, training accuracy {train_acc}, test accuracy {test_acc}")

lr 0.1
Epoch 0, training accuracy 92.69, test accuracy 93.04
lr 0.09890738003669029
Epoch 1, training accuracy 95.78666666666666, test accuracy 95.81
lr 0.09567727288213004
Epoch 2, training accuracy 96.615, test accuracy 96.29
lr 0.09045084971874738
Epoch 3, training accuracy 97.34333333333333, test accuracy 96.89
lr 0.08345653031794292
Epoch 4, training accuracy 97.72, test accuracy 97.12
lr 0.07500000000000001
Epoch 5, training accuracy 97.71333333333334, test accuracy 97.11
lr 0.06545084971874739
Epoch 6, training accuracy 98.35833333333333, test accuracy 97.51
lr 0.05522642316338269
Epoch 7, training accuracy 98.52666666666667, test accuracy 97.58
lr 0.04477357683661735
Epoch 8, training accuracy 98.665, test accuracy 97.74
lr 0.03454915028125265
Epoch 9, training accuracy 98.82, test accuracy 97.9
lr 0.02500000000000002
Epoch 10, training accuracy 98.82666666666667, test accuracy 97.89
lr 0.01654346968205711
Epoch 11, training accuracy 98.90666666666667, test accuracy 97.83
lr 0.

## Extract weights

In [7]:
# Snapshot every named parameter as a (qualified name, host-side NumPy array) pair.
params = [
  (param_name, tensor.data.cpu().numpy())
  for param_name, tensor in model.named_parameters()
]

In [8]:
# Each name is split on "."; the first two pieces are printed as the layer and
# the parameter type, followed by the array shape.
for (name, p) in params:
  parts = name.split('.')
  print(f"Layer {parts[0]}, type {parts[1]}, shape {p.shape}")

Layer layer1, type weight, shape (5, 1, 2, 2)
Layer layer1, type bias, shape (5,)
Layer layer2, type weight, shape (100, 980)
Layer layer2, type bias, shape (100,)
Layer layer3, type weight, shape (10, 100)
Layer layer3, type bias, shape (10,)


In [9]:
#print(params)

## Visualize hidden activations

In [10]:
# print(model.children())
# out = list(model.children())[0](img.cuda()).data.cpu().numpy()

In [11]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# for _ in range(out.shape[1]):
#   plt.figure(figsize=(1, 1))
#   plt.imshow(out[0, 0], cmap="gray")

In [12]:
# Take the first (images, labels) batch from the training loader; the export
# cells below operate on this batch. next(iter(...)) raises StopIteration on an
# empty loader instead of silently leaving img/label undefined.
img, label = next(iter(train_loader))

In [23]:

# Directory prefix for every exported text file (later cells concatenate onto it).
path = "./Sarda/"
import os
# Create the directory only when missing; exist_ok avoids the separate
# check-then-create step and also creates missing parent directories.
os.makedirs(path, exist_ok=True)

# Labels of the sampled batch.
np.savetxt(fname=path+"label", delimiter=" ", X=label.tolist())

# Accuracy of the model on this single batch.
print(get_acc(model, ([img,label],)))

# Final-layer outputs for the sample at index 1 of the batch. The batch
# dimension is inferred (-1) rather than hard-coded to 128, and no autograd
# graph is built because the values are only printed.
with torch.no_grad():
  print(model.output(img.cuda().view(-1, 1, 28, 28))[2].tolist()[1])

96.875
[-2.5631444454193115, 0.36498868465423584, 5.231445789337158, 11.76509952545166, -10.27979850769043, 5.50112771987915, -8.185359954833984, -4.739908695220947, 1.422985315322876, 0.6781705021858215]


In [14]:
import os

# Run the network once on the sampled batch and reuse all three stage outputs
# (previously the full forward pass was repeated for each file). The batch
# dimension is inferred instead of being fixed at 128, and autograd is disabled
# because the results are only written to disk.
with torch.no_grad():
  layer1_out, layer2_out, layer3_out = model.output(img.cuda().view(-1, 1, 28, 28))

# Input images flattened to one row of 784 values per sample.
np.savetxt(fname=path+"input_0", delimiter=" ", X=img.reshape(-1, 784).tolist())
np.savetxt(fname=path+"outputlayer1_0", delimiter=" ", X=layer1_out.tolist())
np.savetxt(fname=path+"outputlayer2_0", delimiter=" ", X=layer2_out.tolist())
np.savetxt(fname=path+"outputlayer3_0", delimiter=" ", X=layer3_out.tolist())

# params is ordered [layer1.weight, layer1.bias, layer2.weight, layer2.bias,
# layer3.weight, layer3.bias].
# NOTE(review): layer1.weight has shape (5, 1, 2, 2); reshape(4, 5) keeps the flat
# element order and is not a transpose, so a row does not hold one kernel
# position across the 5 filters. Confirm the reader of weight1_0 expects this layout.
np.savetxt(fname=path+"weight1_0", delimiter=" ", X=params[0][1].reshape(2*2*1, 5).tolist())
np.savetxt(fname=path+"bias1_0", delimiter=" ", X=params[1][1].tolist())
np.savetxt(fname=path+"weight2_0", delimiter=" ", X=params[2][1].tolist())
np.savetxt(fname=path+"bias2_0", delimiter=" ", X=params[3][1].tolist())
np.savetxt(fname=path+"weight3_0", delimiter=" ", X=params[4][1].tolist())
np.savetxt(fname=path+"bias3_0", delimiter=" ", X=params[5][1].tolist())