In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as tt
import matplotlib.pyplot as plt

In [11]:
N_NETWORKS = 100
BATCH_SIZE = 32
N_EPOCHS   = 1

In [12]:
TRAIN_DATA_PATH = './NOTMNIST/notMNIST_large'
TEST_DATA_PATH = './NOTMNIST/notMNIST_small'
MODEL_STORE_PATH = './models_notMNIST/model_'
model_paths = []
for i in range(100):
    model_paths.append(MODEL_STORE_PATH + str(i) + '.pt')
    file = open(model_paths[i], 'w+')

In [13]:
#transform data to tensor and normalize (values state for MNIST!)
trans = tt.Compose([tt.Grayscale(), tt.ToTensor()]) 

train_dataset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=trans) 
test_dataset  = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=trans)

layer0 = train_dataset.__getitem__(110000)[0]
print(layer0.shape)
print(layer0[0, :, :])

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True) 
test_loader  = DataLoader(dataset=test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

torch.Size([1, 28, 28])
tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0118, 0.0039, 0.0000,
         0.0000, 0.1059, 0.3373, 0.4980, 0.5725, 0.6549, 0.6627, 0.5961, 0.4667,
         0.2627, 0.0549, 0.0000, 0.0039, 0.0118, 0.0039, 0.1725, 0.2039, 0.2039,
         0.0902],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0039, 0.0078, 0.0000, 0.0275, 0.3725,
         0.7490, 0.9333, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
         1.0000, 0.8745, 0.5176, 0.0902, 0.0000, 0.2000, 0.9922, 0.9961, 1.0000,
         0.4471],
        [0.0000, 0.0000, 0.0000, 0.0039, 0.0118, 0.0000, 0.3255, 0.8510, 1.0000,
         0.9961, 1.0000, 0.9961, 0.9804, 0.9765, 0.9843, 0.9922, 0.9882, 0.9882,
         0.9843, 0.9961, 1.0000, 0.9216, 0.2980, 0.5608, 1.0000, 0.9804, 1.0000,
         0.4431],
        [0.0000, 0.0000, 0.0039, 0.0000, 0.0118, 0.5529, 1.0000, 0.9961, 0.9882,
         0.9882, 0.9922, 0.9922, 1.0000, 1.0000, 1.0000, 0.9922, 0.9961, 1.0000,
         1.0000, 1.0000, 0.9804

In [14]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
        
    def forward(self, x):
        return x.view(BATCH_SIZE, -1)

In [15]:
class FConvMNIST(nn.Module):
    def __init__(self, seq, clf):
        super(FConvMNIST, self).__init__()
        self.seq = seq
        self.clf = clf
        
    def forward(self, x):
        return self.clf(self.seq(x)) 

In [16]:
#architecture of the network copied from the article
seq = nn.Sequential(
    nn.Conv2d(1, 256, kernel_size=(7, 7), stride=(1, 1)),
    nn.LeakyReLU(negative_slope=0.01),
    nn.MaxPool2d(kernel_size=2,stride=2, padding=0, dilation=1, ceil_mode=False),
    nn.Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1)),
    nn.LeakyReLU(negative_slope=0.01),
    nn.MaxPool2d(kernel_size=2,stride=2, padding=0, dilation=1, ceil_mode=False),
    Flatten()
)
clf = nn.Linear(in_features=4608, out_features=10, bias=True)

In [17]:
model = FConvMNIST(seq, clf)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

In [18]:
loss_list = []
acc_list  = []
N_STEPS = len(train_loader)
print(N_STEPS)

#train network
for i in range(N_NETWORKS):
    for j in range(N_EPOCHS):
        for k, (images, labels) in enumerate(train_loader):
            if (images.shape[0] < BATCH_SIZE):
                break
            #forward
            pred = model(images)
            loss = criterion(pred, labels)
            loss_list.append(loss.item())
            
            #backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
            #statictics
            total = labels.size(0)
            _, predicted = torch.max(pred.data, 1)
            
            correct = (predicted == labels).sum().item()
            acc_list.append(correct / total)

            if k % 10 == 0:
                print('Network {}/{} Epoch {}/{}, Step {}/{}, Loss: {}, Accuracy: {}%'
                      .format(i, N_NETWORKS, j, N_EPOCHS, k, N_STEPS, loss.item(),
                              (sum(acc_list) / len(acc_list)) * 100))
                
    torch.save(model.state_dict(), model_paths[i])
    close(model_paths[i], "w")
    model = FConvMNIST(seq, clf)
    acc_list = []
    loss_list = []
    

16535
Network 0/100 Epoch 0/1, Step 0/16535, Loss: 2.3188929557800293, Accuracy: 12.5%
Network 0/100 Epoch 0/1, Step 10/16535, Loss: 1.8264448642730713, Accuracy: 27.27272727272727%
Network 0/100 Epoch 0/1, Step 20/16535, Loss: 1.245355248451233, Accuracy: 40.327380952380956%
Network 0/100 Epoch 0/1, Step 30/16535, Loss: 1.2772362232208252, Accuracy: 46.37096774193548%
Network 0/100 Epoch 0/1, Step 40/16535, Loss: 1.0589286088943481, Accuracy: 50.6859756097561%
Network 0/100 Epoch 0/1, Step 50/16535, Loss: 0.844024658203125, Accuracy: 54.595588235294116%
Network 0/100 Epoch 0/1, Step 60/16535, Loss: 0.6370245218276978, Accuracy: 57.63319672131148%
Network 0/100 Epoch 0/1, Step 70/16535, Loss: 0.632017970085144, Accuracy: 60.47535211267606%
Network 0/100 Epoch 0/1, Step 80/16535, Loss: 1.0207936763763428, Accuracy: 61.95987654320988%
Network 0/100 Epoch 0/1, Step 90/16535, Loss: 0.6667768955230713, Accuracy: 63.49587912087912%
Network 0/100 Epoch 0/1, Step 100/16535, Loss: 0.85929888486

Network 0/100 Epoch 0/1, Step 860/16535, Loss: 0.3883987069129944, Accuracy: 79.83449477351915%
Network 0/100 Epoch 0/1, Step 870/16535, Loss: 0.3639961779117584, Accuracy: 79.89021239954076%
Network 0/100 Epoch 0/1, Step 880/16535, Loss: 0.6087743043899536, Accuracy: 79.9233825198638%
Network 0/100 Epoch 0/1, Step 890/16535, Loss: 0.5921187400817871, Accuracy: 79.96632996632997%
Network 0/100 Epoch 0/1, Step 900/16535, Loss: 0.6756892800331116, Accuracy: 80.02566592674806%
Network 0/100 Epoch 0/1, Step 910/16535, Loss: 0.6165351271629333, Accuracy: 80.080268935236%
Network 0/100 Epoch 0/1, Step 920/16535, Loss: 0.24364815652370453, Accuracy: 80.15743756786102%
Network 0/100 Epoch 0/1, Step 930/16535, Loss: 0.4909859299659729, Accuracy: 80.22287862513426%
Network 0/100 Epoch 0/1, Step 940/16535, Loss: 0.31784892082214355, Accuracy: 80.25704038257173%
Network 0/100 Epoch 0/1, Step 950/16535, Loss: 0.4777461886405945, Accuracy: 80.30362776025235%
Network 0/100 Epoch 0/1, Step 960/16535, 

OSError: cannot identify image file <_io.BufferedReader name='./NOTMNIST/notMNIST_large/A/SG90IE11c3RhcmQgQlROIFBvc3Rlci50dGY=.png'>

In [None]:
parameters = model.parameters()

#get 256 tensors with shape [1, 7, 7]
layer_weights = next(parameters)

#draw one weight
imgplot = plt.imshow(layer_weights[0].view(7, -1).data)