In [112]:
import torch
import numpy as np
import pandas as pd

from tqdm import tqdm 
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda
from torch.utils.data import DataLoader

In [113]:
def overlay_y_on_x(x, y):
    x_ = x.clone()
    x_[:, :10] *= 0.0
    x_[range(x.shape[0]), y] = x.max()
    return x_

class Net(torch.nn.Module):

    def __init__(self, dims):
        super().__init__()
        self.layers = []
        for d in range(len(dims) - 1):
            self.layers += [Layer(dims[d], dims[d + 1]).cuda()]

    def predict(self, x):
        goodness_per_label = []
        for label in range(10):
            h = overlay_y_on_x(x, label)
            goodness = []
            for layer in self.layers:
                h = layer(h)
                goodness += [h.pow(2).mean(1)]
            goodness_per_label += [sum(goodness).unsqueeze(1)]
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(1)

    def train(self, x_pos, x_neg):
        h_pos, h_neg = x_pos, x_neg
        for i, layer in enumerate(self.layers):
            print('training layer', i, '...')
            h_pos, h_neg = layer.train(h_pos, h_neg)


class Layer(nn.Linear):
    def __init__(self, in_features, out_features,
                 bias=True, device=None, dtype=None):
        super().__init__(in_features, out_features, bias, device, dtype)
        self.relu = torch.nn.ReLU()
        self.opt = torch.optim.Adam(self.parameters(), lr=0.03)
        self.threshold = 2.0
        self.num_epochs = 1000

    def forward(self, x):
        x_direction = x / (x.norm(2, 1, keepdim=True) + 1e-4)
        return self.relu(
            torch.mm(x_direction, self.weight.T) +
            self.bias.unsqueeze(0))

    def train(self, x_pos, x_neg):
        for i in tqdm(range(self.num_epochs)):
            g_pos = self.forward(x_pos).pow(2).mean(1)
            g_neg = self.forward(x_neg).pow(2).mean(1)
            # The following loss pushes pos (neg) samples to
            # values larger (smaller) than the self.threshold.
            loss = torch.log(1 + torch.exp(torch.cat([
                -g_pos + self.threshold,
                g_neg - self.threshold]))).mean()
            self.opt.zero_grad()
            # this backward just compute the derivative and hence
            # is not considered backpropagation.
            loss.backward()
            self.opt.step()
        return self.forward(x_pos).detach(), self.forward(x_neg).detach()

In [114]:
class FBNet(torch.nn.Module):
    def __init__(self, n_layers=2, n_input=10, n_output=4, lr=0.001):
        super().__init__()
        self.n_layers = n_layers
        self.layers = torch.nn.ModuleList([torch.nn.Linear(n_input, n_input).to("cuda") for i in range(n_layers)])
        self.out = torch.nn.Linear(n_input, n_output)

        self.criterion = torch.nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

    def forward(self, x):
        x = x.to("cuda")
        for layer in self.layers:
            x = layer(x)
            x = torch.nn.functional.relu(x)
        x = self.out(x)
        return x
    
    def train(self, x, y, epochs):
        for _ in range(epochs):
            self.optimizer.zero_grad()
            output = self.forward(x)
            y = y.to("cuda")
            loss = self.criterion(output, y)
            loss.backward()
            self.optimizer.step()
        # calculate accuracy
        _, predictions = torch.max(output, 1)
        accuracy = (predictions == y).float().mean()

        return accuracy.item()
    
    def eval(self, x, y):
      x = x.to("cuda")
      y = y.to("cuda")
      with torch.no_grad():
          output = self.forward(x)
          predictions = torch.argmax(output, dim=1)
          accuracy = (predictions == y).float().mean()
      return accuracy.item()

In [115]:
def MNIST_loaders(train_batch_size=50000, test_batch_size=10000):
    transform = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))])

    train_loader = DataLoader(
        MNIST('./data/', train=True,
              download=True,
              transform=transform),
        batch_size=train_batch_size, shuffle=True)

    test_loader = DataLoader(
        MNIST('./data/', train=False,
              download=True,
              transform=transform),
        batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader

def get_model(ff_net_bool, n_layers, n_input, n_output):
    model = FBNet(n_layers=n_layers, n_input=n_input, n_output=n_output)
    return model

def train_and_get_eval_fb(model, epochs=1):
    train_loader, test_loader = MNIST_loaders()
    model = model.to("cuda")
    
    with torch.autocast("cuda", dtype=torch.float32):
        for _ in range(epochs):
            for x, y in train_loader:
                x.to("cuda")
                y.to("cuda")
                acc_train = model.train(x, y, 80)
    
    # eval on test
    with torch.autocast("cuda", dtype=torch.float32):
        acc_test = []
        for x, y in test_loader:
            x.to("cuda")
            y.to("cuda")
            test_acc = model.eval(x, y)
            acc_test.append(test_acc)
    
    train_acc = torch.tensor(acc_train).mean().item()
    test_acc = torch.tensor(acc_test).mean().item()

    return train_acc, test_acc

def train_and_get_eval_ff(epochs=1):
    train_loader, test_loader = MNIST_loaders()

    net = Net([784, 500, 500])
    x, y = next(iter(train_loader))
    x, y = x.cuda(), y.cuda()
    
    x_pos = overlay_y_on_x(x, y)
    rnd = torch.randperm(x.size(0))
    x_neg = overlay_y_on_x(x, y[rnd])
    net.train(x_pos, x_neg)

    train_acc=net.predict(x).eq(y).float().mean().item()

    print('train acc:', train_acc)

    x_te, y_te = next(iter(test_loader))
    x_te, y_te = x_te.cuda(), y_te.cuda()

    test_acc=net.predict(x_te).eq(y_te).float().mean().item()

    print('test acc:', test_acc)

    return train_acc, test_acc


if __name__ == "__main__":
    ff_train_accs, ff_test_accs = [], []
    fb_train_accs, fb_test_accs = [], []

    for _ in tqdm(range(15)):
        n_layers = 2
        n_input = 784
        n_output = 10

        #print("FBNet")
        model = get_model(False, n_layers, n_input, n_output)
        fb_train_acc, fb_test_acc = train_and_get_eval_fb(model)
        fb_train_accs.append(fb_train_acc)
        fb_test_accs.append(fb_test_acc)

        #print("FFNet")
        ff_train_acc, ff_test_acc = train_and_get_eval_ff()
        ff_train_accs.append(ff_train_acc)
        ff_test_accs.append(ff_test_acc)
        
    # Print the final accuracy results
    print(" ")
    print("FFNet train accuracy: {:.4f} +- {:.4f}".format(np.mean(ff_train_accs), np.std(ff_train_accs)))
    print("FFNet test accuracy: {:.4f} +- {:.4f}".format(np.mean(ff_test_accs), np.std(ff_test_accs)))
    print("FBNet train accuracy: {:.4f} +- {:.4f}".format(np.mean(fb_train_accs), np.std(fb_train_accs)))
    print("FBNet test accuracy: {:.4f} +- {:.4f}".format(np.mean(fb_test_accs), np.std(fb_test_accs)))
    
    # to pandas csv
    df = pd.DataFrame({"ff_train_acc": ff_train_accs, "ff_test_acc": ff_test_accs, "fb_train_acc": fb_train_accs, "fb_test_acc": fb_test_accs})
    df.to_csv("accs.csv")




  0%|          | 0/15 [00:00<?, ?it/s]

training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.76it/s][A
  3%|▎         | 28/1000 [00:00<00:36, 26.73it/s] [A
  3%|▎         | 34/1000 [00:01<00:45, 21.34it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.90it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 18.83it/s][A
  4%|▍         | 44/1000 [00:02<00:53, 17.75it/s][A
  5%|▍         | 47/1000 [00:02<00:57, 16.63it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.16it/s][A
  5%|▌         | 51/1000 [00:02<00:59, 15.99it/s][A
  5%|▌         | 53/1000 [00:02<01:00, 15.78it/s][A
  6%|▌         | 55/1000 [00:02<01:00, 15.71it/s][A
  6%|▌         | 57/1000 [00:02<01:01, 15.44it/s][A
  6%|▌         | 59/1000 [00:03<01:03, 14.92it/s][A
  6%|▌         | 61/1000 [00:03<01:04, 14.67it/s][A
  6%|▋         | 63/1000 [00:03<01:04, 14.54it/s][A
  6%|▋         | 65/1000 [00:03<01:04, 14.60it/s][A
  7%|▋         | 67/1000 [00:03<01:03, 14.72it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.85it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:57, 17.23it/s][A
  0%|          | 4/1000 [00:00<00:59, 16.85it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.61it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.57it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.51it/s][A
  1%|          | 12/1000 [00:00<01:00, 16.45it/s][A
  1%|▏         | 14/1000 [00:00<00:59, 16.50it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.26it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.71it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.27it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.32it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.02it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.50it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.81it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.05it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.15it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.31it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.42it/s][A
  5%|

train acc: 0.930679976940155


  7%|▋         | 1/15 [02:18<32:19, 138.56s/it]

test acc: 0.9305999875068665
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 121.20it/s][A
  3%|▎         | 29/1000 [00:00<00:36, 26.35it/s] [A
  4%|▎         | 35/1000 [00:01<00:44, 21.63it/s][A
  4%|▍         | 39/1000 [00:01<00:47, 20.05it/s][A
  4%|▍         | 42/1000 [00:01<00:49, 19.24it/s][A
  4%|▍         | 45/1000 [00:02<00:53, 17.84it/s][A
  5%|▍         | 48/1000 [00:02<00:56, 16.94it/s][A
  5%|▌         | 50/1000 [00:02<00:57, 16.60it/s][A
  5%|▌         | 52/1000 [00:02<00:58, 16.29it/s][A
  5%|▌         | 54/1000 [00:02<00:59, 16.00it/s][A
  6%|▌         | 56/1000 [00:02<00:59, 15.90it/s][A
  6%|▌         | 58/1000 [00:02<01:00, 15.69it/s][A
  6%|▌         | 60/1000 [00:03<01:02, 14.95it/s][A
  6%|▌         | 62/1000 [00:03<01:03, 14.79it/s][A
  6%|▋         | 64/1000 [00:03<01:02, 15.00it/s][A
  7%|▋         | 66/1000 [00:03<01:02, 14.86it/s][A
  7%|▋         | 68/1000 [00:03<01:01, 15.05it/s][A
  7%|▋         | 70/1000 [00:03<01:01, 15.01it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:54, 18.19it/s][A
  0%|          | 4/1000 [00:00<00:58, 17.06it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.66it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.64it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.59it/s][A
  1%|          | 12/1000 [00:00<00:59, 16.56it/s][A
  1%|▏         | 14/1000 [00:00<00:59, 16.56it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.21it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.72it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.32it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.36it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.07it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.53it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.87it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.13it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.20it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.35it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.44it/s][A
  5%|

train acc: 0.9301999807357788


 13%|█▎        | 2/15 [04:36<29:56, 138.22s/it]

test acc: 0.9286999702453613
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.79it/s][A
  3%|▎         | 28/1000 [00:00<00:35, 27.61it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.86it/s][A
  4%|▍         | 38/1000 [00:01<00:47, 20.17it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.09it/s][A
  4%|▍         | 44/1000 [00:01<00:52, 18.12it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.80it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.39it/s][A
  5%|▌         | 51/1000 [00:02<00:59, 16.03it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 15.95it/s][A
  6%|▌         | 55/1000 [00:02<01:00, 15.73it/s][A
  6%|▌         | 57/1000 [00:02<01:00, 15.66it/s][A
  6%|▌         | 59/1000 [00:02<01:02, 15.17it/s][A
  6%|▌         | 61/1000 [00:03<01:03, 14.82it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.74it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.79it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.86it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.85it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:56, 17.72it/s][A
  0%|          | 4/1000 [00:00<00:58, 16.98it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.68it/s][A
  1%|          | 8/1000 [00:00<01:00, 16.35it/s][A
  1%|          | 10/1000 [00:00<01:01, 15.97it/s][A
  1%|          | 12/1000 [00:00<01:02, 15.79it/s][A
  1%|▏         | 14/1000 [00:00<01:01, 15.92it/s][A
  2%|▏         | 16/1000 [00:00<00:58, 16.73it/s][A
  2%|▏         | 19/1000 [00:01<00:51, 19.21it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 20.88it/s][A
  2%|▎         | 25/1000 [00:01<00:44, 22.04it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.82it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.36it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.72it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.00it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.13it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.29it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.40it/s][A
  5%|

train acc: 0.9290199875831604


 20%|██        | 3/15 [06:54<27:36, 138.00s/it]

test acc: 0.9273999929428101
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 119.43it/s][A
  3%|▎         | 28/1000 [00:00<00:35, 27.02it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.65it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.96it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.07it/s][A
  4%|▍         | 44/1000 [00:01<00:53, 17.81it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.76it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.37it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.22it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 15.88it/s][A
  6%|▌         | 55/1000 [00:02<00:59, 15.79it/s][A
  6%|▌         | 57/1000 [00:02<01:01, 15.42it/s][A
  6%|▌         | 59/1000 [00:03<01:02, 14.99it/s][A
  6%|▌         | 61/1000 [00:03<01:03, 14.71it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.64it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.71it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.82it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.80it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:53, 18.80it/s][A
  0%|          | 4/1000 [00:00<00:57, 17.36it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.81it/s][A
  1%|          | 8/1000 [00:00<01:00, 16.45it/s][A
  1%|          | 10/1000 [00:00<01:01, 16.05it/s][A
  1%|          | 12/1000 [00:00<01:02, 15.90it/s][A
  1%|▏         | 14/1000 [00:00<01:01, 16.13it/s][A
  2%|▏         | 16/1000 [00:00<00:58, 16.94it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.47it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.17it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.27it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.00it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.48it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.83it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.06it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.15it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.35it/s][A
  5%|▍         | 46/1000 [00:02<00:38, 24.46it/s][A
  5%|

train acc: 0.9310199618339539


 27%|██▋       | 4/15 [09:12<25:17, 137.92s/it]

test acc: 0.9300999641418457
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.86it/s][A
  3%|▎         | 28/1000 [00:00<00:35, 27.24it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.77it/s][A
  4%|▍         | 38/1000 [00:01<00:47, 20.07it/s][A
  4%|▍         | 41/1000 [00:01<00:49, 19.18it/s][A
  4%|▍         | 44/1000 [00:01<00:52, 18.19it/s][A
  5%|▍         | 47/1000 [00:02<00:55, 17.10it/s][A
  5%|▍         | 49/1000 [00:02<00:57, 16.47it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.15it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 16.02it/s][A
  6%|▌         | 55/1000 [00:02<01:00, 15.73it/s][A
  6%|▌         | 57/1000 [00:02<00:59, 15.80it/s][A
  6%|▌         | 59/1000 [00:02<01:00, 15.59it/s][A
  6%|▌         | 61/1000 [00:03<01:02, 15.00it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.74it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.82it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.82it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.92it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:55, 17.89it/s][A
  0%|          | 4/1000 [00:00<00:58, 17.05it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.78it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.63it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.55it/s][A
  1%|          | 12/1000 [00:00<00:59, 16.53it/s][A
  1%|▏         | 14/1000 [00:00<00:59, 16.46it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.25it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.75it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.33it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.38it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.08it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.55it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.88it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.10it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.19it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.34it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.45it/s][A
  5%|

train acc: 0.9288399815559387


 33%|███▎      | 5/15 [11:29<22:56, 137.65s/it]

test acc: 0.9293999671936035
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.81it/s][A
  3%|▎         | 28/1000 [00:00<00:36, 26.59it/s] [A
  3%|▎         | 34/1000 [00:01<00:45, 21.46it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.98it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.00it/s][A
  4%|▍         | 44/1000 [00:01<00:53, 18.02it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.80it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.33it/s][A
  5%|▌         | 51/1000 [00:02<00:59, 16.07it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 15.83it/s][A
  6%|▌         | 55/1000 [00:02<01:00, 15.73it/s][A
  6%|▌         | 57/1000 [00:02<01:00, 15.54it/s][A
  6%|▌         | 59/1000 [00:03<01:01, 15.34it/s][A
  6%|▌         | 61/1000 [00:03<01:03, 14.84it/s][A
  6%|▋         | 63/1000 [00:03<01:04, 14.60it/s][A
  6%|▋         | 65/1000 [00:03<01:04, 14.60it/s][A
  7%|▋         | 67/1000 [00:03<01:03, 14.67it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.85it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:57, 17.27it/s][A
  0%|          | 4/1000 [00:00<01:02, 16.06it/s][A
  1%|          | 6/1000 [00:00<01:02, 15.95it/s][A
  1%|          | 8/1000 [00:00<01:01, 16.19it/s][A
  1%|          | 10/1000 [00:00<01:00, 16.25it/s][A
  1%|          | 12/1000 [00:00<01:00, 16.38it/s][A
  1%|▏         | 14/1000 [00:00<01:00, 16.42it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.10it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.61it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.30it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.36it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.09it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.57it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.89it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.11it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.18it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.38it/s][A
  5%|▍         | 46/1000 [00:02<00:38, 24.48it/s][A
  5%|

train acc: 0.9312199950218201


 40%|████      | 6/15 [13:46<20:38, 137.64s/it]

test acc: 0.9271999597549438
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.79it/s][A
  3%|▎         | 28/1000 [00:00<00:34, 27.92it/s] [A
  3%|▎         | 34/1000 [00:01<00:43, 22.08it/s][A
  4%|▍         | 38/1000 [00:01<00:47, 20.38it/s][A
  4%|▍         | 41/1000 [00:01<00:49, 19.31it/s][A
  4%|▍         | 44/1000 [00:01<00:52, 18.30it/s][A
  5%|▍         | 47/1000 [00:02<00:55, 17.15it/s][A
  5%|▍         | 49/1000 [00:02<00:57, 16.60it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.35it/s][A
  5%|▌         | 53/1000 [00:02<00:58, 16.10it/s][A
  6%|▌         | 55/1000 [00:02<00:58, 16.12it/s][A
  6%|▌         | 57/1000 [00:02<00:58, 16.04it/s][A
  6%|▌         | 59/1000 [00:02<01:00, 15.48it/s][A
  6%|▌         | 61/1000 [00:03<01:02, 15.07it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.77it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.76it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.94it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.95it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:53, 18.60it/s][A
  0%|          | 4/1000 [00:00<00:58, 16.99it/s][A
  1%|          | 6/1000 [00:00<01:00, 16.42it/s][A
  1%|          | 8/1000 [00:00<01:01, 16.02it/s][A
  1%|          | 10/1000 [00:00<01:01, 16.15it/s][A
  1%|          | 12/1000 [00:00<01:01, 16.13it/s][A
  1%|▏         | 14/1000 [00:00<01:01, 16.05it/s][A
  2%|▏         | 16/1000 [00:00<00:58, 16.81it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.33it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.00it/s][A
  2%|▎         | 25/1000 [00:01<00:44, 22.14it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.88it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.40it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.76it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.00it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.14it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.29it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.42it/s][A
  5%|

train acc: 0.9293799996376038


 47%|████▋     | 7/15 [16:04<18:21, 137.65s/it]

test acc: 0.9276999831199646
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.84it/s][A
  3%|▎         | 28/1000 [00:00<00:36, 26.76it/s] [A
  3%|▎         | 34/1000 [00:01<00:45, 21.40it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.99it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 18.94it/s][A
  4%|▍         | 44/1000 [00:01<00:53, 17.92it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.87it/s][A
  5%|▍         | 49/1000 [00:02<00:57, 16.44it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.14it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 15.94it/s][A
  6%|▌         | 55/1000 [00:02<01:00, 15.69it/s][A
  6%|▌         | 57/1000 [00:02<01:00, 15.55it/s][A
  6%|▌         | 59/1000 [00:03<01:02, 15.10it/s][A
  6%|▌         | 61/1000 [00:03<01:03, 14.81it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.65it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.66it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.90it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.85it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:58, 16.98it/s][A
  0%|          | 4/1000 [00:00<00:59, 16.61it/s][A
  1%|          | 6/1000 [00:00<01:01, 16.15it/s][A
  1%|          | 8/1000 [00:00<01:03, 15.72it/s][A
  1%|          | 10/1000 [00:00<01:01, 15.99it/s][A
  1%|          | 12/1000 [00:00<01:01, 16.15it/s][A
  1%|▏         | 14/1000 [00:00<01:00, 16.24it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.00it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.49it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.23it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.34it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.05it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.59it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.95it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.19it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.26it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.41it/s][A
  5%|▍         | 46/1000 [00:02<00:38, 24.56it/s][A
  5%|

train acc: 0.9312199950218201


 53%|█████▎    | 8/15 [18:22<16:04, 137.83s/it]

test acc: 0.9318999648094177
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 120.73it/s][A
  3%|▎         | 29/1000 [00:00<00:37, 25.98it/s] [A
  4%|▎         | 35/1000 [00:01<00:45, 21.37it/s][A
  4%|▍         | 39/1000 [00:01<00:48, 19.83it/s][A
  4%|▍         | 42/1000 [00:01<00:50, 18.86it/s][A
  4%|▍         | 45/1000 [00:02<00:53, 17.71it/s][A
  5%|▍         | 48/1000 [00:02<00:56, 16.77it/s][A
  5%|▌         | 50/1000 [00:02<00:57, 16.50it/s][A
  5%|▌         | 52/1000 [00:02<00:58, 16.12it/s][A
  5%|▌         | 54/1000 [00:02<00:59, 16.02it/s][A
  6%|▌         | 56/1000 [00:02<01:00, 15.68it/s][A
  6%|▌         | 58/1000 [00:02<01:00, 15.61it/s][A
  6%|▌         | 60/1000 [00:03<01:02, 15.10it/s][A
  6%|▌         | 62/1000 [00:03<01:03, 14.73it/s][A
  6%|▋         | 64/1000 [00:03<01:03, 14.71it/s][A
  7%|▋         | 66/1000 [00:03<01:03, 14.70it/s][A
  7%|▋         | 68/1000 [00:03<01:03, 14.75it/s][A
  7%|▋         | 70/1000 [00:03<01:02, 14.90it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:58, 17.09it/s][A
  0%|          | 4/1000 [00:00<01:02, 15.97it/s][A
  1%|          | 6/1000 [00:00<01:02, 15.84it/s][A
  1%|          | 8/1000 [00:00<01:01, 16.14it/s][A
  1%|          | 10/1000 [00:00<01:01, 16.22it/s][A
  1%|          | 12/1000 [00:00<01:00, 16.22it/s][A
  1%|▏         | 14/1000 [00:00<01:00, 16.27it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.01it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.44it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.04it/s][A
  2%|▎         | 25/1000 [00:01<00:44, 22.01it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.83it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.32it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.77it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.03it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.15it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.31it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.42it/s][A
  5%|

train acc: 0.9301199913024902


 60%|██████    | 9/15 [20:40<13:46, 137.70s/it]

test acc: 0.9287999868392944
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.81it/s][A
  3%|▎         | 28/1000 [00:00<00:35, 27.25it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.90it/s][A
  4%|▍         | 38/1000 [00:01<00:47, 20.15it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.17it/s][A
  4%|▍         | 44/1000 [00:01<00:52, 18.22it/s][A
  5%|▍         | 47/1000 [00:02<00:55, 17.13it/s][A
  5%|▍         | 49/1000 [00:02<00:57, 16.52it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.21it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 16.03it/s][A
  6%|▌         | 55/1000 [00:02<00:59, 15.89it/s][A
  6%|▌         | 57/1000 [00:02<01:00, 15.69it/s][A
  6%|▌         | 59/1000 [00:02<01:00, 15.53it/s][A
  6%|▌         | 61/1000 [00:03<01:02, 14.96it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.84it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.84it/s][A
  7%|▋         | 67/1000 [00:03<01:03, 14.77it/s][A
  7%|▋         | 69/1000 [00:03<01:01, 15.04it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:55, 18.14it/s][A
  0%|          | 4/1000 [00:00<00:58, 17.15it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.83it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.60it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.59it/s][A
  1%|          | 12/1000 [00:00<00:59, 16.55it/s][A
  1%|▏         | 14/1000 [00:00<00:59, 16.52it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.24it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.73it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.32it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.37it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.08it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.56it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.85it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.09it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.19it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.35it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.46it/s][A
  5%|

train acc: 0.9304199814796448


 67%|██████▋   | 10/15 [22:58<11:29, 137.92s/it]

test acc: 0.9320999979972839
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 120.57it/s][A
  3%|▎         | 29/1000 [00:00<00:37, 26.11it/s] [A
  4%|▎         | 35/1000 [00:01<00:44, 21.50it/s][A
  4%|▍         | 39/1000 [00:01<00:48, 19.92it/s][A
  4%|▍         | 42/1000 [00:01<00:50, 18.90it/s][A
  4%|▍         | 45/1000 [00:02<00:54, 17.63it/s][A
  5%|▍         | 48/1000 [00:02<00:56, 16.73it/s][A
  5%|▌         | 50/1000 [00:02<00:57, 16.54it/s][A
  5%|▌         | 52/1000 [00:02<00:57, 16.37it/s][A
  5%|▌         | 54/1000 [00:02<00:58, 16.04it/s][A
  6%|▌         | 56/1000 [00:02<00:59, 15.94it/s][A
  6%|▌         | 58/1000 [00:02<01:00, 15.48it/s][A
  6%|▌         | 60/1000 [00:03<01:02, 14.98it/s][A
  6%|▌         | 62/1000 [00:03<01:03, 14.85it/s][A
  6%|▋         | 64/1000 [00:03<01:03, 14.75it/s][A
  7%|▋         | 66/1000 [00:03<01:02, 14.98it/s][A
  7%|▋         | 68/1000 [00:03<01:02, 14.90it/s][A
  7%|▋         | 70/1000 [00:03<01:01, 15.06it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:57, 17.45it/s][A
  0%|          | 4/1000 [00:00<01:00, 16.48it/s][A
  1%|          | 6/1000 [00:00<01:02, 15.92it/s][A
  1%|          | 8/1000 [00:00<01:02, 15.97it/s][A
  1%|          | 10/1000 [00:00<01:01, 16.00it/s][A
  1%|          | 12/1000 [00:00<01:01, 16.08it/s][A
  1%|▏         | 14/1000 [00:00<01:01, 16.03it/s][A
  2%|▏         | 16/1000 [00:00<00:58, 16.76it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.31it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.02it/s][A
  2%|▎         | 25/1000 [00:01<00:44, 22.12it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.91it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.44it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.82it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.09it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.17it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.33it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.45it/s][A
  5%|

train acc: 0.9295199513435364


 73%|███████▎  | 11/15 [25:16<09:11, 137.85s/it]

test acc: 0.9276999831199646
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.84it/s][A
  3%|▎         | 28/1000 [00:00<00:36, 26.82it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.59it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.94it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.07it/s][A
  4%|▍         | 44/1000 [00:01<00:53, 18.02it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.80it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.39it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.21it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 15.87it/s][A
  6%|▌         | 55/1000 [00:02<00:59, 15.84it/s][A
  6%|▌         | 57/1000 [00:02<00:59, 15.75it/s][A
  6%|▌         | 59/1000 [00:02<01:01, 15.32it/s][A
  6%|▌         | 61/1000 [00:03<01:02, 14.95it/s][A
  6%|▋         | 63/1000 [00:03<01:03, 14.73it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.75it/s][A
  7%|▋         | 67/1000 [00:03<01:02, 14.93it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.90it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:55, 18.02it/s][A
  0%|          | 4/1000 [00:00<00:58, 17.09it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.81it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.65it/s][A
  1%|          | 10/1000 [00:00<01:00, 16.40it/s][A
  1%|          | 12/1000 [00:00<01:01, 16.02it/s][A
  1%|▏         | 14/1000 [00:00<01:01, 15.91it/s][A
  2%|▏         | 16/1000 [00:00<00:58, 16.69it/s][A
  2%|▏         | 19/1000 [00:01<00:50, 19.29it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 21.07it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.22it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.99it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.47it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.84it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 24.06it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.15it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.36it/s][A
  5%|▍         | 46/1000 [00:02<00:38, 24.47it/s][A
  5%|

train acc: 0.9306399822235107


 80%|████████  | 12/15 [27:34<06:53, 137.88s/it]

test acc: 0.9294999837875366
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.81it/s][A
  3%|▎         | 28/1000 [00:00<00:36, 26.97it/s] [A
  3%|▎         | 34/1000 [00:01<00:44, 21.60it/s][A
  4%|▍         | 38/1000 [00:01<00:48, 19.97it/s][A
  4%|▍         | 41/1000 [00:01<00:50, 19.12it/s][A
  4%|▍         | 44/1000 [00:01<00:53, 17.98it/s][A
  5%|▍         | 47/1000 [00:02<00:56, 16.74it/s][A
  5%|▍         | 49/1000 [00:02<00:58, 16.34it/s][A
  5%|▌         | 51/1000 [00:02<00:58, 16.26it/s][A
  5%|▌         | 53/1000 [00:02<00:59, 16.00it/s][A
  6%|▌         | 55/1000 [00:02<00:59, 15.82it/s][A
  6%|▌         | 57/1000 [00:02<00:59, 15.74it/s][A
  6%|▌         | 59/1000 [00:02<01:01, 15.25it/s][A
  6%|▌         | 61/1000 [00:03<01:03, 14.85it/s][A
  6%|▋         | 63/1000 [00:03<01:04, 14.59it/s][A
  6%|▋         | 65/1000 [00:03<01:03, 14.63it/s][A
  7%|▋         | 67/1000 [00:03<01:03, 14.66it/s][A
  7%|▋         | 69/1000 [00:03<01:02, 14.88it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:59, 16.80it/s][A
  0%|          | 4/1000 [00:00<01:02, 15.97it/s][A
  1%|          | 6/1000 [00:00<01:02, 16.02it/s][A
  1%|          | 8/1000 [00:00<01:02, 15.93it/s][A
  1%|          | 10/1000 [00:00<01:03, 15.68it/s][A
  1%|          | 12/1000 [00:00<01:02, 15.74it/s][A
  1%|▏         | 14/1000 [00:00<01:02, 15.78it/s][A
  2%|▏         | 16/1000 [00:00<00:59, 16.53it/s][A
  2%|▏         | 19/1000 [00:01<00:51, 19.12it/s][A
  2%|▏         | 22/1000 [00:01<00:46, 20.85it/s][A
  2%|▎         | 25/1000 [00:01<00:44, 22.03it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 22.85it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.35it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.71it/s][A
  4%|▎         | 37/1000 [00:01<00:40, 23.99it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.08it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.23it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.36it/s][A
  5%|

train acc: 0.9275199770927429


 87%|████████▋ | 13/15 [29:51<04:35, 137.84s/it]

test acc: 0.9299999475479126
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.81it/s][A
  3%|▎         | 28/1000 [00:00<00:34, 28.12it/s] [A
  3%|▎         | 34/1000 [00:01<00:41, 23.02it/s][A
  4%|▍         | 38/1000 [00:01<00:45, 21.31it/s][A
  4%|▍         | 41/1000 [00:01<00:47, 20.24it/s][A
  4%|▍         | 44/1000 [00:01<00:49, 19.33it/s][A
  5%|▍         | 47/1000 [00:02<00:51, 18.51it/s][A
  5%|▌         | 50/1000 [00:02<00:53, 17.72it/s][A
  5%|▌         | 52/1000 [00:02<00:55, 17.18it/s][A
  5%|▌         | 54/1000 [00:02<00:55, 17.04it/s][A
  6%|▌         | 56/1000 [00:02<00:55, 16.90it/s][A
  6%|▌         | 58/1000 [00:02<00:56, 16.77it/s][A
  6%|▌         | 60/1000 [00:02<00:56, 16.68it/s][A
  6%|▌         | 62/1000 [00:02<00:56, 16.60it/s][A
  6%|▋         | 64/1000 [00:03<00:56, 16.49it/s][A
  7%|▋         | 66/1000 [00:03<00:56, 16.42it/s][A
  7%|▋         | 68/1000 [00:03<00:57, 16.30it/s][A
  7%|▋         | 70/1000 [00:03<00:58, 16.00it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:56, 17.58it/s][A
  0%|          | 4/1000 [00:00<00:58, 16.95it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.72it/s][A
  1%|          | 8/1000 [00:00<01:00, 16.51it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.57it/s][A
  1%|          | 12/1000 [00:00<00:59, 16.49it/s][A
  1%|▏         | 14/1000 [00:00<01:00, 16.40it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.16it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.67it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.35it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.38it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.07it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.57it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.87it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.12it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.24it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.36it/s][A
  5%|▍         | 46/1000 [00:02<00:38, 24.46it/s][A
  5%|

train acc: 0.9293199777603149


 93%|█████████▎| 14/15 [32:05<02:16, 136.40s/it]

test acc: 0.9296999573707581
training layer 0 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  2%|▏         | 16/1000 [00:00<00:08, 114.82it/s][A
  3%|▎         | 28/1000 [00:00<00:35, 27.46it/s] [A
  3%|▎         | 34/1000 [00:01<00:42, 22.70it/s][A
  4%|▍         | 38/1000 [00:01<00:45, 21.09it/s][A
  4%|▍         | 41/1000 [00:01<00:47, 20.09it/s][A
  4%|▍         | 44/1000 [00:01<00:49, 19.22it/s][A
  5%|▍         | 47/1000 [00:02<00:51, 18.45it/s][A
  5%|▌         | 50/1000 [00:02<00:54, 17.44it/s][A
  5%|▌         | 52/1000 [00:02<00:54, 17.25it/s][A
  5%|▌         | 54/1000 [00:02<00:55, 17.07it/s][A
  6%|▌         | 56/1000 [00:02<00:55, 16.91it/s][A
  6%|▌         | 58/1000 [00:02<00:55, 16.84it/s][A
  6%|▌         | 60/1000 [00:02<00:56, 16.75it/s][A
  6%|▌         | 62/1000 [00:02<00:56, 16.59it/s][A
  6%|▋         | 64/1000 [00:03<00:56, 16.44it/s][A
  7%|▋         | 66/1000 [00:03<00:57, 16.11it/s][A
  7%|▋         | 68/1000 [00:03<00:58, 15.85it/s][A
  7%|▋         | 70/1000 [00:03<00:58, 16.02it/s][A

training layer 1 ...



  0%|          | 0/1000 [00:00<?, ?it/s][A
  0%|          | 2/1000 [00:00<00:54, 18.39it/s][A
  0%|          | 4/1000 [00:00<00:57, 17.23it/s][A
  1%|          | 6/1000 [00:00<00:59, 16.84it/s][A
  1%|          | 8/1000 [00:00<00:59, 16.75it/s][A
  1%|          | 10/1000 [00:00<00:59, 16.58it/s][A
  1%|          | 12/1000 [00:00<00:59, 16.58it/s][A
  1%|▏         | 14/1000 [00:00<00:59, 16.53it/s][A
  2%|▏         | 16/1000 [00:00<00:57, 17.25it/s][A
  2%|▏         | 19/1000 [00:01<00:49, 19.76it/s][A
  2%|▏         | 22/1000 [00:01<00:45, 21.35it/s][A
  2%|▎         | 25/1000 [00:01<00:43, 22.39it/s][A
  3%|▎         | 28/1000 [00:01<00:42, 23.09it/s][A
  3%|▎         | 31/1000 [00:01<00:41, 23.57it/s][A
  3%|▎         | 34/1000 [00:01<00:40, 23.90it/s][A
  4%|▎         | 37/1000 [00:01<00:39, 24.10it/s][A
  4%|▍         | 40/1000 [00:01<00:39, 24.17it/s][A
  4%|▍         | 43/1000 [00:02<00:39, 24.33it/s][A
  5%|▍         | 46/1000 [00:02<00:39, 24.45it/s][A
  5%|

train acc: 0.9285999536514282


100%|██████████| 15/15 [34:17<00:00, 137.17s/it]

test acc: 0.9283999800682068
 
FFNet train accuracy: 0.9298 +- 0.0010
FFNet test accuracy: 0.9293 +- 0.0015
FBNet train accuracy: 1.0000 +- 0.0000
FBNet test accuracy: 0.9733 +- 0.0010



