In [1]:
!nvidia-smi

Sun Oct  3 07:16:59 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.118.02   Driver Version: 440.118.02   CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  On   | 00000000:51:00.0 Off |                  N/A |
|  0%   33C    P2    57W / 250W |   9339MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 108...  On   | 00000000:CB:00.0 Off |                  N/A |
|  0%   33C    P8     9W / 250W |   1238MiB / 11178MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 108...  On   | 00000000:D5:00.0 Off |                  N/A |
| 26%   

In [2]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from torch.autograd import Variable
from functools import reduce
import sys
from model import *

In [3]:
device = torch.device('cuda:2')

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [5]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
Ytrain= np.array(trainset.targets)
Xtrain= np.array(trainset.data)

Files already downloaded and verified


In [6]:
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
Ytest= np.array(testset.targets)
Xtest= np.array(testset.data)

Files already downloaded and verified


In [7]:
Xtrain= Xtrain.transpose(0,3,1,2)
Xtest=  Xtest.transpose(0,3,1,2)

In [8]:
model = nn.Sequential( # Sequential,
    nn.Conv2d(3,16,(3, 3),(1, 1),(1, 1),1,1,bias=False),
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.Sequential( *[block1() for _ in range(8)]),
    nn.Sequential(block4(), block2(), block2(), block2(), block2(), block2(), block2(), block2(),),
    nn.Sequential(block5(), block3(), block3(), block3(), block3(), block3(), block3(), block3(),),
    nn.AvgPool2d((8, 8),(1, 1)),
    Lambda(lambda x: x.view(x.size(0),-1)), # View,
    nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(64,10)), # Linear,
)

In [9]:
model = model.to(device)

In [10]:
NUM_CLASSES = 10
GAMMA_FIRST=0.5
checkEvery=10000
GAMMA_THRESH=-0.0001
lr= 0.001

maxIters= 10000 #maximum iterations before stopping train layer
transform= True # do CIFAR-style image transformations?
BARCH_SIZE= 100

In [11]:
# build blocks (specific to fb architecture)
allBlocks = {}
allBlocks[0] = nn.Sequential(model[0], model[1], model[2])
for i in range(8): allBlocks[1 + i] = model[3][i] 
for i in range(8): allBlocks[9 + i] = model[4][i] 
for i in range(8): allBlocks[17+ i] = model[5][i]
criterion = nn.CrossEntropyLoss()
nFilters = 15
rounds = 25

In [12]:
# helper for augmentation - necessary for cifar 
def transform(X):
    tmp = np.zeros((np.shape(X)[0],3,38,38))
    tmp[:, :, 2:34, 2:34] = X
    for i in range(np.shape(X)[0]):
        r1 = np.random.randint(4)
        r2 = np.random.randint(4)
        X[i] = tmp[i, :, r1 : r1 + 32, r2 : r2 + 32]
        if np.random.uniform() > .5:
            X[i] = X[i,:,:,::-1]
    return X

In [13]:
# helper for model evaluation
def getPerformance(net, X, Y, n):
    acc = 0.
    model.eval()
    Xoutput = np.zeros((X.shape[0], 10))
    for batch in range(int(X.shape[0] / BARCH_SIZE)):
        start = batch * BARCH_SIZE; stop = (batch + 1) * BARCH_SIZE - 1
        ints = np.linspace(start, stop, BARCH_SIZE).astype(int)
        data = Variable(torch.from_numpy(X[ints])).float().to(device)
        for i in range(n): data = allBlocks[i](data)
        output = net(data)
        acc += np.mean(torch.max(output,1)[1].cpu().data.numpy() == Y[ints])
        Xoutput[ints] = output.cpu().data.numpy()
    acc /= (X.shape[0] / BARCH_SIZE)
    model.train()
    return acc, Xoutput

In [14]:
def printer(print_arr):
    for v in print_arr: sys.stdout.write(str(v) + '\t')
    sys.stdout.write('\n')
    sys.stdout.flush()

In [15]:
nTrain = len(trainset)
a_previous = 0.0
a_current = -1.0
s = np.zeros((nTrain, NUM_CLASSES))
cost = np.zeros((nTrain, NUM_CLASSES))
Xoutput_previous = np.zeros((nTrain, NUM_CLASSES))
Ybatch = np.zeros((BARCH_SIZE))
YbatchTest = np.zeros((BARCH_SIZE))
gamma_previous = GAMMA_FIRST
totalIterations = 0; tries = 0 

In [16]:
def cal_cost(s):
    Z = 0 
    for i in range(nTrain):
        localSum = 0
        for l in range(NUM_CLASSES):
            if l != Ytrain[i]:
                cost[i][l] = np.exp(s[i][l] - s[i][int(Ytrain[i])])
                localSum += cost[i][l]
        cost[i][int(Ytrain[i])] = -1 * localSum
        Z += localSum
    
    return cost, Z

In [17]:
def build_tmp_model(n):
    bk = allBlocks[n]
    ci = nn.Sequential(model[6], model[7], model[8])
    
    if n < 17: 
        ci = nn.Sequential(allBlocks[17], ci)
    if n < 9:  
        ci = nn.Sequential(allBlocks[9], ci)
    
    modelTmp = nn.Sequential(bk, ci, nn.Softmax(dim=0))
    modelTmp = modelTmp.to(device)
    return modelTmp

In [None]:
for n in range(rounds):
    gamma = -1
    cost, Z = cal_cost(s)
    modelTmp = build_tmp_model(n)
    optimizer = torch.optim.Adam(modelTmp.parameters(), lr=lr)
    tries = 0
    XbatchTest = torch.zeros(BARCH_SIZE, nFilters, 32, 32)
    
    while (gamma < GAMMA_THRESH and ((checkEvery * tries) < maxIters)):
        accTrain = 0; 
        accTest = 0; 
        err = 0;
        for batch in range(1, checkEvery+1):
            optimizer.zero_grad()

            # get batch of training samples
            ints = np.random.randint(np.shape(Xtrain)[0] - 1, size=(BARCH_SIZE))
            Xbatch = Xtrain[ints]
            Ybatch = Variable(torch.from_numpy(Ytrain[ints])).long().to(device)

            # do transformations
            if transform: 
                Xbatch = transform(Xbatch)
            data = Variable(torch.from_numpy(Xbatch)).float().to(device)
            
            for i in range(n): 
                data = allBlocks[i](data)
            
            data = data.to(device)
            # get gradients
            output = modelTmp(data)
            loss = torch.exp(criterion(output, Ybatch))
            loss.backward()
            err += loss.item()

            # evaluate training accuracy
            output = modelTmp(data)
            accTrain += np.mean(torch.max(output,1)[1].cpu().data.numpy() == Ytrain[ints])
            
            # get test accuracy 
            model.eval()
            ints = np.random.randint(np.shape(Xtest)[0] - 1, size=(BARCH_SIZE))
            Xbatch = Xtest[ints]
            data = Variable(torch.from_numpy(Xbatch)).float().to(device)
            for i in range(n): 
                data = allBlocks[i](data)
            data = data.to(device)
            output = modelTmp(data)
            accTest += np.mean(torch.max(output,1)[1].cpu().data.numpy() == Ytest[ints])
            model.train()

            for p in modelTmp.parameters(): 
                p.grad.data.clamp_(-.1, .1)            
            optimizer.step()
        print(f"Round: {n}, Error: {err / checkEvery}, Train Acc: {accTrain / checkEvery}, Test Acc: {accTest / checkEvery}")
        
        # compute gamma
        accTrain, Xoutput = getPerformance(modelTmp, Xtrain, Ytrain, n)
        gamma_current = -1 * np.sum(Xoutput * cost) / Z
        gamma = (gamma_current ** 2 - gamma_previous ** 2)/(1 - gamma_previous ** 2) 
        if gamma > 0: 
            gamma = np.sqrt(gamma)
        else: 
            gamma = -1 * np.sqrt(-1 * gamma)
        a_current = 0.5 * np.log((1 + gamma_current) / (1 - gamma_current))
    
        tries += 1  

    s += Xoutput * a_current - Xoutput_previous * a_previous
    accTest, _ = getPerformance(modelTmp, Xtest, Ytest, n)    
    gamma_previous = gamma_current


Round: 0, Error: 9.3772533867836, Train Acc: 0.45119100000000434, Test Acc: 0.44656900000000316
Round: 1, Error: 9.254754838943482, Train Acc: 0.5597680000000104, Test Acc: 0.5629590000000104
Round: 2, Error: 9.222338208675385, Train Acc: 0.6055879999999983, Test Acc: 0.6094340000000021
Round: 3, Error: 9.209418673801423, Train Acc: 0.6297759999999953, Test Acc: 0.6313269999999952
Round: 4, Error: 9.201196561145782, Train Acc: 0.6473399999999899, Test Acc: 0.649373999999989
Round: 5, Error: 9.196208872509002, Train Acc: 0.6605139999999856, Test Acc: 0.6603159999999851
Round: 6, Error: 9.193766283416748, Train Acc: 0.6681629999999861, Test Acc: 0.6694709999999832
Round: 7, Error: 9.190884055137634, Train Acc: 0.6788479999999789, Test Acc: 0.679249999999982
Round: 8, Error: 9.188860508060456, Train Acc: 0.6867099999999821, Test Acc: 0.6855249999999828
Round: 9, Error: 9.185612966632844, Train Acc: 0.6953599999999831, Test Acc: 0.6914989999999801
Round: 10, Error: 9.185023915863036, Train