In [1]:
import torch
import torch.nn as nn
import numpy as np
import random
import math
import time
from torch.utils.data import Dataset,DataLoader
import os 

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
#device = "cpu"
print(f"Using {device}")

Using cuda


In [3]:
# Displayed as the cell result: whether PyTorch can use a CUDA device in this session.
torch.cuda.is_available()

True

# Loading the (1024, 512) dataset

In [4]:
# "1" - FROZEN BITS; "0" - INFORMATION BITS
def load_data(bits_path = "../input/fbmerge-large/fbmerge_large.txt",
              fer_path = "../input/fermergelarge/fermerge_large.txt"):
    """Load the frozen-bit patterns and their frame error rates (FER).

    Args:
        bits_path: text file with one sample per line, whitespace-separated
            bit values ("1" = frozen, "0" = information). The small variant
            used earlier was "../input/fbmerge-small/fbmerge_small.txt".
        fer_path: text file with one FER value per line, in the same order as
            ``bits_path`` (small variant:
            "../input/fermergesmall/fermerge_small.txt").

    Returns:
        ``(data, target)``: a float32 tensor of shape (n, dim) and a float32
        tensor of shape (n,).

    Raises:
        ValueError: if the bit file has no samples, a row is shorter than the
            first row, or the two files disagree on the number of samples.
    """
    # Blank lines (e.g. a trailing newline at EOF) are skipped rather than crashing the parser.
    with open(bits_path) as f:
        rows = [line.split() for line in f if line.strip()]
    if not rows:
        raise ValueError("no samples found in " + str(bits_path))

    dim = len(rows[0])
    print("Number of bits is : " + str(dim))
    n = len(rows)
    print("Number of samples is : " + str(n))

    for row_number, row in enumerate(rows):
        if len(row) < dim:
            raise ValueError("row " + str(row_number) + " has " + str(len(row))
                             + " values, expected " + str(dim))
    # Convert once instead of writing n * dim tensor elements one at a time.
    # Columns beyond `dim` are ignored, as before.
    data = torch.tensor([[float(v) for v in row[:dim]] for row in rows],
                        dtype = torch.float32)

    with open(fer_path) as f:
        fers = [float(line) for line in f if line.strip()]
    if len(fers) != n:
        raise ValueError("expected " + str(n) + " FER values, found " + str(len(fers)))
    target = torch.tensor(fers, dtype = torch.float32)
    return data, target

# data: one row of bit values per sample; target: the matching FER per sample.
data, target = load_data()

Number of bits is : 1024
Number of samples is : 15862


In [5]:
# Smallest FER present in the loaded dataset, followed by the FER vector itself.
print(f"Best fer on dataset is {target.min():E}")
print("FERs:", target)

Best fer on dataset is 5.750000E-05
FERs: tensor([4.7300e-04, 6.7700e-04, 1.5100e-04,  ..., 4.6000e-04, 4.2100e-04,
        7.7400e-05])


In [6]:
def cut_train_val(data, target, prop = 0.8):
    """Split samples into a leading training part and a trailing validation part.

    Args:
        data: tensor whose first dimension indexes samples.
        target: tensor of per-sample targets, aligned with ``data``.
        prop: fraction of samples (from the start) assigned to training.
            Previously this argument was ignored and 0.8 was always used.

    Returns:
        ``(x_train, y_train, x_val, y_val)`` as slices of the inputs; no
        shuffling is performed.

    Raises:
        ValueError: if ``prop`` is outside [0, 1].
    """
    if not 0.0 <= prop <= 1.0:
        raise ValueError("prop must be within [0, 1], got " + str(prop))
    n = data.shape[0]
    n_train = int(float(n) * prop)
    return data[:n_train], target[:n_train], data[n_train:], target[n_train:]

# Leading part of the dataset trains the model, the remainder validates it.
x_train, y_train, x_val, y_val = cut_train_val(data, target)

print(f"Using {x_train.shape[0]} samples to train")
print(f"Using {x_val.shape[0]} samples to validate")

Using 12689 samples to train
Using 3173 samples to validate


In [7]:
# Scalar mean / std over every bit of every training sample
# (the original author's note gives both as 0.5 for this dataset).
mean_x_train = x_train.flatten().mean()
std_x_train = x_train.flatten().std()
# Bit positions whose value is not identical across all training samples.
keep_indices = torch.nonzero(x_train.std(dim = 0) > 0, as_tuple = True)[0].numpy()
# Per-position training mean, kept as a (1, n_bits) row; unstandardize_input uses it
# to fill the positions that are not listed in keep_indices.
default_vals = x_train.mean(dim = 0, keepdim = True)

print(f"Only varying indices are {keep_indices}")
print("Number of varying indexes is" , len(keep_indices))
print("default_vals ", default_vals)

Only varying indices are [127 189 190 215 219 221 222 231 235 237 238 242 244 248 311 315 317 318
 335 343 346 348 357 358 361 362 364 369 370 372 376 398 403 405 406 409
 410 412 419 421 422 425 426 428 433 434 451 453 454 457 464 480 543 559
 565 566 569 570 572 589 590 595 597 598 601 602 604 611 613 614 617 618
 624 647 651 653 654 659 661 662 664 676 680 688 705 706 708 712 720 736
 775 779 780 785 786 788 792 801 802 804 808 816 833 834 836 840 848 864
 897 898 900 904]
Number of varying indexes is 112
default_vals  tensor([[1., 1., 1.,  ..., 0., 0., 0.]])


In [8]:
def standardize_input(data):
    """Keep only the varying bit positions and rescale them with the training statistics.

    Reads the module-level ``keep_indices``, ``mean_x_train`` and
    ``std_x_train``.

    Args:
        data: 2-D tensor of shape (n_samples, n_bits).

    Returns:
        A new float32 CPU tensor of shape (n_samples, len(keep_indices)) equal
        to ``(data[:, keep_indices] - mean_x_train) / std_x_train``. With mean
        and std at 0.5 this maps 0 (information) to -1 and 1 (frozen) to +1.
    """
    # One indexed gather replaces the former per-element Python double loop.
    columns = torch.as_tensor(keep_indices, dtype = torch.long)
    selected = data[:, columns].to(dtype = torch.float32, device = "cpu")
    return (selected - mean_x_train) / std_x_train

def unstandardize_input(data):
    """Expand standardized varying bits back to the full bit-vector width.

    Reads the module-level ``default_vals``, ``x_train``, ``keep_indices``,
    ``mean_x_train`` and ``std_x_train``.

    Args:
        data: 2-D tensor of shape (n_samples, k); column ``j`` belongs to bit
            position ``keep_indices[j]``.

    Returns:
        A tensor of shape (n_samples, x_train.shape[1]) on the device of
        ``default_vals``: every row starts from the per-position training mean
        and the positions in ``keep_indices`` are overwritten with
        ``data * std_x_train + mean_x_train``.
    """
    result = default_vals + torch.zeros(data.shape[0], x_train.shape[1])
    columns = torch.as_tensor(keep_indices[:data.shape[1]], dtype = torch.long)
    restored = data * std_x_train + mean_x_train
    # `data` may live on another device (e.g. the GPU); move it next to `result`
    # before the single vectorized write that replaces the former double loop.
    result[:, columns] = restored.to(device = result.device, dtype = result.dtype)
    return result

In [9]:
# Network inputs: varying bit positions only, rescaled with the training mean / std.
new_x_train, new_x_val = standardize_input(x_train), standardize_input(x_val)

In [10]:
# Round trip back to full-width bit vectors; printed further down to compare against the originals.
new_x_train_unst, new_x_val_unst = unstandardize_input(new_x_train), unstandardize_input(new_x_val)

In [11]:
print("train and test data dimentions (original): ", x_train.shape, x_val.shape)
print("train and test data dimentions (used, stand,): ", new_x_train.shape, new_x_val.shape)
print("train and test data dimentions (used, unstand.): ", new_x_train_unst.shape, new_x_val_unst.shape)
# The standardized train/val inputs keep only the varying bit positions
# (1024 -> 112 for this dataset), which gives the network fewer inputs to learn from.
# Presumably the varying bits are the ones that contribute the most to
# the final performance of the code.


print(x_train[0]) # first sample (orig)
print(new_x_train[0]) # first sample (modified, stand.)
print(new_x_train_unst[0]) # first sample (modified, unstand.)

train and test data dimentions (original):  torch.Size([12689, 1024]) torch.Size([3173, 1024])
train and test data dimentions (used, stand,):  torch.Size([12689, 112]) torch.Size([3173, 112])
train and test data dimentions (used, unstand.):  torch.Size([12689, 1024]) torch.Size([3173, 1024])
tensor([1., 1., 1.,  ..., 0., 0., 0.])
tensor([-1.,  1.,  1.,  1.,  1., -1., -1.,  1., -1., -1., -1.,  1.,  1.,  1.,
         1., -1., -1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        -1., -1., -1.,  1.,  1.,  1.,  1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1., -1., -1., -1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,
         1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1., -1., -1., -1., -1.,
        -1., -1.,  1.,  1.,  1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1.,  1., -1., -1., -1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])
tensor([1., 1., 1.,  ..., 0., 0., 0.])


In [12]:
# Statistics of the natural log of the training FERs; the printed labels say "FER"
# but both numbers are in log units. Their values depend on the dataset.
mean_log_y_train = y_train.log().mean()
std_log_y_train = y_train.log().std()
print("Mean FER:", mean_log_y_train)
print("Sigma FER:", std_log_y_train)

Mean FER: tensor(-8.0152)
Sigma FER: tensor(0.4400)


In [13]:
def standardize_output(data):
    """Map FER values to standardized log scale.

    Takes the natural log of ``data`` (a tensor), then subtracts the
    module-level ``mean_log_y_train`` and divides by ``std_log_y_train``.
    """
    log_fer = torch.log(data)
    centered = log_fer - mean_log_y_train
    return centered / std_log_y_train

def unstandardize_output(data):
    """Inverse of standardize_output: standardized log-FER back to linear FER.

    ``data`` may be a tensor or a plain Python number; the result is a tensor
    because the module-level ``std_log_y_train`` / ``mean_log_y_train`` are
    tensors.
    """
    log_fer = (data * std_log_y_train) + mean_log_y_train
    return torch.exp(log_fer)

In [14]:
# Regression targets: FER in standardized log scale (training statistics applied to both splits).
new_y_train, new_y_val = standardize_output(y_train), standardize_output(y_val)

In [15]:
# Round trip back to linear FER; the training version is printed below next to the original values.
new_y_train_unst, new_y_unst = unstandardize_output(new_y_train), unstandardize_output(new_y_val)

In [44]:
print("train FER values (original): ", y_train)
print("train FER values (used, stand,): ", new_y_train)
print("train FER values (used, unstand.): ", new_y_train_unst)




##########################################################
# Module-level running best, in standardized log-FER units. pgd_search() declares
# it `global` and overwrites it whenever the model predicts a lower value, so it
# starts at the best training FER and only decreases afterwards.
best_fer_dataset = torch.min(new_y_train)
print("best FER logscale: " + str(best_fer_dataset))
##########################################################

train FER values (original):  tensor([0.0005, 0.0007, 0.0002,  ..., 0.0004, 0.0007, 0.0003])
train FER values (used, stand,):  tensor([ 0.8156,  1.6305, -1.7796,  ...,  0.5670,  1.7799,  0.0652])
train FER values (used, unstand.):  tensor([0.0005, 0.0007, 0.0002,  ..., 0.0004, 0.0007, 0.0003])
best FER logscale: tensor(-3.9740)


In [17]:
class SimpleDataset(Dataset):
    """In-memory dataset pairing each input row with its regression target."""

    def __init__(self, all_data, all_targets):
        """Store the inputs and targets.

        Args:
            all_data: tensor of features (bits); first dimension indexes samples.
            all_targets: tensor of labels (FER), aligned with ``all_data``.

        Raises:
            ValueError: if the two tensors hold a different number of samples.
        """
        # Was super(Dataset, self).__init__(), which starts the MRO lookup *after*
        # Dataset and therefore never runs Dataset's own initialiser.
        super().__init__()
        if all_data.shape[0] != all_targets.shape[0]:
            raise ValueError("data and targets must have the same number of samples")
        self.data = all_data # features(bits)
        self.targets = all_targets # labels (FER)

    def __getitem__(self, index: int):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.data[index], self.targets[index]

    def __len__(self) -> int:
        """Number of samples."""
        return self.data.shape[0]
#batch_size = 256
# Training batches are drawn in a new random order every epoch.
train_loader = DataLoader(
    SimpleDataset(new_x_train, new_y_train),
    batch_size = 32,
    shuffle = True,
    num_workers = 4,  # num_worker = 4 * num_GPU
)

# No shuffle argument here: test() pairs prediction i with y_val[i] by position,
# so the validation order has to stay as stored.
test_loader = DataLoader(
    SimpleDataset(new_x_val, new_y_val),
    batch_size = 32,
    num_workers = 4,  # num_worker = 4 * num_GPU
)

  cpuset_checked))


# Defining architectures

In [18]:
input_dim = new_x_train.shape[1]
print("Input dimention:", new_x_train.shape[1]) # number of varying bit positions fed to the network
# input_dim is read by Architecture as the width of its first Linear layer (112 for this dataset)
class Architecture(nn.Module):
    """Fully connected ReLU regressor with optional additive skip connections.

    The network is ``depth`` Linear layers; every layer except the last is
    followed by a ReLU, and the last layer has a single output (the predicted
    standardized log-FER).

    Args:
        depth: number of Linear layers.
        hidden_layer_size: width of every hidden layer.
        skip_gaps: distance, in layers, bridged by a skip connection. Layer
            ``i`` (0-based) receives the output of layer ``i - skip_gaps`` when
            ``i > skip_gaps`` and ``(i - 1) % skip_gaps == 0``. A value of
            ``None`` or ``<= 0`` disables skip connections.
        in_features: input width; defaults to the module-level ``input_dim``.
    """
    def __init__(self, depth, hidden_layer_size, skip_gaps, in_features = None):
        super(Architecture, self).__init__()
        layers = []
        previous_size = input_dim if in_features is None else in_features

        for i in range(depth):
            is_last = i == depth - 1
            # final layer has one neuron, since the output has one value (FER)
            out_size = 1 if is_last else hidden_layer_size
            layer = [nn.Linear(previous_size, out_size)]
            if not is_last:
                layer.append(nn.ReLU())
            layers.append(nn.Sequential(*layer))
            previous_size = out_size

        self.layers = nn.ModuleList(layers)
        self.skip_gaps = skip_gaps

    def forward(self, x):
        """Return one prediction per input row as a flat 1-D tensor."""
        gap = self.skip_gaps
        use_skips = gap is not None and gap > 0
        # activations[0] is the input, activations[k + 1] the output of layer k
        activations = [x]
        for i, layer in enumerate(self.layers):
            out = layer(activations[-1])

            # The condition used to read `i - 1 % gap == 0`, which parses as
            # `i - (1 % gap) == 0` and, combined with `i > gap`, was never true.
            if use_skips and i > gap and (i - 1) % gap == 0:
                skip = activations[i + 1 - gap]  # output of layer i - gap
                # The width-1 output layer cannot take a hidden-width skip;
                # adding it would broadcast and change the output shape.
                if skip.shape == out.shape:
                    # Out-of-place add: ReLU's backward needs its output unmodified.
                    out = out + skip

            activations.append(out)

        return activations[-1].reshape(-1)
    
# https://discuss.pytorch.org/t/understand-nn-module/8416

Input dimention: 112


In [19]:
def train(model, epochs, mixup = False, verbose = True, loader = None):
    """Fit ``model`` with Adam on an MSE loss.

    Args:
        model: network whose forward pass returns one value per input row.
        epochs: number of passes over the loader.
        mixup: if True, each batch is blended with a shuffled copy of itself
            (one random weight per batch) and the loss is the matching blend
            of the losses against both target orderings.
        verbose: print the mean loss per batch, and a separator, after every
            epoch. The separator used to be printed even when this was False.
        loader: iterable of ``(data, target)`` batches; defaults to the
            module-level ``train_loader``.

    Returns:
        List with the mean per-batch loss of every epoch (``nan`` for an epoch
        whose loader produced no batches).
    """
    if loader is None:
        loader = train_loader

    criterion = torch.nn.MSELoss() # aka loss function
    optimizer = torch.optim.Adam(model.parameters())
    # Batches are moved to wherever the model's parameters live.
    model_device = next(model.parameters()).device

    model.train()
    epoch_losses = []

    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0

        for data, target in loader:
            data, target = data.to(model_device), target.to(model_device)

            # Zero your gradients for every batch!
            optimizer.zero_grad()

            if mixup:
                lam = random.random()
                index_mixed = torch.randperm(data.shape[0])
                data = lam * data + (1 - lam) * data[index_mixed]

            # Make predictions for this batch
            output = model(data)

            # Compute the loss and its gradients
            if mixup:
                loss = lam * criterion(output, target) + (1 - lam) * criterion(output, target[index_mixed])
            else:
                loss = criterion(output, target)

            loss.backward()

            # Adjust learning weights
            optimizer.step()

            # Gather data and report
            total_loss += loss.item()
            num_batches += 1

        # Count batches explicitly instead of relying on a leaked loop index.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)

        if verbose:
            print('  epoch {} loss: {}'.format(epoch + 1, mean_loss))
            print('---------------------------------')
    print("Model is trained")
    return epoch_losses
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

In [20]:
def test(model, loader = test_loader, y_val = y_val, verbose = True): # IOE - inflation of error
    """Evaluate ``model`` and report the inflation of error (IOE) in linear FER.

    For every sample the IOE is ``max(pred / true, true / pred) - 1``, where
    ``pred`` is the model output mapped back through unstandardize_output.

    Args:
        model: trained network.
        loader: iterable of ``(data, target)`` batches; it must yield samples
            in the same order as ``y_val`` because predictions are matched to
            ``y_val`` by position.
        y_val: linear-scale FERs of the evaluated samples.
        verbose: print the mean MSE per batch (now a plain float rather than a
            tensor repr).

    Returns:
        ``(mean_ratio, worst_ratio)`` as 0-dim tensors.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.eval()
    model_device = next(model.parameters()).device

    criterion = torch.nn.MSELoss()
    total_loss = 0.0
    num_batches = 0
    results = []

    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(model_device), target.to(model_device)

            output = model(data)

            # .item() keeps the running sum a Python float, as train() does
            total_loss += criterion(output, target).item()
            num_batches += 1
            results.append(output)

    if num_batches == 0:
        raise ValueError("loader produced no batches")

    if verbose:
        print("Test loss: " + str(total_loss / num_batches))

    # y_val is compared on the CPU, so bring the predictions back from the device
    results = torch.cat(results, dim = 0).cpu()

    ########### FORMULA (5) <- IOE ###########
    readable_outputs = unstandardize_output(results) # to linear
    reference = y_val[:readable_outputs.shape[0]]

    # element-wise maximum of the two ratios, replacing the per-sample Python loop
    ratios = torch.max(readable_outputs / reference, reference / readable_outputs) - 1

    return ratios.mean(), ratios.max()

In [21]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Default resolution for figures shown inline and for figures written with savefig.
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300

In [22]:
def confidence(scores):
    """Half-width of the normal-approximation 95% confidence interval of the mean.

    Computed as ``std * 1.96 / sqrt(n)``, where ``std`` is NumPy's default
    (population) standard deviation of ``scores``.
    """
    spread = np.std(scores)
    sample_count = len(scores)
    return spread * 1.96 / math.sqrt(sample_count)

In [23]:
def stats(depth, hidden_layer_size, skip_gaps, mixup = False, max_time = -1, runs = 1):
    """Train fresh models repeatedly and summarise their validation IOE.

    Each run builds a new Architecture, trains it for the module-level
    ``epochs`` and evaluates it with test(). Runs stop after ``runs``
    repetitions, or earlier once more than ``max_time`` whole seconds have
    elapsed (only when ``max_time`` is positive).

    Returns:
        ``[mean of mean IOE, mean of worst IOE, confidence(mean IOE),
        confidence(worst IOE), number of runs performed]``.
    """
    started = time.time()
    mean_results, worst_results = [], []
    run = 0

    def budget_exhausted():
        # the time limit applies only when max_time is positive
        return int(time.time() - started) > max_time and max_time > 0

    while not budget_exhausted() and run < runs:
        run += 1

        model = Architecture(depth, hidden_layer_size, skip_gaps).to(device)
        print("Run: ", run)

        train(model, epochs, mixup)
        mean, worst = test(model) # mean and worst IOE of this run

        mean_results.append(mean)
        worst_results.append(worst)

    print("DONE")
    summary = [np.mean(mean_results), np.mean(worst_results)]
    summary.append(confidence(mean_results))
    summary.append(confidence(worst_results))
    summary.append(run)
    return summary
# 0 - mean IOE, 1 - worst IOE, 2 - confidence half-width (mean), 3 - confidence half-width (worst), 4 - # runs performed

In [24]:
# what do you want to run
# Each flag guards one hyper-parameter sweep cell and its plotting cell below.
e = False # sweep the number of training epochs
h = False # sweep the hidden layer size
d = False # sweep the network depth
s = False # sweep the skip-connection gap

# IOE vs #epochs

In [25]:
if e:
    # Architecture is fixed; only the number of training epochs changes.
    depth, hidden_layer_size, skip_gaps = 6, 320, 2
    mixup = False
    max_time = -1 # we don't care how long will it take
    runs = 5

    # results[k] collects the k-th value returned by stats() for every sweep point
    results = [[] for _ in range(5)]

    #var_epochs = [2, 6, 10, 20, 30, 40]
    var_epochs = [10, 30, 50, 100, 150]

    for var_epoch in var_epochs:
        # stats() reads the module-level `epochs`, so it is set before each call
        epochs = var_epoch
        print('Cureent epochs threshold:', epochs)

        outs = stats(depth, hidden_layer_size, skip_gaps, max_time = max_time, runs = runs)

        for column, value in zip(results, outs):
            column.append(value)

In [26]:
if e:
    # results rows: 0 - mean IOE, 1 - worst IOE, 2 - confidence half-width (mean),
    # 3 - confidence half-width (worst), 4 - # runs performed
    fig = plt.figure(figsize=(8, 6), dpi=80)

    plt.subplot(2, 1, 1) # 2 rows, 1 column, top panel
    plt.plot(var_epochs, results[0]) # mean IOE
    plt.fill_between(var_epochs, np.array(results[0]) - np.array(results[2]),
                     np.array(results[0]) + np.array(results[2]), color='b', alpha=.1)
    plt.xlabel("Number of epochs in training")
    plt.ylabel("Average IOE on validation dataset")

    plt.subplot(2, 1, 2) # bottom panel
    plt.plot(var_epochs, results[1]) # worst IOE
    plt.fill_between(var_epochs, np.array(results[1]) - np.array(results[3]),
                     np.array(results[1]) + np.array(results[3]), color='r', alpha=.1)
    plt.xlabel("Number of epochs in training")
    plt.ylabel("Worst IOE on validation dataset")

    # Save before show(): once show() has released the figure, pyplot's savefig
    # would write a new, empty one.
    fig.savefig("/kaggle/working/vs_epochs.png")
    plt.show()

    for row in results:
        print(row)

# IOE vs size of the hidden layers

In [27]:
if h:
    # Everything except the hidden-layer width is held fixed.
    depth, epochs, skip_gaps = 6, 100, 2
    mixup = False
    max_time = -1 # we don't care how long will it take
    runs = 5

    # results[k] collects the k-th value returned by stats() for every sweep point
    results = [[] for _ in range(5)]
    sizes = [5, 10, 20, 40, 80, 160, 320, 640]

    for size in sizes:
        print('Cureent hidden leyer size:', size)

        outs = stats(depth, size, skip_gaps, mixup, max_time = max_time, runs = runs)

        for column, value in zip(results, outs):
            column.append(value)

In [28]:
if h:
    # results rows: 0 - mean IOE, 1 - worst IOE, 2 - confidence half-width (mean),
    # 3 - confidence half-width (worst), 4 - # runs performed
    fig = plt.figure(figsize=(8, 6), dpi=80)

    plt.subplot(2, 1, 1) # 2 rows, 1 column, top panel
    plt.plot(sizes, results[0]) # mean IOE
    plt.fill_between(sizes, np.array(results[0]) - np.array(results[2]),
                     np.array(results[0]) + np.array(results[2]), color='b', alpha=.1)
    plt.xlabel("Number of neurons in hidden layers")
    plt.ylabel("Average IOE on validation dataset")

    plt.subplot(2, 1, 2) # bottom panel
    plt.plot(sizes, results[1]) # worst IOE
    plt.fill_between(sizes, np.array(results[1]) - np.array(results[3]),
                     np.array(results[1]) + np.array(results[3]), color='r', alpha=.1)
    plt.xlabel("Number of neurons in hidden layers")
    plt.ylabel("Worst IOE on validation dataset")

    # Save before show(): once show() has released the figure, pyplot's savefig
    # would write a new, empty one.
    fig.savefig("/kaggle/working/vs_hidden.png")
    plt.show()

    for row in results:
        print(row)

# IOE vs depth

In [29]:
if d:
    # Everything except the number of layers is held fixed.
    hidden_layer_size, epochs, skip_gaps = 320, 100, 2
    mixup = False
    max_time = -1 # we don't care how long will it take
    runs = 5

    # results[k] collects the k-th value returned by stats() for every sweep point
    results = [[] for _ in range(5)]
    depths = [2, 3, 4, 5, 6, 7, 8, 9, 10]

    for var_depth in depths:
        print('Cureent depth:', var_depth)

        outs = stats(var_depth, hidden_layer_size, skip_gaps, mixup, max_time = max_time, runs = runs)

        for column, value in zip(results, outs):
            column.append(value)

In [30]:
if d:
    # results rows: 0 - mean IOE, 1 - worst IOE, 2 - confidence half-width (mean),
    # 3 - confidence half-width (worst), 4 - # runs performed
    fig = plt.figure(figsize=(8, 6), dpi=80)

    plt.subplot(2, 1, 1) # 2 rows, 1 column, top panel
    plt.plot(depths, results[0]) # mean IOE
    plt.fill_between(depths, np.array(results[0]) - np.array(results[2]),
                     np.array(results[0]) + np.array(results[2]), color='b', alpha=.1)
    plt.xlabel("Depth of the architecture")
    plt.ylabel("Average IOE on validation dataset")

    plt.subplot(2, 1, 2) # bottom panel
    plt.plot(depths, results[1]) # worst IOE
    plt.fill_between(depths, np.array(results[1]) - np.array(results[3]),
                     np.array(results[1]) + np.array(results[3]), color='r', alpha=.1)
    plt.xlabel("Depth of the architecture")
    plt.ylabel("Worst IOE on validation dataset")

    # Save before show(): once show() has released the figure, pyplot's savefig
    # would write a new, empty one.
    fig.savefig("/kaggle/working/vs_depth.png")
    plt.show()

    for row in results:
        print(row)

# IOE vs skip-connection length

In [31]:
if s:
    # Everything except the skip-connection gap is held fixed.
    hidden_layer_size, epochs, depth = 320, 100, 6
    mixup = False
    max_time = -1 # we don't care how long will it take
    runs = 5

    # results[k] collects the k-th value returned by stats() for every sweep point
    results = [[] for _ in range(5)]
    gaps = [2, 3, 4, 5, 6]

    for gap in gaps:
        print('Cureent skip graph lenghth:', gap)

        outs = stats(depth, hidden_layer_size, gap, mixup, max_time = max_time, runs = runs)

        for column, value in zip(results, outs):
            column.append(value)

In [32]:
if s:
    # results rows: 0 - mean IOE, 1 - worst IOE, 2 - confidence half-width (mean),
    # 3 - confidence half-width (worst), 4 - # runs performed
    fig = plt.figure(figsize=(8, 6), dpi=80)

    plt.subplot(2, 1, 1) # 2 rows, 1 column, top panel
    plt.plot(gaps, results[0]) # mean IOE
    plt.fill_between(gaps, np.array(results[0]) - np.array(results[2]),
                     np.array(results[0]) + np.array(results[2]), color='b', alpha=.1)
    plt.xlabel("Skip connections length")
    plt.ylabel("Average IOE on validation dataset")

    plt.subplot(2, 1, 2) # bottom panel
    plt.plot(gaps, results[1]) # worst IOE
    plt.fill_between(gaps, np.array(results[1]) - np.array(results[3]),
                     np.array(results[1]) + np.array(results[3]), color='r', alpha=.1)
    plt.xlabel("Skip connections length")
    plt.ylabel("Worst IOE on validation dataset")

    # Save before show(): once show() has released the figure, pyplot's savefig
    # would write a new, empty one.
    fig.savefig("/kaggle/working/vs_skip.png")
    plt.show()

    for row in results:
        print(row)

# Generating frozen bits using Projected Gradient Descent

In [33]:
# Suffix of the files written by write_inputs / write_outputs.
# NOTE(review): the label says 64 bytes while the loaded dataset has 1024-bit rows — confirm the intended name.
dataset_name = "64BYTES"

def write_inputs(inputs, method, out_dir = "/kaggle/working"):
    """Append candidate bit patterns to ``proposed_fb_<method>_<dataset_name>.txt``.

    Args:
        inputs: standardized varying bits, one candidate per row; expanded to
            full-width bit vectors with unstandardize_input before writing.
        method: search method label used in the file name.
        out_dir: directory receiving the file.

    The file is opened in append mode, so repeated calls accumulate rows. Each
    value is written as an integer followed by one space, one candidate per line.
    """
    inputs = unstandardize_input(inputs)
    path = out_dir + "/proposed_fb_" + method + "_" + dataset_name + ".txt"

    # Round to the nearest integer: the values come from float arithmetic, and
    # int() truncation would turn e.g. 0.9999999 into 0.
    rows = inputs.round().to(torch.int64).tolist()

    # `with` closes the file even if a write fails.
    with open(path, "a") as f:
        for row in rows:
            f.write("".join(str(bit) + " " for bit in row))
            f.write("\n")

def write_outputs(outputs, method, do_write = True, out_dir = "/kaggle/working"):
    """Convert predictions to linear FER and optionally append them to a file.

    Args:
        outputs: model predictions in standardized log scale.
        method: search method label used in the file name.
        do_write: when True, append one value per line to
            ``proposed_fer_<method>_<dataset_name>.txt``.
        out_dir: directory receiving the file.

    Returns:
        The predictions mapped through unstandardize_output.
    """
    outputs = unstandardize_output(outputs) # standardized log scale -> linear FER

    if do_write:
        path = out_dir + "/proposed_fer_" + method + "_" + dataset_name + ".txt"
        # `with` closes the file even if a write fails.
        with open(path, "a") as f:
            for i in range(outputs.shape[0]):
                f.write(str(outputs[i].item()) + "\n")

    return outputs

In [34]:
# Which search procedures to run in the cells below.
pgd = True
rndm = False
# ----------- USED IN PGD AND RAND---------------------
# These module-level names are read by the search cells; `epochs` is also read by random_search().
epochs = 100
hidden_layer_size = 649 # NOTE(review): the sweeps above use 640 — confirm 649 is intended
depth = 3
skip_gaps = 3
mixup = False
# ----------------------------------------------------    
if pgd:
    # Surrogate FER predictor that pgd_search() differentiates with respect to its input.
    model = Architecture(depth, hidden_layer_size, skip_gaps).to(device)
    train(model, epochs, mixup)
#print("model is trained")

  epoch 1 loss: 0.1505621719908354
---------------------------------
  epoch 2 loss: 0.04609107737539697
---------------------------------
  epoch 3 loss: 0.02396059999716297
---------------------------------
  epoch 4 loss: 0.015960319761787387
---------------------------------
  epoch 5 loss: 0.012554041932217345
---------------------------------
  epoch 6 loss: 0.010937817745445583
---------------------------------
  epoch 7 loss: 0.010038844766024903
---------------------------------
  epoch 8 loss: 0.009959181823465101
---------------------------------
  epoch 9 loss: 0.010415730641343131
---------------------------------
  epoch 10 loss: 0.008928804821979903
---------------------------------
  epoch 11 loss: 0.008582876039251347
---------------------------------
  epoch 12 loss: 0.008026486530111778
---------------------------------
  epoch 13 loss: 0.008001385821423655
---------------------------------
  epoch 14 loss: 0.007140297785762613
---------------------------------
  epo

In [35]:
#model.parameters()

In [46]:
def pgd_search(model, lr, iterations):
    """Search for an input bit pattern with a low predicted FER.

    A single candidate row is kept in two forms: ``data`` holds the quantized
    pattern (entries -1 / +1) that is fed to the model, and ``data_approx``
    holds a real-valued copy that accumulates the gradient steps. Every
    iteration re-quantizes ``data`` from ``data_approx`` around the row
    median, evaluates the model, records the candidate if it beats the
    module-level ``best_fer_dataset``, and then moves ``data_approx`` along
    the gradient computed at the quantized point.

    Args:
        model: trained predictor; all of its parameters are frozen here and
            stay frozen after the call.
        lr: SGD learning rate.
        iterations: number of quantize / evaluate / step rounds.

    Side effects:
        Updates the global ``best_fer_dataset`` and appends improving
        candidates through write_inputs / write_outputs with method "pgd".
    """

    global best_fer_dataset # running best in standardized log-FER units, shared across calls
    print ("best_fer_dataset:", best_fer_dataset)
    
    # NOTE(review): this only sets a plain attribute on the module object; it is the loop
    # below that actually switches off gradients for the parameters.
    model.requires_grad = False
    
    for p in model.parameters():
        p.requires_grad = False
    # The model is frozen; gradients are taken with respect to the input (varying bits) only.
    
    data = torch.ones(1, new_x_train.shape[1]).to(device)# one candidate row, one column per varying bit
    
    
    for i in range(data.shape[0]):# shape[0] - rows; shape[1] - columns
        # random start: half of the positions (floor division) are set to -1, the rest stay +1
        data[i, torch.randperm(data.shape[1])[:data.shape[1] // 2]] = -1
    
    data_approx = data.clone().to(device)# data - quantized version; data_approx - real version.     
    
    # Both tensors are leaf variables handed to SGD. `data` is overwritten by the
    # quantization at the start of every iteration, so only the updates applied to
    # `data_approx` persist from one iteration to the next.
    data.requires_grad = True  # quantized
    data_approx.requires_grad = True # real
    
    optimizer = torch.optim.SGD([data, data_approx], lr = lr)
    
    for it in range(iterations):
        optimizer.zero_grad()
        
        with torch.no_grad():
            # Projection step: entries at or below the row median become -1, entries above it +1.
            meds = torch.median(data_approx, dim = 1)[0]
            
            for i in range(data.shape[0]): # data has a single row
                data[i][torch.where(data_approx[i] <= meds[i])] = -1
                data[i][torch.where(data_approx[i] > meds[i])] = 1
                
        # having created a potential candidate predict its performance
        output = model(data) # predicted standardized log-FER
        
        with torch.no_grad():
            obtained_best_fer = output.item()
        
            if obtained_best_fer < best_fer_dataset:                
                # after the first improvement best_fer_dataset is a Python float rather than a tensor
                best_fer_dataset = obtained_best_fer
                write_inputs(data, "pgd")
                write_outputs(output, "pgd")
                print("best_fer_dataset: ",best_fer_dataset, '/', unstandardize_output(best_fer_dataset),' achieved at iteration ', it)
        output.backward() # gradient of the prediction with respect to the quantized input
        
        data_approx.grad = data.grad # reuse the gradient taken at the quantized point for the real-valued copy
        
        optimizer.step()# gradient step on both tensors; see the note above about `data`

In [47]:
# Running best before the search: first in standardized log scale, then as linear FER.
print(best_fer_dataset)
print(unstandardize_output(best_fer_dataset))

tensor(-3.9740)
tensor(5.7500e-05)


In [48]:
if pgd:
    # Number of independent restarts; each one begins from a fresh random bit pattern.
    pgd_trials = 15

#print('best_fer     iteration')
    for run in range(pgd_trials):    
        pgd_search(model, 0.1, 5000) # learning rate 0.1, 5000 iterations per restart

best_fer_dataset: tensor(-3.9740)
best_fer_dataset:  -3.999882459640503 / tensor(5.6850e-05)  achieved at iteration  135
best_fer_dataset:  -4.247788429260254 / tensor(5.0975e-05)  achieved at iteration  145
best_fer_dataset:  -4.43595027923584 / tensor(4.6925e-05)  achieved at iteration  149
best_fer_dataset:  -4.87758731842041 / tensor(3.8638e-05)  achieved at iteration  156
best_fer_dataset:  -4.907249450683594 / tensor(3.8137e-05)  achieved at iteration  181
best_fer_dataset:  -4.937533378601074 / tensor(3.7632e-05)  achieved at iteration  206
best_fer_dataset:  -5.196807861328125 / tensor(3.3575e-05)  achieved at iteration  222
best_fer_dataset:  -5.442737102508545 / tensor(3.0132e-05)  achieved at iteration  238
best_fer_dataset:  -5.7454376220703125 / tensor(2.6375e-05)  achieved at iteration  243
best_fer_dataset:  -5.965045928955078 / tensor(2.3946e-05)  achieved at iteration  245
best_fer_dataset:  -6.159749984741211 / tensor(2.1980e-05)  achieved at iteration  277
best_fer_d

In [49]:
if pgd:# lets check  what we have
    # Sanity check of the two files appended to by the PGD search.
    fl = True

    with open("/kaggle/working/proposed_fb_pgd_64BYTES.txt") as f:
        content = f.readlines()

    dim = len(content[0].split())
    print(f"Number of bits is : {dim}")
    n = len(content)
    print(f"Number of samples is : {n}")

    for line in content:
        # keep only digit tokens; whitespace delimiters are dropped by split()
        code = np.array([int(token) for token in line.split() if token.isdigit()])

        frozen_pos = np.flatnonzero(code != 0)
        info_pos = np.flatnonzero(code == 0)

        # every pattern is expected to have as many frozen as information bits
        if len(frozen_pos) != len(info_pos):
            print("SMTH IS WRONG WITH THE BITS")
            fl = False

    if fl:
        # the count shown is that of the last line read
        print("# of '0' == # of '1', ", len(frozen_pos))

    with open("/kaggle/working/proposed_fer_pgd_64BYTES.txt") as f:
        content = f.readlines()
    assert(len(content) == n)
    print("# FERs is the same")

    # note: this rebinds the notebook-level `target`
    target = torch.zeros(n)
    for position, line in enumerate(content):
        target[position] = float(line)

    print("Best fer on PGD DATASET is {:E}".format(target.min()))

Number of bits is : 1024
Number of samples is : 29
SMTH IS WRONG WITH THE BITS
# FERs is the same
Best fer on PGD DATASET is 1.825984E-05


In [None]:
#standardize_output(torch.tensor(1.668375*10**-4))

In [43]:
# clean up
# Disabled on purpose: the removal calls are wrapped in a string literal so they do not run.
# Un-quote them to delete the PGD result files before a fresh search (the files are opened in append mode).
"""os.remove("/kaggle/working/proposed_fb_pgd_64BYTES.txt")
os.remove("/kaggle/working/proposed_fer_pgd_64BYTES.txt")"""

# Random Search

In [None]:
def random_search(num_models, max_time, depth, hidden_layer_size, skip_gaps, mixup = False):
    """Random search for bit patterns with a low ensemble-predicted FER.

    Trains ``num_models`` independent networks (for the module-level
    ``epochs``), then repeatedly draws 1000 random patterns with half of the
    entries set to -1, scores each by the mean prediction of the ensemble and
    records the best pattern of a batch whenever it improves on the best seen
    so far.

    Args:
        num_models: ensemble size.
        max_time: search budget in seconds, checked after every batch.
        depth, hidden_layer_size, skip_gaps: Architecture hyper-parameters.
        mixup: forwarded to train().

    Side effects:
        Appends improving candidates through write_inputs / write_outputs
        with method "random_search".
    """
    models = [Architecture(depth, hidden_layer_size, skip_gaps).to(device) for _ in range(num_models)]

    print("Training models...")
    for member in models:
        train(member, epochs, mixup)
    print("Training finished.")

    for member in models:
        member.eval()

    best_fer = 1000 # sentinel above any standardized prediction expected here
    start_time = time.time()
    total_tested = 0
    n_bits = new_x_train.shape[1]

    while True:
        inp = torch.ones(1000, n_bits).to(device) # 1000 candidates, one column per varying bit

        for i in range(inp.shape[0]):
            inp[i, torch.randperm(n_bits)[:n_bits // 2]] = -1

        with torch.no_grad():
            # One column per model, one row per candidate. Average across models
            # (dim = 1) to get one score per candidate. The previous dim = 0
            # averaged across candidates instead, leaving one value per model,
            # so argmin below could never pick out a candidate.
            out = torch.cat([member(inp).reshape(-1, 1) for member in models], dim = 1).mean(dim = 1)

        index = torch.argmin(out)

        if out[index] < best_fer:
            best_fer = out[index]
            write_inputs(inp[index].reshape(1, -1), "random_search")
            output = write_outputs(out[index].reshape(1, -1), "random_search")
            print("new best fer is {:E}".format(output.item()))

        if time.time() - start_time > max_time:
            break

        total_tested += 1000

        if total_tested % 10000000 == 0:
            print("Total tested is: " + str(total_tested))

In [None]:
if rndm:
    random_search_max_time = 60*5 # search budget in seconds (5 minutes)
    if random_search_max_time > 0:
        # a single model, i.e. no ensembling
        random_search(1, random_search_max_time, depth, hidden_layer_size, skip_gaps, mixup)

In [None]:
if rndm:
    # lets check  what we have
    # Initialise the flag in this cell: it is read below, and without this line it
    # is only ever assigned when a mismatch is found, so a clean run on a fresh
    # kernel would fail with a NameError.
    fl = True

    with open("/kaggle/working/proposed_fb_random_search_64BYTES.txt") as f:
        content = f.readlines()
    dim = len(content[0].split())
    print("Number of bits is : " + str(dim))
    n = len(content)
    print("Number of samples is : " + str(n))

    for i in range(len(content)):
        code = np.array([int(s) for s in content[i].split() if s.isdigit()])# ignore delimeters and save as an array

        frozen_pos = np.where(np.array(code) != 0)[0]
        info_pos = np.where(np.array(code) == 0)[0]

        # every pattern is expected to have as many frozen as information bits
        if len(frozen_pos) != len(info_pos):
            print("SMTH IS WRONG WITH THE BITS")
            fl = False

    if fl:
        print("# of '0' == # of '1', ", len(frozen_pos))

    with open("/kaggle/working/proposed_fer_random_search_64BYTES.txt") as f:
        content = f.readlines()
    assert(len(content) == n)
    print("# FERs is the same")

    target = torch.zeros(n)
    for i in range(n):
        target[i] = float(content[i])

    print("Best fer on RANDOM DATASET is {:E}".format(target.min()))

In [None]:
# clean up
# Disabled on purpose: the removal calls are wrapped in a string literal so they do not run.
# NOTE(review): these names end in "_small", whereas write_inputs / write_outputs build
# names ending in "_" + dataset_name ("64BYTES") — update the paths before enabling this.
"""os.remove("/kaggle/working/proposed_fb_random_search_small.txt")
os.remove("/kaggle/working/proposed_fer_random_search_small.txt")"""