In [144]:
from __future__ import print_function 

%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt
%matplotlib inline

import os, sys
import numpy as np
import time

import tensorflow as tf
from tensorflow.keras import backend as K

import pandas as pd
import pickle
import gc, re, copy
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import deserialize, serialize
from tensorflow.python.keras.saving import saving_utils

# Project imports 
from data import mnist_m as mnistm
from data import mnist
from data.label_shift import label_shift_linear, plot_labeldist, plot_splitbars
from data.tasks import load_task
from experiments.training import *
from experiments.SL_bound import *
from experiments.DA_bound import *
from bounds.bounds import *
from util.kl import *
from util.misc import *
from results.plotting import *

# Hyper-parameters
# Notebook-wide configuration. Only some of these names are referenced in the
# cells visible in this notebook (TASK, seed, n_classifiers, binary, epsilons,
# alphas, sigmas); the others may be consumed by the star-imported project
# modules or by cells that are not shown here.

# Task identifier; passed to load_task() and get_job_args().
TASK = 2
# Seed handed to np.random.seed() right before the experiment loop.
seed = 69105
batch_size = 128
num_classes = 10
# Forwarded to get_job_args() / compute_bound_parts() as `n_classifiers`.
n_classifiers = 2
epochs = 10
make_plots = False
delta=0.05 # NOTE(review): presumably the confidence parameter of the bound - TODO confirm; not used in the visible cells
# Forwarded to get_job_args(); selects the "Binary/" sub-directories for priors and posteriors.
binary=True
# Values iterated by the experiment loop; each one appears as int(1000*epsilon) in checkpoint directory names.
epsilons=[0.01]
# Values iterated by the experiment loop; each one is used as `test_size` of train_test_split
# (alpha == 0 skips the split) and appears as int(100*alpha) in directory names.
alphas=[0.1]# commented-out extra values: 0, 0.3
# Each entry is passed unchanged as `sigma` to get_job_args() / compute_bound_parts().
sigmas=[[3,2],[3,3]]

# NOTE(review): absolute, user-specific cluster path; not referenced in the visible cells.
project_folder = "/cephyr/users/frejohk/Alvis/projects/"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load and structure data

In [11]:
# Load inputs and labels for the source and target domains of the configured TASK.
# load_task is a project helper (data.tasks); the log printed below this cell
# suggests TASK = 2 loads MNIST and MNIST-M - TODO confirm which one is the source.
x_source, y_source, x_target, y_target = load_task(TASK)

mean, variance 0.36348352 70.18035
---------------Load MNIST----------------
Training set (60000, 32, 32, 3) (60000, 10)
Test set (10000, 32, 32, 3) (10000, 10)


mean, variance 1.1809415 74.36859
---------------Load MNIST-M----------------
Training set (60000, 32, 32, 3) (60000, 10)
Test set (10000, 32, 32, 3) (10000, 10)
[[10.986111, 5.0, 2.999428, 1.99958, 1.399789, 1.0, 0.71435696, 0.5, 0.33346358, 0.20003448], [0.09088036, 0.19987813, 0.33326975, 0.5, 0.7143216, 1.0, 1.4001397, 2.0, 3.0, 5.0025883]]


In [153]:
def get_job_args(task, bound='germain', alpha=0.1, sigma=[3,2], epsilon=0.01, binary=False, n_classifiers=4):
    """Build one keyword-argument dict per posterior checkpoint of an experiment.

    Parameters
    ----------
    task : identifier of the task; used in the "task<task>" path component.
    bound : name of the bound, stored unchanged in every dict.
    alpha : number; int(100*alpha) names the prior directory.
    sigma : stored unchanged in every dict. The default list is never mutated
        here, but the same object is shared by all returned dicts.
    epsilon : number; forwarded to posterior_checkpoints() and stored in every
        dict. The default is the scalar 0.01: the previous default ``[0.01]``
        was a list and made ``int(1000*epsilon)`` raise TypeError.
    binary : if true, the "Binary/" sub-directories are used.
    n_classifiers : stored unchanged in every dict.

    Returns
    -------
    list of dict
        One dict per path returned by posterior_checkpoints(), in that order,
        with keys 'task', 'prior_path', 'posterior_path', 'bound', 'alpha',
        'sigma', 'epsilon', 'binary' and 'n_classifiers'.

    Notes
    -----
    Earlier revisions opened ``params.txt`` in the posterior directory with
    mode 'rb+' and discarded the contents (the code using them was commented
    out), and computed an unused ``result_path``. Both were dead code and were
    removed, so a missing or read-only ``params.txt`` no longer causes a failure.
    """
    # The binary and non-binary layouts differ only by this path component.
    subdir = "Binary/" if binary else ""
    # int() truncation is kept on purpose so the name matches the directories
    # already on disk - presumably written with the same expression; verify
    # against the training code before switching to round().
    prior_path = "priors/" + "task" + str(task) + "/" + subdir + str(int(100*alpha)) + "/prior.ckpt"

    posterior_paths = posterior_checkpoints(task, epsilon, alpha, binary=binary)

    return [
        {
            'task': task,
            'prior_path': prior_path,
            'posterior_path': post,
            'bound': bound,
            'alpha': alpha,
            'sigma': sigma,
            'epsilon': epsilon,
            'binary': binary,
            'n_classifiers': n_classifiers
        }
        for post in posterior_paths
    ]
    
def posterior_checkpoints(task, epsilon, alpha, binary=False):
    """Return the posterior checkpoint paths of one experiment in load order.

    The checkpoint directory is
    ``posteriors/task<task>/[Binary/]<int(1000*epsilon)>_<int(100*alpha)>``.
    Every file in it whose name ends in ``.ckpt.index`` is a candidate; the
    suffix is stripped to obtain the checkpoint name. Names starting with "1"
    come first, then names starting with "2"; other names are ignored. Inside
    each group the names are sorted by the integer formed by all digits of the
    name (e.g. "1_2" -> 12, "1_10" -> 110), so epochs do not have to be
    hard-coded.

    Parameters
    ----------
    task : identifier used in the "task<task>" path component.
    epsilon, alpha : numbers used (truncated with int()) in the directory name.
    binary : if true, look in the "Binary/" sub-directory.

    Returns
    -------
    list of str
        ``<directory>/<name>.ckpt`` for every selected checkpoint.

    Raises
    ------
    OSError
        From os.listdir() if the checkpoint directory does not exist.
    """
    subdir = "Binary/" if binary else ""
    # int() truncation is kept on purpose so the name matches the directories
    # already on disk - presumably written with the same expression; verify
    # against the training code before switching to round().
    base_path = ("posteriors/" + "task" + str(task) + "/" + subdir
                 + str(int(1000*epsilon)) + "_" + str(int(100*alpha)))

    suffix = ".ckpt.index"
    first_group = []
    second_group = []
    for filename in os.listdir(base_path):
        # Keep only checkpoint index files; data shards, params.txt etc. are skipped.
        if not filename.endswith(suffix):
            continue
        name = filename[:-len(suffix)]
        # startswith() is safe for an empty name, unlike name[0].
        if name.startswith("1"):
            first_group.append(name)
        elif name.startswith("2"):
            second_group.append(name)

    def _digits(name):
        # All digits of the name read as one integer; raw string avoids the
        # invalid "\D" escape of a normal string literal.
        return int(re.sub(r'\D', '', name))

    first_group.sort(key=_digits)
    second_group.sort(key=_digits)

    # Build the result from the directory that was actually listed (the `task`
    # and `binary` arguments), not from notebook globals. The trailing slash
    # keeps the returned strings in the "dir/name.ckpt" form used before.
    checkpoint_dir = base_path + "/"
    return [os.path.join(checkpoint_dir, name + ".ckpt")
            for name in first_group + second_group]

In [None]:
# Convert the labels of both domains with the project helper make_mnist_binary.
# NOTE(review): the binarised labels are used regardless of the `binary` flag -
# confirm this is intended when binary=False.
y_target_bin = make_mnist_binary(y_target)
y_source_bin = make_mnist_binary(y_source)

print('Iterating over experiments...\n')

# Seed numpy's global RNG once; train_test_split below is called without
# random_state and therefore draws from it.
np.random.seed(seed)
for alpha in alphas:

    print("alpha:"+str(alpha))

    if alpha==0:
        # No data is held out: the whole source set is used for the bound.
        x_bound=x_source
        y_bound=y_source_bin
    else:
        # Hold out a fraction `alpha` of the source data (x_prior, y_prior);
        # the remainder is used for the bound.
        x_bound, x_prior, y_bound, y_prior = train_test_split(x_source, y_source_bin, test_size=alpha)
    for epsilon in epsilons:
        print("  epsilon:"+str(epsilon))
        for sigma in sigmas:    # @TODO: This loop can be merged into the compute_bound part since models don't depend on sigma
            print("    sigma:"+str(sigma))
            arg_list = get_job_args(TASK, bound='germain', alpha=alpha, sigma=sigma,
                                    epsilon=epsilon, binary=binary, n_classifiers=n_classifiers)
            # @TODO: Test - only the first checkpoint of each configuration is evaluated for now.
            job = arg_list[0]
            # Pass the job's own epsilon (previously hard-coded to 0.01) so the
            # value given to compute_bound_parts always matches the checkpoint
            # directory that get_job_args selected.
            compute_bound_parts(job['task'], job['posterior_path'], x_bound, y_bound, x_target, y_target_bin,
                                prior_path=job['prior_path'], bound=job['bound'], binary=job['binary'], sigma=job['sigma'],
                                epsilon=job['epsilon'], n_classifiers=job['n_classifiers'])
            
               

Iterating over experiments...

alpha:0.1
  epsilon:0.01
    sigma:[3, 2]

----------------------------------------
Computing bound components for
   Prior: priors/task2/Binary/10/prior.ckpt
   Posterior: posteriors/task2/Binary/10_10/1_0.ckpt
Clearing session...
Initializing models...
Loading weights...
Drawing classifiers...
Time spent drawing the classifiers: 0.0200s
Calculating errors...
Time spent calculating errors: 28.3190s
Computing joint errors and disagreements...
