In [113]:
from __future__ import print_function 

%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt
%matplotlib inline

import os, sys
import numpy as np
import time

import tensorflow as tf
from tensorflow.keras import backend as K

import pandas as pd
import pickle
import gc, re, copy
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import deserialize, serialize
from tensorflow.python.keras.saving import saving_utils

# Project imports 
from data import mnist_m as mnistm
from data import mnist
from data.label_shift import label_shift_linear, plot_labeldist, plot_splitbars
from data.tasks import load_task
from experiments.training import *
from experiments.SL_bound import *
from experiments.DA_bound import *
from util.kl import *
from util.misc import *
from results.plotting import *

# Hyper-parameters and experiment configuration.
# These are module-level names; the cells below read them directly as globals.
seed = 69105  # fed to np.random.seed right before the experiment loop
batch_size = 128
num_classes = 10
epochs = 10
make_plots = False
# NOTE(review): presumably the confidence parameter of the bound (i.e. the bound
# holds with probability >= 1 - delta) — confirm against the bound definitions.
delta=0.05 ## open question from the original author: what should this value be?
binary=True  # forwarded to get_job_args to select the "Binary/" prior/posterior folders
# Each epsilon is encoded in directory names as int(1000*epsilon).
epsilons=[0.01]
# Each alpha is used as `test_size` when splitting off the prior data (alpha == 0
# means no split) and is encoded in directory names as int(100*alpha).
alphas=[0.1]#0,0.3]
# Each entry [a, b] is turned into the scalar a * 10**(-b) inside compute_bound_parts.
sigmas=[[3,2],[3,3]]

# Task identifier handed to load_task and used in the prior/posterior/result paths.
TASK = 2

# NOTE(review): hardcoded absolute, user-specific path; within the visible cells it
# is only referenced by commented-out legacy code. Consider making it configurable.
project_folder = "/cephyr/users/frejohk/Alvis/projects/"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load and structure data

In [11]:
# Load the source- and target-domain data for the configured task. load_task is a
# project helper (data.tasks); it is unpacked here into inputs and labels per domain.
x_source, y_source, x_target, y_target = load_task(TASK)

mean, variance 0.36348352 70.18035
---------------Load MNIST----------------
Training set (60000, 32, 32, 3) (60000, 10)
Test set (10000, 32, 32, 3) (10000, 10)


mean, variance 1.1809415 74.36859
---------------Load MNIST-M----------------
Training set (60000, 32, 32, 3) (60000, 10)
Test set (10000, 32, 32, 3) (10000, 10)
[[10.986111, 5.0, 2.999428, 1.99958, 1.399789, 1.0, 0.71435696, 0.5, 0.33346358, 0.20003448], [0.09088036, 0.19987813, 0.33326975, 0.5, 0.7143216, 1.0, 1.4001397, 2.0, 3.0, 5.0025883]]


In [137]:
def get_job_args(task, bound='germain', alpha=0.1, sigma=None, epsilon=0.01, binary=False):
    """Build one keyword-argument dict per stored posterior checkpoint.

    Parameters
    ----------
    task : task identifier used in the ``priors/`` and ``posteriors/`` paths.
    bound : name of the bound, copied verbatim into every dict.
    alpha : fraction of source data used for the prior; encoded in paths as
        ``int(100*alpha)``.
    sigma : two-element ``[a, b]`` specification, copied into every dict.
        Defaults to ``[3, 2]``.
    epsilon : scalar used to locate the posterior folder
        (``int(1000*epsilon)``). The previous default was the list ``[0.01]``,
        which could never work because it was multiplied and cast to ``int``.
    binary : if True, use the ``Binary/`` sub-folders.

    Returns
    -------
    list of dict with the keys ``task``, ``prior_path``, ``posterior_path``,
    ``bound``, ``alpha``, ``sigma``, ``epsilon`` and ``binary``.

    Raises
    ------
    ValueError if ``params.txt`` has fewer than three lines.
    OSError if ``params.txt`` (or the posterior folder) cannot be read.
    """
    if sigma is None:
        # Fresh list per call so the returned dicts never share a default object.
        sigma = [3, 2]

    variant = "Binary/" if binary else ""
    run_tag = str(int(1000*epsilon)) + "_" + str(int(100*alpha))
    params_path = 'posteriors/' + "task" + str(task) + "/" + variant + run_tag + '/params.txt'
    prior_path = "priors/" + "task" + str(task) + "/" + variant + str(int(100*alpha)) + "/prior.ckpt"

    # Read-only access is enough; the context manager closes the file.
    with open(params_path, 'rb') as f:
        params = f.readlines()

    if len(params) < 3:
        raise ValueError("Expected at least 3 lines in %s, found %d" % (params_path, len(params)))

    # The stored value replaces the submitted one, as before.
    epsilon = float(params[1])  # @TODO: Superfluous? Isn't this submitted?
    epochs_trained = int(params[2])  # @TODO: Only printed, otherwise unused
    print(epochs_trained)

    posterior_paths = posterior_checkpoints(task, epsilon, alpha, binary=binary)

    return [
        {
            'task': task,
            'prior_path': prior_path,
            'posterior_path': post,
            'bound': bound,
            'alpha': alpha,
            'sigma': sigma,
            'epsilon': epsilon,
            'binary': binary
        }
        for post in posterior_paths
    ]

    
def _compile_for_evaluation(model):
    """Compile `model` with an accuracy metric so `evaluate` returns [loss, accuracy]."""
    # @TODO: Are the parameters for optimizer etc necessary when just loading the model?
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=tf.keras.optimizers.SGD(learning_rate=0.003, momentum=0.95),
                  metrics=['accuracy'],)
    return model


def _updates_from_checkpoint(checkpoint, updates_per_epoch=547):
    """Translate a checkpoint name into a number of weight updates.

    Names starting with "1_" carry the update count directly; every other name
    is read as "<prefix>_<epoch>" with a zero-based epoch index.
    """
    if checkpoint[0:2] == "1_":
        return int(checkpoint[2:])
    return (int(checkpoint[2:]) + 1) * updates_per_epoch  # @TODO: Constant hack


def compute_bound_parts(task, posterior_path, x_bound, y_bound, x_target, y_target, alpha=0.1, delta=0.05,
                  prior_path=None, bound='germain', binary=False, n_classifiers=4, sigma=None, epsilon=None):
    """Compute and store the ingredients of the bound for one posterior checkpoint.

    The function loads prior and posterior weights, draws two sets of
    `n_classifiers` weight samples around the posterior, and from those computes
    the mean/std of the source and target errors, of the joint errors and of the
    classifier disagreements, plus the KL term between prior and posterior.

    Parameters
    ----------
    task : task identifier, used in the prior and result paths.
    posterior_path : checkpoint path of the posterior weights; its base name
        (without extension) identifies the checkpoint.
    x_bound, y_bound : source data used to evaluate the bound.
    x_target, y_target : target-domain data.
    alpha : prior data fraction; ``alpha == 0`` means the randomly initialised
        model is used as prior.
    delta : currently unused here (only needed by the later grid search).
    prior_path : checkpoint of the prior weights. If None (and ``alpha != 0``)
        the conventional ``priors/task<task>/[Binary/]<100*alpha>/prior.ckpt``
        location is used. Previously this argument was silently overwritten.
    bound : currently unused.
    binary : selects the binary model and the ``Binary/`` folders.
    n_classifiers : number of weight samples per set.
    sigma : ``[a, b]``; the sampling scale is ``a * 10**(-b)``. Defaults to ``[3, 3]``.
    epsilon : value encoded in the result file name. If None, the module-level
        ``epsilon`` is used, which is what the function implicitly relied on before.

    Returns
    -------
    pandas.DataFrame with a single row holding all computed quantities. The same
    frame is pickled to ``results/.../<tag>_<checkpoint>_results.pkl``.

    Raises
    ------
    ValueError if no `epsilon` is given and none is defined at module level.
    """
    if sigma is None:
        sigma = [3, 3]

    if epsilon is None:
        # Backward compatibility: the result path used to read a global `epsilon`.
        epsilon = globals().get('epsilon')
        if epsilon is None:
            raise ValueError("epsilon must be passed explicitly or defined at module level")

    variant = "Binary/" if binary else ""

    print('Clearing session...')
    K.clear_session()

    print('Initializing models...')
    if binary:
        M_prior = init_MNIST_model_binary()
        M_posterior = init_MNIST_model_binary()
    else:
        M_prior = init_MNIST_model()
        M_posterior = init_MNIST_model()

    _compile_for_evaluation(M_prior)
    _compile_for_evaluation(M_posterior)

    # Respect an explicitly supplied prior checkpoint; otherwise use the convention.
    if alpha != 0 and prior_path is None:
        prior_path = "priors/" + "task" + str(task) + "/" + variant + str(int(100*alpha)) + "/prior.ckpt"

    print('Loading weights...')
    if alpha != 0:
        M_prior.load_weights(prior_path)
    # With alpha == 0 the random initialisation itself acts as the prior.
    w_a = M_prior.get_weights()

    # Load posterior weights
    M_posterior.load_weights(posterior_path)
    w_s = M_posterior.get_weights()

    t = time.time()

    # Scalar sampling scale derived from the [a, b] specification.
    sigma_value = sigma[0]*10**(-1*sigma[1])

    # Two independent sets of draws from the posterior.
    print('Drawing classifiers...')
    w_s_draws = draw_classifier(w_s, sigma_value, num_classifiers=n_classifiers)
    w_s_draws2 = draw_classifier(w_s, sigma_value, num_classifiers=n_classifiers)

    elapsed = time.time() - t
    print('Time spent drawing the classifiers: %.4fs' % elapsed)

    # ------------------------------------------------------------------
    # Source ("train") and target errors, one value per drawn classifier
    # ------------------------------------------------------------------
    errorsum = []
    target_errorsum = []

    y_bound = np.array(y_bound)
    y_target = np.array(y_target)

    print('Calculating errors...')
    t = time.time()
    for draws in (w_s_draws, w_s_draws2):
        for weights in draws:
            M_posterior.set_weights(weights)
            # evaluate() returns [loss, accuracy]; the error is 1 - accuracy.
            errorsum.append(1 - M_posterior.evaluate(x_bound, y_bound, verbose=0)[1])
            target_errorsum.append(1 - M_posterior.evaluate(x_target, y_target, verbose=0)[1])

    train_germain = np.mean(errorsum)
    target_germain = np.mean(target_errorsum)
    error_std = np.std(errorsum)
    target_error_std = np.std(target_errorsum)
    elapsed = time.time() - t
    print('Time spent calculating errors: %.4fs' % elapsed)

    # ------------------------------------------------------------------
    # Joint errors and disagreements
    # @TODO: This part should be merged with the above. Errors can be readily
    #        computed from the predictions.
    # ------------------------------------------------------------------
    e_ssum = []
    e_tsum = []
    d_txsum = []
    d_sxsum = []

    t = time.time()

    # Only the i-th draw of each set is paired (no cross-usage of classifiers).
    # NOTE: the pairs may still not be independent, which limits how far the
    # resulting confidence intervals can be trusted.
    for h, hprime in zip(w_s_draws, w_s_draws2):
        M_posterior.set_weights(h)
        d_tx_h = make_01(M_posterior.predict(x_target, verbose=0))
        d_sx_h = make_01(M_posterior.predict(x_bound, verbose=0))

        M_posterior.set_weights(hprime)
        d_tx_hprime = make_01(M_posterior.predict(x_target, verbose=0))
        d_sx_hprime = make_01(M_posterior.predict(x_bound, verbose=0))

        # Collect one value per pair. Three of these used to be assigned
        # instead of appended, so only the last pair survived and the
        # corresponding standard deviations were always 0.
        e_ssum.append(joint_error(d_sx_h, d_sx_hprime, y_bound))
        d_sxsum.append(classifier_disagreement(d_sx_h, d_sx_hprime))
        e_tsum.append(joint_error(d_tx_h, d_tx_hprime, y_target))
        d_txsum.append(classifier_disagreement(d_tx_h, d_tx_hprime))

    # Means
    e_s = np.mean(e_ssum)
    d_sx = np.mean(d_sxsum)
    e_t = np.mean(e_tsum)
    d_tx = np.mean(d_txsum)

    # Stds
    e_s_std = np.std(e_ssum)
    d_sx_std = np.std(d_sxsum)
    e_t_std = np.std(e_tsum)
    d_tx_std = np.std(d_txsum)

    elapsed = time.time() - t
    print("Time spent calculating joint errors and disagreements: "+str(elapsed)+"\n")

    # ------------------------------------------------------------------
    # KL divergence between prior and posterior
    # ------------------------------------------------------------------
    t = time.time()
    KL = estimate_KL(w_a, w_s, sigma_value)

    elapsed = time.time() - t
    print("Time spent calculating KL: "+str(elapsed)+"\n")

    print("Finished calculation of bound parts")

    # ------------------------------------------------------------------
    # Finish up and store results
    # ------------------------------------------------------------------
    # The checkpoint name encodes either an update count ("1_...") or an epoch.
    checkpoint = os.path.splitext(os.path.basename(posterior_path))[0]
    updates = _updates_from_checkpoint(checkpoint)

    # Use the function's own arguments (task, binary, epsilon) rather than
    # notebook globals when building the result location.
    result_path = ("results/" + "task" + str(task) + "/" + variant
                   + str(int(1000*epsilon)) + "_" + str(int(100*alpha)) + "_"
                   + str(sigma[0]) + str(sigma[1]) + '_' + checkpoint)
    result_file = result_path + "_results.pkl"

    # Only the parent folder must exist. Creating `result_path` itself as a
    # directory (as before) made the subsequent pickle write fail.
    os.makedirs(os.path.dirname(result_file), exist_ok=True)

    results = pd.DataFrame({
        'Weightupdates': [updates],
        'train_germain': [train_germain],
        'target_germain': [target_germain],
        'KL': [KL],
        'e_s': [e_s],
        'e_t': [e_t],
        'd_tx': [d_tx],
        'd_sx': [d_sx],
        'error_std': [error_std],
        'target_error_std': [target_error_std],
        'e_s_std': [e_s_std],
        'e_t_std': [e_t_std],
        'd_tx_std': [d_tx_std],
        'd_sx_std': [d_sx_std]
    })

    print('Saving results...')
    results.to_pickle(result_file)
    print('Done.')

    # The bound itself is intentionally not evaluated here: the grid search
    # (grid_search(train_germain, e_s, e_t, d_tx, d_sx, KL, delta, m) with
    # m = len(y_bound), optionally beta_bound=True) only makes sense over a
    # whole set of snapshots and is expected to run on the stored results.
    return results
    
def posterior_checkpoints(task, epsilon, alpha, binary=False):
    """List the posterior checkpoint paths stored for one (task, epsilon, alpha) run.

    The folder ``posteriors/task<task>/[Binary/]<1000*epsilon>_<100*alpha>`` is
    scanned for ``*.ckpt.index`` files, so the number of checkpoints does not
    have to be hardcoded. Checkpoint names starting with "1" come first, followed
    by names starting with "2"; within each group the names are ordered by the
    integer formed from their digits. Other files are ignored.

    Parameters
    ----------
    task : task identifier used in the folder name.
    epsilon, alpha : run parameters, encoded as ``int(1000*epsilon)`` and
        ``int(100*alpha)`` (note that ``int`` truncates).
    binary : if True, look in the ``Binary/`` sub-folder.

    Returns
    -------
    list of str : ``<folder>/<name>.ckpt`` for every checkpoint found.

    Raises
    ------
    OSError if the posterior folder does not exist.
    """
    variant = "Binary/" if binary else ""
    # Built from the function's own arguments; the returned paths used to be
    # assembled from the notebook globals `TASK` and `Binary` instead.
    base_path = ("posteriors/" + "task" + str(task) + "/" + variant
                 + str(int(1000*epsilon)) + "_" + str(int(100*alpha)))

    index_suffix = '.ckpt.index'
    first_group = []   # names starting with "1"
    second_group = []  # names starting with "2"
    for filename in os.listdir(base_path):
        if not filename.endswith(index_suffix):
            continue
        name = filename[:-len(index_suffix)]
        if name.startswith("1"):
            first_group.append(name)
        elif name.startswith("2"):
            second_group.append(name)

    def digits_as_int(name):
        # Drop every non-digit character and compare the remaining number.
        return int(re.sub(r'\D', '', name))

    ordered = sorted(first_group, key=digits_as_int) + sorted(second_group, key=digits_as_int)

    # Trailing "/" kept so the produced strings match the previous format exactly.
    return [os.path.join(base_path + "/", name + ".ckpt") for name in ordered]

In [138]:
# NOTE(review): labels are binarised unconditionally, although `binary` is a
# configurable flag — confirm this is intended when binary=False.
y_target_bin = make_mnist_binary(y_target)
y_source_bin = make_mnist_binary(y_source)

print('Iterating over experiments...\n')

# Seed the global NumPy RNG; train_test_split below is called without a
# random_state and therefore draws from it.
np.random.seed(seed)
for alpha in alphas:

    print("alpha:"+str(alpha))

    if alpha==0:
        # No prior data is split off: the whole source set is used for the bound.
        x_bound=x_source
        y_bound=y_source_bin
    else:
        # A fraction `alpha` is set aside for the prior; only the remainder is
        # used here (x_prior / y_prior are not needed in this cell).
        x_bound, x_prior, y_bound , y_prior = train_test_split(x_source, y_source_bin, test_size=alpha)
    for epsilon in epsilons:
        print("  epsilon:"+str(epsilon))
        for sigma in sigmas:    # @TODO: This loop can be merged into the compute_bound part since models don't depend on sigma
            print("    sigma:"+str(sigma))
            arg_list = get_job_args(TASK, bound='germain', alpha=alpha, sigma=sigma,
                                    epsilon=epsilon, binary=binary)
            # @TODO: Test — only the first checkpoint is processed for now
            a = arg_list[0]
            # Forward alpha and delta explicitly; they were previously omitted, so
            # compute_bound_parts always ran with its defaults (alpha=0.1, delta=0.05)
            # regardless of the loop value.
            compute_bound_parts(a['task'], a['posterior_path'], x_bound, y_bound, x_target, y_target_bin,
                          alpha=a['alpha'], delta=delta,
                          prior_path=a['prior_path'], bound=a['bound'], binary=a['binary'], sigma=a['sigma'])
            
               

Iterating over experiments...

alpha:0.1
  epsilon:0.01
    sigma:[3, 2]
14
Clearing session...
Initializing models...
Loading weights...
Drawing classifiers...
Time spent drawing the classifiers: 0.0367s
Calculating errors...


KeyboardInterrupt: 