In [2]:
from __future__ import print_function 

%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt
%matplotlib inline

import os, sys
import numpy as np
import time

import tensorflow as tf
from tensorflow.keras import backend as K

import pandas as pd
import pickle
import gc, re, copy
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import deserialize, serialize
from tensorflow.python.keras.saving import saving_utils

# Project imports 
from data import mnist_m as mnistm
from data import mnist
from data.label_shift import label_shift_linear, plot_labeldist, plot_splitbars
from data.tasks import load_task
from experiments.training import *
from experiments.SL_bound import *
from experiments.DA_bound import *
from bounds.bounds import *
from util.kl import *
from util.misc import *
from util.batch import *
from results.plotting import *

# Hyper-parameters
# ----------------
# Scalar settings shared by every job submitted from this notebook.
task          = 6          # task id handed to get_job_args() and exported to the job
seed          = 69105      # seeds NumPy here and is exported to the job as `seed`
n_classifiers = 40         # exported to the job as `nclassifiers`
delta         = 0.05       # presumably the confidence level of the bound — TODO confirm
binary        = True       # formatted with %d, so the job receives 1
bound         = "germain"  # name of the bound, exported to the job as `bound`

# Grid values; the submission loop iterates over alphas x epsilons x sigmas.
# (Alternatives that were commented out earlier: epsilon 0.001; alpha 0, 0.3,
#  0.5, 0.7, 0.9; sigma [3, 2] and [3, 3].)
epsilons = [0.1]
alphas   = [0.1]
sigmas   = [[3, 1]]

# Absolute path on the cluster file system; not referenced by the cells shown here.
project_folder = "/cephyr/users/adambre/Alvis/"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Alvis params

In [3]:
# Slurm submission settings used by the shell cells below.
project  = "SNIC2021-7-83"              # account passed to sbatch via -A
username = "adambre"                    # user whose queue is listed with squeue -u
job      = "batch_bound_single.sbatch"  # batch script submitted for every experiment

In [3]:
# List the Slurm jobs currently queued or running for `username`
# (IPython substitutes the Python variable before the shell command runs).
!squeue -u $username

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON) 
            131090     alvis     bash  adambre  R 1-03:58:05      1 alvis1-04 


In [4]:
# Print the batch script named by `job` so the submitted command is visible in the notebook.
!cat $job

#!/usr/bin/bash

#SBATCH -t 1-00:00:00
#SBATCH -N 1 --gpus-per-node=T4:1
#SBATCH -p alvis

# Runs batch_bound_single.py for one experiment configuration.
#
# Every setting is read from the environment (e.g. `sbatch --export=task=6,...`).
# A variable that is unset or empty falls back to the default given below;
# `: "${name:=value}"` assigns the default without running anything else.
#
# NOTE(review): no memory request is made here, and the job logs captured in
# this notebook report oom-kill events — consider requesting more memory if
# the cluster configuration allows it.
: "${task:=2}"
: "${seed:=69105}"
: "${alpha:=0.0}"
# NOTE(review): the default separates the two sigma components with ',' while
# the notebook submits them as 'a.b' — confirm what batch_bound_single.py expects.
: "${sigma:=3,3}"
: "${epsilon:=0.01}"
: "${delta:=0.05}"
: "${binary:=0}"
: "${nclassifiers:=2}"
: "${bound:=germain}"
: "${prior:=}"
: "${posterior:=}"

. load_modules.sh

# Each expansion is quoted so that an empty value (the prior/posterior default)
# is still passed as an explicit empty argument instead of vanishing and
# leaving `-p`/`-P` without a value, and so that paths with spaces stay intact.
python batch_bound_single.py -t "$task" -r "$seed" -a "$alpha" -s "$sigma" -e "$epsilon" -d "$delta" -b "$binary" -n "$nclassifiers" -B "$bound" -p "$prior" -P "$posterior"

In [4]:
print('Iterating over experiments...\n')

fids = []

os.makedirs('logs', exist_ok=True)


np.random.seed(seed)
# for s in range(10):
#     seed+=1
#     print(seed)
for alpha in alphas:

    print("alpha:"+str(alpha))

    for epsilon in epsilons:
        print("  epsilon:"+str(epsilon))
        for sigma in sigmas:    
            print("    sigma:"+str(sigma))
            arg_list = get_job_args(task, bound=bound, alpha=alpha, sigma=sigma,
                                    epsilon=epsilon, binary=binary, n_classifiers=n_classifiers)
            for a in arg_list[5:]:   

                ckpt = os.path.splitext(os.path.basename(a['posterior_path']))[0]
                fid = './logs/batch_t-%d_r-%d_a-%.4f_s-%d%d_e-%.4f_d-%.4f_b-%d_n-%d_B-%s_c-%s' % \
                    (task, seed, alpha, sigma[0], sigma[1], epsilon, delta, binary, n_classifiers, bound, ckpt)

                sigstr = '"%d.%d"' % (sigma[0], sigma[1])
                exp = 'task=%d,seed=%d,alpha=%.4f,sigma="%s",epsilon=%.4f,delta=%.4f,'% (task, seed, alpha, sigstr, epsilon, delta)\
                        +'binary=%d,nclassifiers=%d,bound=%s,prior=%s,posterior=%s' % (binary, n_classifiers, bound, a['prior_path'], a['posterior_path'])

                prior_path = a['prior_path']
                output = !sbatch -o "$fid"-%j.out -e "$fid"-%j.err -A $project --export="$exp" $job

                jobid = int(output[0].split(' ')[-1])
                fid = fid+'-%s' % jobid

                fids.append(fid)

Iterating over experiments...

alpha:0.1
  epsilon:0.1
    sigma:[3, 1]


In [6]:
# Show the log-file prefixes (one per submitted job) collected by the submission loop.
print(fids)

['./logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-1_275-132260', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-1_330-132261', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-1_385-132262', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-1_440-132263', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-1_495-132264', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-2_1-132265', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-2_2-132266', './logs/batch_t-2_r-69105_a-0.1000_s-31_e-0.0300_d-0.0500_b-1_n-40_B-germain_c-2_3-132267']


In [10]:
# Check the queue again for `username` now that the jobs have been submitted.
!squeue -u $username

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON) 
            132871     alvis     bash  adambre  R    2:06:24      1 alvis1-01 


In [11]:
# For every submitted job, print its log prefix followed by the last five
# lines of its stdout (.out) and stderr (.err) files.
for fid in fids: 
    print('------')
    print(fid)
    # The quotes close the IPython variable name so the extension is appended
    # literally — presumably needed because `$fid.out` would be read as an
    # attribute lookup; TODO confirm.
    !tail -n 5 "$fid".out
    !tail -n 5 "$fid".err
    print(' \n\n')

------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-1_660-132895
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132895&from=1638457773000
2021-12-02 16:10:09.761339: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:11.388641: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:11.601699: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132895/slurm_script: line 54: 45920 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 132895.b

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_4-132903
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132903&from=1638457773000
2021-12-02 16:10:10.959403: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:12.557499: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:12.787147: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132903/slurm_script: line 54:  4153 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 132903

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_12-132911
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132911&from=1638457774000
2021-12-02 16:10:11.665111: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:13.264101: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.491709: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132911/slurm_script: line 54: 111791 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 1329

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_20-132919
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132919&from=1638457774000
2021-12-02 16:10:11.257094: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:12.844932: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.072100: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132919/slurm_script: line 54: 133595 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 38 oom-kill event(s) in step 132

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_28-132927
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132927&from=1638457781000
2021-12-02 16:10:11.480114: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:13.045272: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.279397: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132927/slurm_script: line 54: 104341 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 909 oom-kill event(s) in step 13

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_36-132935
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132935&from=1638457777000
2021-12-02 16:10:11.881141: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:13.549654: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.784512: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132935/slurm_script: line 54: 153352 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 1329

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_44-132943
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132943&from=1638457777000
2021-12-02 16:10:26.579088: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:28.190929: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:28.423401: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132943/slurm_script: line 54: 149231 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 1329

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_52-132951
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132951&from=1638457777000
2021-12-02 16:10:11.596682: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:13.099721: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.336106: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132951/slurm_script: line 54: 166110 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 1329

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_60-132959
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132959&from=1638457777000
2021-12-02 16:10:11.666391: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:13.546982: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:13.778016: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132959/slurm_script: line 54:  9022 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 5 oom-kill event(s) in step 13295

 


------
./logs/batch_t-6_r-69105_a-0.1000_s-31_e-0.1000_d-0.0500_b-1_n-40_B-germain_c-2_68-132967
This job can be monitored from: https://scruffy.c3se.chalmers.se/d/alvis-job/alvis-job?var-jobid=132967&from=1638457777000
2021-12-02 16:10:26.375696: I tensorflow/core/common_runtime/process_util.cc:147] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-12-02 16:10:27.857344: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2021-12-02 16:10:28.087070: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
/tmp/slurmd/job132967/slurm_script: line 54: 180533 Killed                  python batch_bound_single.py -t $task -r $seed -a $alpha -s $sigma -e $epsilon -d $delta -b $binary -n $nclassifiers -B $bound -p $prior -P $posterior
slurmstepd: error: Detected 2 oom-kill event(s) in step 1329