# Notebook 02: Pipeline for CATHODE results

This notebook goes through the pipeline for obtaining results using the idealized CATHODE (Classifying Anomalies THrough Outer Density Estimation) method.

In [1]:
import os
import argparse
import numpy as np

from run_ANODE_training import main as train_DE
from run_classifier_data_creation import main as create_data
from run_classifier_training import main as train_classifier
from run_ANODE_evaluation import main as eval_ANODE
from evaluation_utils import full_single_evaluation, classic_ANODE_eval, minimum_val_loss_model_evaluation

In [2]:
mode = 'CATHODE'
data_dir = '../separated_data'
save_dir = 'CATHODE_models'
# Shift on jet mass variables to be applied.
datashift = 0.
# Shift is not correlated to the actual mjj but randomized.
random_shift = False
# Whether to apply an (ANODE paper) fiducial cut on the data (and samples).
fiducial_cut = False
# Suppress the processing of the extra signal sample.
no_extra_signal = True
verbose = False

# ANODE model config file (.yml).
DE_config_file = '../DE_MAF_model.yml'
# 'Number of Density Estimation training epochs.'
DE_epochs = 100
# Batch size during density estimation training.
DE_batch_size = 256
# Skips the density estimation (loads existing files instead).
DF_skip = False
# Turns off the logit transform in the density estimator.
DE_no_logit = False

# File name for the density estimator.
DE_file_name = 'my_ANODE_model'

# Classifier model config file (.yml).
cf_config_file = '../classifier.yml'

# Number of classifier training epochs
cf_epochs = 100
# Number of samples to be generated. Currently the samples will be cut down to match data proportion.
cf_n_samples = 400000

# Sample the conditional from a KDE fit rather than a uniform distribution.
cf_realistic_conditional = True
# Bandwith of the KDE fit (used when realistic_conditional is selected)
cf_KDE_bandwidth = 0.01
# Add the full number of samples to the training set rather than mixing it in equal parts with data.
cf_oversampling = True
# Turns off logit tranform in the classifier.
cf_no_logit = True
# Space-separated list of pre-sampled npy files of physical variables if the sampling has been done externally. The format is 
# (mjj, mj1, dmj, tau21_1, tau21_2)
cf_external_samples = ""
# Lower boundary of signal region.
cf_SR_min = 3.3
# Upper boundary of signal region.
cf_SR_max = 3.7
# Number of independent classifier training runs.
cf_n_runs = 1
# Batch size during classifier training.
cf_batch_size = 128
# Use the conditional variable as classifier input during training.
cf_use_mjj = False
# Weight the classes according to their occurence in the training set. 
# Necessary if the training set was intentionally oversampled.'
cf_use_class_weights = True
# Central value of signal region. Must only be given for using CWoLa with weights.
cf_SR_center = 3.5
# Make use of extra background (for supervised and idealized AD).
cf_extra_bkg = False
# Define a separate validation set to pick the classifier epochs.
cf_separate_val_set = True
# Save the tensorflow model after each epoch instead of saving predictions.
cf_save_model = True
# Skips the creation of the classifier dataset (loads existing files instead).
cf_skip_create = False
# Skips the training of the classifier (loads existing files instead).
cf_skip_train = False

In [3]:
import argparse
import os
import torch
import numpy as np
import pickle
from data_handler import LHCORD_data_handler
from ANODE_training_utils import train_ANODE, plot_ANODE_losses
from density_estimator import DensityEstimator

In [4]:
def prepare_processing_dict(source):
    out = {}
    out['max'] = source['max']
    out['min'] = source['min']
    out['mean2'] = source['mean2']
    out['std2'] = source['std2']
    out['std2_logit_fix'] = source['std2_logit_fix']
    return out

In [5]:
kwargs = {
    'config_file': DE_config_file,
    'data_dir': data_dir,
    'savedir': save_dir,
    'datashift': datashift,
    'random_shift': random_shift,
    'verbose': verbose,
    'model_file_name': DE_file_name,
    'epochs': DE_epochs,
    'batch_size': DE_batch_size,
    'no_logit': DE_no_logit,
    'inner_model': False
}

In [6]:
def train_DE(**kwargs):
    
    # for debugging:
    # torch.manual_seed(2104)
    # np.random.seed(2104)

    # selecting appropriate device
    CUDA = torch.cuda.is_available()
    print("cuda available:", CUDA)
    device = torch.device("cuda:0" if CUDA else "cpu")

    # checking for data separation
    data_files = os.listdir(kwargs['data_dir'])
    if "innerdata_val.npy" in data_files:
        finer_data_split = True
    else:
        finer_data_split = False

    if finer_data_split:
        innerdata_val_path = os.path.join(kwargs['data_dir'], 'innerdata_val.npy')
    else:
        innerdata_val_path = os.path.join(kwargs['data_dir'], 'innerdata_test.npy')

    # data preprocessing
    data = LHCORD_data_handler(os.path.join(kwargs['data_dir'], 'innerdata_train.npy'),
                               innerdata_val_path,
                               os.path.join(kwargs['data_dir'], 'outerdata_train.npy'),
                               os.path.join(kwargs['data_dir'], 'outerdata_test.npy'),
                               None,
                               batch_size=kwargs['batch_size'],
                               device=device)
    if kwargs['datashift'] != 0:
        print("applying a datashift of", kwargs['datashift'])
        data.shift_data(kwargs['datashift'], constant_shift=False, random_shift=kwargs['random_shift'],
                        shift_mj1=True, shift_dm=True, additional_shift=False)
    data.preprocess_ANODE_data(no_logit=kwargs['no_logit'],
                               no_mean_shift=kwargs['no_logit'])
    if kwargs['inner_model']:
        train_loader = data.inner_ANODE_datadict_train['loader']
        test_loader = data.inner_ANODE_datadict_test['loader']
        data_std = data.inner_ANODE_datadict_train['std2_logit_fix']
        train_data_processing = prepare_processing_dict(data.inner_ANODE_datadict_train)
    else:
        train_loader = data.outer_ANODE_datadict_train['loader']
        test_loader = data.outer_ANODE_datadict_test['loader']
        data_std = data.outer_ANODE_datadict_train['std2_logit_fix']
        train_data_processing = prepare_processing_dict(data.outer_ANODE_datadict_train)
    pickle.dump(train_data_processing, open(os.path.join(kwargs['savedir'], 'data_processing.p'), 'wb'))

    # actual training
    anode = DensityEstimator(kwargs['config_file'], device=device,
                             verbose=kwargs['verbose'], bound=kwargs['no_logit'])
    model, optimizer = anode.model, anode.optimizer

    train_ANODE(model, optimizer, train_loader, test_loader, kwargs['model_file_name'],
                kwargs['epochs'], savedir=kwargs['savedir'], device=device, verbose=kwargs['verbose'],
                no_logit=kwargs['no_logit'], data_std=data_std)

    # plot losses
    train_losses = np.load(os.path.join(kwargs['savedir'], kwargs['model_file_name']+"_train_losses.npy"))
    val_losses = np.load(os.path.join(kwargs['savedir'], kwargs['model_file_name']+"_val_losses.npy"))
    plot_ANODE_losses(train_losses, val_losses, yrange=None,
                      savefig=os.path.join(kwargs['savedir'], kwargs['model_file_name']+"_loss_plot"),
                      suppress_show=True)

In [25]:
train_DE(**kwargs)

cuda available: False
DensityEstimator has 274800 parameters
n_nans = 0
n_highs = 0
n_nans = 0
n_highs = 0
train_loss =  5.839945674247761
val_loss =  5.839027536881937

Epoch: 0
n_nans = 0
n_highs = 0
train_loss =  5.347782872424023
val_loss =  5.340106931892601

Epoch: 1
n_nans = 0
n_highs = 0
train_loss =  5.325487768468568
val_loss =  5.336295588596447

Epoch: 2
n_nans = 0
n_highs = 0
train_loss =  5.322919966737317
val_loss =  5.334799012622318

Epoch: 3
n_nans = 0
n_highs = 0
train_loss =  5.321476017453504
val_loss =  5.333713773134592

Epoch: 4
n_nans = 0
n_highs = 0
train_loss =  5.320213466970277
val_loss =  5.333408603797087

Epoch: 5
n_nans = 0
n_highs = 0
train_loss =  5.319652804100296
val_loss =  5.332585028699927

Epoch: 6
n_nans = 0
n_highs = 0
train_loss =  5.318870215899185
val_loss =  5.332615278862618

Epoch: 7
n_nans = 0
n_highs = 0
train_loss =  5.318315258486376
val_loss =  5.332469183045465

Epoch: 8
n_nans = 0
n_highs = 0
train_loss =  5.318106327512674
val_lo

n_nans = 0
n_highs = 0
train_loss =  5.298797841396159
val_loss =  5.329059410739589

Epoch: 85
n_nans = 0
n_highs = 0
train_loss =  5.298588411643772
val_loss =  5.32975714271133

Epoch: 86
n_nans = 0
n_highs = 0
train_loss =  5.298495100003247
val_loss =  5.328940259443747

Epoch: 87
n_nans = 0
n_highs = 0
train_loss =  5.2984599942832915
val_loss =  5.327746349412042

Epoch: 88
n_nans = 0
n_highs = 0
train_loss =  5.298545998047918
val_loss =  5.328874072513065

Epoch: 89
n_nans = 0
n_highs = 0
train_loss =  5.2983193115412535
val_loss =  5.328588340733503

Epoch: 90
n_nans = 0
n_highs = 0
train_loss =  5.298404072887395
val_loss =  5.328005648948051

Epoch: 91
n_nans = 0
n_highs = 0
train_loss =  5.298261877798108
val_loss =  5.32799870259053

Epoch: 92
n_nans = 0
n_highs = 0
train_loss =  5.298302890795035
val_loss =  5.3280216455459595

Epoch: 93
n_nans = 0
n_highs = 0
train_loss =  5.298035208914103
val_loss =  5.327960004677644

Epoch: 94
n_nans = 0
n_highs = 0
train_loss =  5.

In [7]:
def find_best_epochs(num_models, **kwargs):
    """ looks through saved val-losses and creates list-of-paths of num_models best ones"""
    val_losses = np.load(os.path.join(kwargs['savedir'], kwargs['model_file_name']+"_val_losses.npy"))
    idx = np.argpartition(val_losses, num_models)[:num_models] #faster than argsort
    ret_list = []
    for index in idx:
        ret_list.append(os.path.join(kwargs['savedir'],
                                     kwargs['model_file_name']+'_epoch_'+str(index-1)+'.par'))
    return ret_list

In [8]:
data_creation_kwargs = {
    'savedir': save_dir,
    'datashift': datashift,
    'data_dir': data_dir,
    'random_shift': random_shift,
    'config_file': DE_config_file,
    'verbose': verbose,
    'fiducial_cut': fiducial_cut,
    'n_samples': cf_n_samples,
    'realistic_conditional': cf_realistic_conditional,
    'KDE_bandwidth': cf_KDE_bandwidth,
    'oversampling': cf_oversampling,
    'no_extra_signal': no_extra_signal,
    'CWoLa': False,
    'supervised': False,
    'idealized_AD': False,
    'no_logit': cf_no_logit,
    'no_logit_trained': DE_no_logit,
    'external_samples': cf_external_samples,
    'SR_min': cf_SR_min,
    'SR_max': cf_SR_max,
    'extra_bkg': cf_extra_bkg,
    'separate_val_set': cf_separate_val_set,
    'ANODE_models': find_best_epochs(10, **kwargs)
}

In [9]:
import torch
import numpy as np
from data_handler import LHCORD_data_handler, sample_handler, mix_data_samples, plot_data_sample_comparison
from density_estimator import DensityEstimator

def create_data(**kwargs):

    assert not ((not (kwargs['supervised'] or kwargs['idealized_AD'] or kwargs['CWoLa']) and\
                 kwargs['external_samples'] == "") and kwargs['ANODE_models'] == ""), (
                     "ANODE models need to be given unless CWoLa, supervised, idealized_AD or"
                     " external sampling is used.")

    # selecting appropriate device
    CUDA = torch.cuda.is_available()
    print("cuda available:", CUDA)
    device = torch.device("cuda:0" if CUDA else "cpu")

    # checking for data separation
    data_files = os.listdir(kwargs['data_dir'])
    if "innerdata_val.npy" in data_files:
        finer_data_split = True
    else:
        finer_data_split = False

    if finer_data_split:
        innerdata_train_path = [os.path.join(kwargs['data_dir'], 'innerdata_train.npy')]
        innerdata_val_path = [os.path.join(kwargs['data_dir'], 'innerdata_val.npy')]
        innerdata_test_path = [os.path.join(kwargs['data_dir'], 'innerdata_test.npy')]
        if "innerdata_extrabkg_test.npy" in data_files:
            innerdata_test_path.append(os.path.join(kwargs['data_dir'], 'innerdata_extrabkg_test.npy'))
        extrasig_path = None
        if kwargs['supervised']:
            innerdata_train_path = []
            innerdata_val_path = []
            innerdata_train_path.append(os.path.join(kwargs['data_dir'], 'innerdata_extrasig_train.npy'))
            innerdata_val_path.append(os.path.join(kwargs['data_dir'], 'innerdata_extrasig_val.npy'))
            innerdata_train_path.append(os.path.join(kwargs['data_dir'], 'innerdata_extrabkg_train.npy'))
            innerdata_val_path.append(os.path.join(kwargs['data_dir'], 'innerdata_extrabkg_val.npy'))
            extra_bkg = None
        elif kwargs['idealized_AD']:
            extra_bkg = [os.path.join(kwargs['data_dir'], 'innerdata_extrabkg_train.npy'),
                         os.path.join(kwargs['data_dir'], 'innerdata_extrabkg_val.npy')]
        else:
            extra_bkg = None

    else:
        innerdata_train_path = os.path.join(kwargs['data_dir'], 'innerdata_train.npy')
        extrasig_path = os.path.join(kwargs['data_dir'], 'innerdata_extrasig.npy')
        if kwargs['extra_bkg']:
            extra_bkg = os.path.join(kwargs['data_dir'], 'innerdata_extrabkg.npy')
        else:
            extra_bkg = None
        innerdata_val_path = None
        innerdata_test_path = os.path.join(kwargs['data_dir'], 'innerdata_test.npy')

    # data preprocessing
    data = LHCORD_data_handler(innerdata_train_path,
                               innerdata_test_path,
                               os.path.join(kwargs['data_dir'], 'outerdata_train.npy'),
                               os.path.join(kwargs['data_dir'], 'outerdata_test.npy'),
                               extrasig_path,
                               inner_extrabkg_path=extra_bkg,
                               inner_val_path=innerdata_val_path,
                               batch_size=256,
                               device=device)
    if kwargs['datashift'] != 0:
        print("applying a datashift of", kwargs['datashift'])
        data.shift_data(kwargs['datashift'], constant_shift=False, random_shift=kwargs['random_shift'],
                        shift_mj1=True, shift_dm=True, additional_shift=False)

    if kwargs['CWoLa']:
        # data preprocessing
        samples = None
        data.preprocess_CWoLa_data(fiducial_cut=kwargs['fiducial_cut'], no_logit=kwargs['no_logit'],
                                   outer_range=(kwargs['SR_min']-0.2, kwargs['SR_max']+0.2))

    else:
        # data preprocessing
        data.preprocess_ANODE_data(fiducial_cut=kwargs['fiducial_cut'],
                                   no_logit=kwargs['no_logit_trained'],
                                   no_mean_shift=kwargs['no_logit_trained'])

        # model instantiation
        if len(kwargs['external_samples']) > 0:
            model_list = None
            loaded_samples = [np.load(sample_path) for sample_path in kwargs['external_samples']]
            external_sample = np.concatenate(loaded_samples)
        else:
            model_list = []
            for model_path in kwargs['ANODE_models']:
                anode = DensityEstimator(kwargs['config_file'],
                                         eval_mode=True,
                                         load_path=model_path,
                                         device=device, verbose=kwargs['verbose'],
                                         bound=kwargs['no_logit_trained'])
                model_list.append(anode.model)
            external_sample = None

        # generate samples
        if not kwargs['supervised'] and not kwargs['idealized_AD']:
            uniform_cond = not kwargs['realistic_conditional']
            samples = sample_handler(model_list, kwargs['n_samples'], data, cond_min=kwargs['SR_min'],
                                     cond_max=kwargs['SR_max'], uniform_cond=uniform_cond,
                                     external_sample=external_sample,
                                     device=device, no_logit=kwargs['no_logit_trained'],
                                     no_mean_shift=kwargs['no_logit_trained'],
                                     KDE_bandwidth=kwargs['KDE_bandwidth'])
        else:
            samples = None

        # redo data preprocessing if the classifier should not use logit but ANODE did
        data.preprocess_ANODE_data(fiducial_cut=kwargs['fiducial_cut'], no_logit=kwargs['no_logit_trained'],
                                   no_mean_shift=kwargs['no_logit_trained'])

        # sample preprocessing
        if not kwargs['supervised'] and not kwargs['idealized_AD']:
            samples.preprocess_samples(fiducial_cut=kwargs['fiducial_cut'], no_logit=kwargs['no_logit_trained'],
                                       no_mean_shift=kwargs['no_logit_trained'])


    # sample mixing
    X_train, y_train, X_test, y_test, X_extrasig, y_extrasig = mix_data_samples(
        data, samples_handler=samples, oversampling=kwargs['oversampling'],
        savedir=kwargs['savedir'], CWoLa=kwargs['CWoLa'], supervised=kwargs['supervised'],
        idealized_AD=kwargs['idealized_AD'], separate_val_set=kwargs['separate_val_set'] or finer_data_split)

    # sanity checks
    if not kwargs['CWoLa'] and not kwargs['supervised'] and not kwargs['idealized_AD']:
        samples.sanity_check(savefig=os.path.join(kwargs['savedir'], "sanity_check"), suppress_show=True)
        samples.sanity_check_after_cuts(savefig=os.path.join(kwargs['savedir'], "sanity_check_cuts"),
                                        suppress_show=True)

    if kwargs['supervised'] or kwargs['separate_val_set'] or finer_data_split:
        X_val = X_extrasig
        if kwargs['supervised']:
            y_train = X_train[:, -1]
            y_test = X_test[:, -1]
            y_val = X_val[:, -1]
        else:
            y_val = X_val[:, -2]
        plot_data_sample_comparison(X_val, y_val, title="validation set",
                                    savefig=os.path.join(kwargs['savedir'],
                                                         "data_sample_comparison_val"),
                                    suppress_show=True)

    plot_data_sample_comparison(X_train, y_train, title="training set",
                                savefig=os.path.join(kwargs['savedir'], "data_sample_comparison_train"),
                                suppress_show=True)
    plot_data_sample_comparison(X_test, y_test, title="test set",
                                savefig=os.path.join(kwargs['savedir'], "data_sample_comparison_test"),
                                suppress_show=True)

    print("number of training data =", X_train.shape[0])
    print("number of test data =", X_test.shape[0])
    if not kwargs['no_extra_signal']:
        if kwargs['supervised'] or kwargs['separate_val_set'] or finer_data_split:
            print("number of validation data =", X_val.shape[0])
        elif extrasig_path is not None:
            print("number of extra signal data =", X_extrasig.shape[0])

In [10]:
create_data(**data_creation_kwargs)

cuda available: False
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_53.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_60.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_68.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_40.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_44.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_63.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_87.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_models/my_ANODE_model_epoch_99.par
DensityEstimator has 274800 parameters
Loading model parameters from CATHODE_model

In [4]:
classifier_kwargs = {
    'config_file': cf_config_file,
    'data_dir': save_dir,
    'savedir': save_dir,
    'verbose': verbose,
    'epochs': cf_epochs,
    'n_runs': cf_n_runs,
    'batch_size': cf_batch_size,
    'no_extra_signal': no_extra_signal,
    'use_mjj': cf_use_mjj,
    'supervised': False,
    'use_class_weights': cf_oversampling or cf_use_class_weights,
    'CWoLa': False,
    'SR_center': cf_SR_center,
    'save_model': cf_save_model,
    'separate_val_set': cf_separate_val_set
}

In [5]:
from classifier_training_utils import train_n_models, plot_classifier_losses
from evaluation_utils import minimum_val_loss_model_evaluation
import matplotlib as mpl

def train_classifier(**kwargs):

    # loading the data
    # TODO get rid of the y's since the information is fully included in X
    X_train = np.load(os.path.join(kwargs['data_dir'], 'X_train.npy'))
    X_test = np.load(os.path.join(kwargs['data_dir'], 'X_test.npy'))
    y_train = np.load(os.path.join(kwargs['data_dir'], 'y_train.npy'))
    y_test = np.load(os.path.join(kwargs['data_dir'], 'y_test.npy'))
    if kwargs['no_extra_signal'] or kwargs['supervised']:
        X_extrasig = None
    else:
        X_extrasig = np.load(os.path.join(kwargs['data_dir'], 'X_extrasig.npy'))
    if kwargs['supervised'] or kwargs['separate_val_set']:
        X_val = np.load(os.path.join(kwargs['data_dir'], 'X_validation.npy'))        
    else:
        X_val = None

    if kwargs['save_model']:
        if not os.path.exists(kwargs['savedir']):
            os.makedirs(kwargs['savedir'])
        save_model = os.path.join(kwargs['savedir'], "model")
    else:
        save_model = None

    # actual training
    loss_matris, val_loss_matris = train_n_models(
        kwargs['n_runs'], kwargs['config_file'], kwargs['epochs'], X_train, y_train, X_test, y_test,
        X_extrasig=X_extrasig, X_val=X_val, use_mjj=kwargs['use_mjj'], batch_size=kwargs['batch_size'],
        supervised=kwargs['supervised'], use_class_weights=kwargs['use_class_weights'],
        CWoLa=kwargs['CWoLa'], SR_center=kwargs['SR_center'], verbose=kwargs['verbose'],
        savedir=kwargs['savedir'], save_model=save_model)

    if kwargs['save_model']:
        minimum_val_loss_model_evaluation(kwargs['data_dir'], kwargs['savedir'], n_epochs=10,
                                use_mjj=kwargs['use_mjj'], extra_signal=not kwargs['no_extra_signal'])

    for i in range(loss_matris.shape[0]):
        plot_classifier_losses(
            loss_matris[i], val_loss_matris[i],
            savefig=save_model+"_run"+str(i)+"_loss_plot",
            suppress_show=True
        )

In [6]:
train_classifier(**classifier_kwargs)

Training model nr 0...
training epoch nr 0
training loss: 0.6932564973831177
validation loss: 0.6931200623512268
training epoch nr 1
training loss: 0.6931014060974121
validation loss: 0.6932196021080017
training epoch nr 2
training loss: 0.6930747032165527
validation loss: 0.6930397748947144
training epoch nr 3
training loss: 0.6930102705955505
validation loss: 0.6930329203605652
training epoch nr 4
training loss: 0.6929495334625244
validation loss: 0.6932007074356079
training epoch nr 5
training loss: 0.6928628087043762
validation loss: 0.6929649710655212
training epoch nr 6
training loss: 0.6927865147590637
validation loss: 0.6930946707725525
training epoch nr 7
training loss: 0.6926776766777039
validation loss: 0.6928848028182983
training epoch nr 8
training loss: 0.6926499605178833
validation loss: 0.6929194927215576
training epoch nr 9
training loss: 0.6926344037055969
validation loss: 0.6928948760032654
training epoch nr 10
training loss: 0.6925844550132751
validation loss: 0.692

training loss: 0.6895214319229126
validation loss: 0.6958550810813904
training epoch nr 91
training loss: 0.6894177198410034
validation loss: 0.6964117288589478
training epoch nr 92
training loss: 0.6894461512565613
validation loss: 0.6960422396659851
training epoch nr 93
training loss: 0.6894630193710327
validation loss: 0.695569634437561
training epoch nr 94
training loss: 0.6892589926719666
validation loss: 0.6960283517837524
training epoch nr 95
training loss: 0.6893377304077148
validation loss: 0.6966918110847473
training epoch nr 96
training loss: 0.6891807317733765
validation loss: 0.6965463757514954
training epoch nr 97
training loss: 0.6892654895782471
validation loss: 0.6964305639266968
training epoch nr 98
training loss: 0.6891840696334839
validation loss: 0.6961252093315125
training epoch nr 99
training loss: 0.689077615737915
validation loss: 0.6962637305259705
minimum validation loss epochs: [10 11 13 12  9  7 14 17  8  5]


In [48]:
train_classifier(**classifier_kwargs)

Training model nr 0...
training epoch nr 0
training loss: 0.6932389140129089
validation loss: 0.6931337714195251
training epoch nr 1
training loss: 0.6931347846984863
validation loss: 0.6931878328323364
training epoch nr 2
training loss: 0.6931366324424744
validation loss: 0.6931458115577698
training epoch nr 3
training loss: 0.6930834054946899
validation loss: 0.6932743787765503
training epoch nr 4
training loss: 0.6930044889450073
validation loss: 0.6930981874465942
training epoch nr 5
training loss: 0.6929227709770203
validation loss: 0.6932023763656616
training epoch nr 6
training loss: 0.6928586959838867
validation loss: 0.6930224895477295
training epoch nr 7
training loss: 0.6927919983863831
validation loss: 0.6929300427436829
training epoch nr 8
training loss: 0.6926838755607605
validation loss: 0.6929951906204224
training epoch nr 9
training loss: 0.6925961971282959
validation loss: 0.6929051280021667
training epoch nr 10
training loss: 0.6925469040870667
validation loss: 0.693

training loss: 0.6895325779914856
validation loss: 0.6955487728118896
training epoch nr 91
training loss: 0.6894334554672241
validation loss: 0.6952421069145203
training epoch nr 92
training loss: 0.6892732381820679
validation loss: 0.6959866881370544
training epoch nr 93
training loss: 0.689393162727356
validation loss: 0.6960118412971497
training epoch nr 94
training loss: 0.6893923878669739
validation loss: 0.6955026388168335
training epoch nr 95
training loss: 0.689291775226593
validation loss: 0.6954663991928101
training epoch nr 96
training loss: 0.6892255544662476
validation loss: 0.6952071785926819
training epoch nr 97
training loss: 0.6891579031944275
validation loss: 0.6958538293838501
training epoch nr 98
training loss: 0.6892915964126587
validation loss: 0.6953274011611938
training epoch nr 99
training loss: 0.6892670392990112
validation loss: 0.6953089833259583
minimum validation loss epochs: [24 13  9  7 18 22  8 14  6  4]


In [7]:
_ = full_single_evaluation(save_dir, save_dir, n_ensemble_epochs=10,
                           extra_signal=not no_extra_signal, sic_range=(0, 20),
                           savefig=os.path.join(save_dir, 'result_SIC'))