In [1]:
##%matplotlib widget
## %matplotlib notebook seems to require ipympl in the environment, installed either
## as part of the conda environment or with "pip install ipympl".
## Without it, NO plots are shown in the notebook, although plt.savefig() still works.
%matplotlib notebook  
##%matplotlib inline    ## --plt.savefig()  works, but re-sizing does NOT

This notebook is derived from RunModel_Demo_28November2020-SimpleCNNLayer_Ca, which "worked" but did not reach as high an efficiency as earlier models. Here, the number of hidden convolutional layers is increased by 2 and the number of output channels per layer is increased by using SimpleCNN7Layer_Ca_Two_KDE in place of SimpleCNN5Layer_Ca_Two_KDE. (Note: the model actually imported in the code below is `AllCNN8Layer_Ca_Two_KDE_withPcnn` from `model.models_mds_G`, so this description may be out of date.)

The goal is to use two channels of input -- poca_KDE_A and poca_KDE_B to see if this improves the performance of the algorithm, all else being equal.

"withPfc" denotes using poca_KDE_A_xMax & poca_KDE_A_yMax as perturbative features

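We use full LHCb MC for both training and validation here. This text originally described a training sample of slightly less than 100K events; as recorded in the data-loading output below, this run constructs a 260,000-event training set and an 18,349-event validation set.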


Check the current GPU usage. Please try to be nice!

In [2]:
!nvidia-smi

Wed Dec  8 15:49:25 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.56       CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  On   | 00000000:18:00.0 Off |                  N/A |
| 29%   28C    P8    20W / 250W |      3MiB / 11019MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  On   | 00000000:3B:00.0 Off |                  N/A |
| 29%   29C    P8    21W / 250W |   2662MiB / 11019MiB |      0%      Default |
|       

> **WARNING**: The card numbers here are *not* the same as in CUDA. You have been warned.

## Imports

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
import pandas as pd
import mlflow

# Python 3 standard library
from pathlib import Path

from torchsummary import summary

### Set up local parameters

In [4]:
# ---- Run configuration: everything a reader is likely to tune ----

# Number of epochs to train in this run.
n_epochs = 50

# `name` is the stem of every output file name and also names the output folder.
#
# History (mds 201128): iter2 follows from iter0, in which only the last layer's
# weights were allowed to be learned.  Due to operator error that file/folder was
# labelled iter1 rather than iter2, so the labelling is slightly misleading; it was
# the first iteration in which all weights were allowed to vary.
#
# NOTE(review): the name below ends in "aymm_0p0", while the model-setup cell sets
# asymmetry_parameter = 2.5 -- confirm the label is the intended one.
name = '18December2020_AllCNN8Layer_withPcnn_50epochs_lr_1em6_aymm_0p0'

# Output folder (change if you want).
#
# Special instructions for those working on goofy at UC: make sure the folder does
# NOT live in a subdirectory of your home directory -- that disk has very little
# capacity.  Instead use a subdirectory of /share/lazy, with a symbolic link to it
# in this (the notebooks) directory.
folder = 'ML/' + name
output = Path(folder)

# Number of events per mini-batch.
batch_size = 128

# Step size handed to the optimizer.
learning_rate = 1e-6

  and should_run_async(code)


Make the output directory if it does not exist:

In [5]:
# Create the output directory; exist_ok=True makes re-running this cell harmless.
# Parent directories are NOT created (mkdir's `parents` defaults to False), so the
# 'ML' directory that `output` sits in must already exist.
output.mkdir(exist_ok=True)

## Get the helper functions

Add the directory with the model
definitions to the path so we can import from it:

> When you type `import X`,
Python searches `sys.path` for a python
file named `X.py` to import. So we need to add the model directory to the path.

In [6]:
# Data-loading helper, from the module model.collectdata_poca_KDE
from model.collectdata_poca_KDE import collect_data_poca

# Loss function, from the module model.alt_loss_A
# (the commented-out line is presumably the import it replaced)
##from loss import Loss
from model.alt_loss_A import Loss

# Training helpers, from the module model.training
# NOTE: trainNet and select_gpu are imported a second time, together with Results,
# a few lines further down; the two imports are redundant but harmless.
from model.training import trainNet, select_gpu

# Model class, from the module model.models_mds_G, aliased to `Model` so the rest of
# the notebook does not depend on which architecture is chosen.
##  will start with model from TwoFeatures_CNN6Layer_A in the first instance
##  see relevant cell below


from model.models_mds_G import AllCNN8Layer_Ca_Two_KDE_withPcnn as Model

# Results is the named tuple whose fields become the columns of the `results` frame.
from model.training import trainNet, select_gpu, Results
# Plot helpers used for the live training plot.
from model.plots import dual_train_plots, replace_in_ax

# Miscellaneous utilities; only save_to_mlflow is used in the cells visible here.
from model.utilities import load_full_state, count_parameters, Params, save_to_mlflow


    pip install -U awkward1

In Python:

    >>> import awkward1 as ak
    >>> new_style_array = ak.from_awkward0(old_style_array)
    >>> old_style_array = ak.to_awkward0(new_style_array)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  {"minsize": 8192, "types": [numpy.bool_, numpy.bool, numpy.integer], "contexts": "*", "pair": (zlib.compress, ("zlib", "decompress"))},
  from ._conv import register_converters as _register_converters
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  long_ = _make_signed(np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if (hasattr(numpy, value)
Deprecated in NumPy 1.20; for 

In [7]:
# This gets built up during the run - do not rerun this cell.
# Re-running would reset `results` to an empty frame; the training cell passes
# len(results) as epoch_start and adds one row per epoch.
# The columns are the field names of the Results named tuple from model.training.
results = pd.DataFrame([], columns=Results._fields)

  and should_run_async(code)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):


Set up Torch device configuration. All tensors and model parameters need to know where to be put.
This takes a BUS ID number: The BUS ID is the same as the listing at the top of this script.

In [8]:
# Pick the device that tensors and model parameters will be placed on
# (select_gpu is imported from model.training).
# NOTE(review): the text above says the argument matches the nvidia-smi listing,
# while the warning under that listing says card numbers differ from CUDA's --
# confirm which numbering select_gpu expects.
device = select_gpu(0)

1 available GPUs (initially using device 0):
  0 GeForce RTX 2080 Ti


## Loading data

Load the dataset, split into parts, then move to device (see `collectdata.py` in the `../model` directory)

In [9]:
## newer vernacular
## Training dataset. You can put as many files here as desired.

## History: an earlier DEMO version of this cell used a single 80K-event training file
## (the model starts from well-trained weights, and a smaller training set reduces both
## the load time and the time per epoch).  This version loads the four files listed below;
## dataAA/pv_HLT1CPU_D0piMagUp_12Dec.h5 was added on 201212.
##  Set the option load_XandXsq = True to use both KDE and KDE^2 as input features.

##  Event counts noted by the author:
##    pv_HLT1CPU_D0piMagUp_12Dec.h5 + pv_HLT1CPU_MinBiasMagDown_14Nov.h5 contain 138810 events
##    pv_HLT1CPU_MinBiasMagUp_14Nov.h5 contains 51349
##  Choose which files to "load" and which slices to take.  With the settings below the
##  recorded output reports a 260000 event training dataset and an 18349 event
##  validation dataset.
## NOTE(review): pv_HLT1CPU_MinBiasMagUp_14Nov.h5 appears in BOTH the training list and
## the validation loader.  Whether the training slice reaches into the validation range
## (events 33000 onward of that file) depends on the sizes of the other three files and
## on how collect_data_poca applies `slice` -- confirm there is no train/validation overlap.
train_loader = collect_data_poca(
                              'dataAA/pv_HLT1CPU_MinBiasMagDown_14Nov.h5',
                              'dataAA/pv_HLT1CPU_JpsiPhiMagDown_12Dec.h5',
                              'dataAA/pv_HLT1CPU_D0piMagUp_12Dec.h5',
                              'dataAA/pv_HLT1CPU_MinBiasMagUp_14Nov.h5',
                               slice = slice(None,260000),
##                             device = device,
                             batch_size=batch_size,
## Author's note on the (currently commented-out) device argument:
## if we are using a larger dataset (240K events, with the datasets above, and 11 GB of GPU memory),
## note the dataset will overflow the GPU memory; device=device will allow the data to move back
## and forth between the CPU and GPU memory. While this allows use of a larger dataset, it slows
## down performance by about 10%.  So comment out when not needed.
## NOTE(review): the exact effect of device= is defined inside collect_data_poca -- confirm there.
##                           device=device,
                            masking=True, shuffle=True,
                            load_XandXsq=False,
                            load_A_and_B = True,
                            load_xy=True)

# Validation dataset. You can slice to reduce the size.
# It is not shuffled (shuffle=False), unlike the training loader.
## dataAA -> /share/lazy/sokoloff/ML-data_AA/
val_loader = collect_data_poca(
##                          'dataAA/pv_HLT1CPU_MinBiasMagDown_14Nov.h5',
                            'dataAA/pv_HLT1CPU_MinBiasMagUp_14Nov.h5',
##                            'dataAA/pv_HLT1CPU_D0piMagUp_12Dec.h5',
                          batch_size=batch_size,
                          slice=slice(33000,None),
##                          device=device,
                          masking=True, shuffle=False,
                          load_XandXsq=False,
                          load_A_and_B = True,
                          load_xy=True)

Loading data...
Loaded dataAA/pv_HLT1CPU_MinBiasMagDown_14Nov.h5 in 32.76 s
Loaded dataAA/pv_HLT1CPU_JpsiPhiMagDown_12Dec.h5 in 58.39 s
Loaded dataAA/pv_HLT1CPU_D0piMagUp_12Dec.h5 in 58.76 s
Loaded dataAA/pv_HLT1CPU_MinBiasMagUp_14Nov.h5 in 40.14 s
Constructing 260000 event dataset took 3.835 s
Loading data...
Loaded dataAA/pv_HLT1CPU_MinBiasMagUp_14Nov.h5 in 30.81 s
Constructing 18349 event dataset took 0.161 s


In [10]:
# Point mlflow at the shared, file-based tracking store and choose the experiment
# that the training run is logged under.
# NOTE(review): the store location is a hard-coded absolute path on the shared disk.
# The recorded output of this cell shows mlflow tracebacks about a missing
# 'ML/meta.yaml' inside that directory -- presumably a non-experiment folder living
# in the store; confirm and clean up if so.
mlflow.tracking.set_tracking_uri('file:/share/lazy/pv-finder_model_repo')
mlflow.set_experiment('Top Models LHCb MC Data')

Traceback (most recent call last):
  File "/apps/miniconda3/envs/june2020-gpu/lib/python3.7/site-packages/mlflow/store/tracking/file_store.py", line 197, in list_experiments
    experiment = self._get_experiment(exp_id, view_type)
  File "/apps/miniconda3/envs/june2020-gpu/lib/python3.7/site-packages/mlflow/store/tracking/file_store.py", line 260, in _get_experiment
    meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/apps/miniconda3/envs/june2020-gpu/lib/python3.7/site-packages/mlflow/utils/file_utils.py", line 167, in read_yaml
    raise MissingConfigException("Yaml file '%s' does not exist." % file_path)
mlflow.exceptions.MissingConfigException: Yaml file '/share/lazy/pv-finder_model_repo/ML/meta.yaml' does not exist.
Traceback (most recent call last):
  File "/apps/miniconda3/envs/june2020-gpu/lib/python3.7/site-packages/mlflow/store/tracking/file_store.py", line 197, in list_experiments
    experiment = self._get_experiment(exp_id, view_type)
  File "/apps/

# Preparing the model

Prepare a model, use multiple GPUs if they are VISIBLE, and move the model to the device.

In [11]:
# Instantiate the network selected in the imports cell.
model = Model()

##summary(model, input_size=(4, 4000))
##print(model.parameters)

## Optional weight freezing.
## The first `n_frozen_children` child modules of the model keep the weights they are
## given (e.g. weights taken from an earlier model trained with the original target
## histograms); only the remaining children are trained.  Presumably this lets the
## learning focus on either the perturbative or the non-perturbative features first,
## so training gets started faster.
## The default of 0 freezes nothing, i.e. all weights are allowed to vary.
## (Previously the test was `ct < 0`, which can never be true, and the branch set
## requires_grad = True while announcing False.)
n_frozen_children = 0
for ct, child in enumerate(model.children()):
    print('ct, child = ',ct, "  ", child)
    if ct < n_frozen_children:
        print("     About to set param.requires_grad=False for ct = ", ct, "params")
        for param in child.parameters():
            param.requires_grad = False

##  mds 200121 loss = Loss(epsilon=1e-5,coefficient=1.0)
# Passed to Loss as `coefficient`; also logged to mlflow as 'Param: Asymmetry'.
# NOTE(review): the output folder name ends in "aymm_0p0" while the value used here
# is 2.5 -- confirm which is intended.
asymmetry_parameter=2.5
loss = Loss(epsilon=1e-5,coefficient=asymmetry_parameter)

# Frozen parameters (if any) never receive gradients, so Adam leaves them untouched
# even though the full parameter list is handed to it.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

ct, child =  0    Conv1d(2, 25, kernel_size=(25,), stride=(1,), padding=(12,))
ct, child =  1    Conv1d(25, 25, kernel_size=(15,), stride=(1,), padding=(7,))
ct, child =  2    Conv1d(25, 25, kernel_size=(15,), stride=(1,), padding=(7,))
ct, child =  3    Conv1d(25, 25, kernel_size=(15,), stride=(1,), padding=(7,))
ct, child =  4    Conv1d(25, 25, kernel_size=(15,), stride=(1,), padding=(7,))
ct, child =  5    Conv1d(25, 25, kernel_size=(15,), stride=(1,), padding=(7,))
ct, child =  6    Conv1d(25, 1, kernel_size=(5,), stride=(1,), padding=(2,))
ct, child =  7    Dropout(p=0.15, inplace=False)
ct, child =  8    Dropout(p=0.15, inplace=False)
ct, child =  9    Dropout(p=0.15, inplace=False)
ct, child =  10    Dropout(p=0.15, inplace=False)
ct, child =  11    Dropout(p=0.15, inplace=False)
ct, child =  12    Dropout(p=0.15, inplace=False)
ct, child =  13    Dropout(p=0.15, inplace=False)
ct, child =  14    Linear(in_features=4000, out_features=4000, bias=True)
ct, child =  15    Conv1d(1,

In [12]:
# Wrap the model for multi-GPU execution only when more than one GPU is visible.
n_gpus = torch.cuda.device_count()
print("Let's use", n_gpus, "GPUs!")
model = torch.nn.DataParallel(model) if n_gpus > 1 else model

Let's use 1 GPUs!


Let's load previously trained weights into the model and then move the model's weight matrices to the GPU:

In [13]:
##print('output = ',output)
##print('oldOutput = ',oldOutput)
## Warm start: copy the weights of a pre-existing model into `model`.
## Only entries whose name AND shape match the current model are copied, so this
## also works when just some layers are shared with the earlier model.
##  see example at https://discuss.pytorch.org/t/how-to-load-part-of-pre-trained-model/1113

##   ML -> /share/lazy/sokoloff/ML
## note that m1p0 in file name is mis-leading  201212
oldName = '17December2020_AllCNN8Layer_withPcnn_200epochs_moreDataIter1_lr_1em6_aymm_1p0_majorTesting'
oldFolder = oldName
suffix = 'final'
dictName = 'ML/'+oldFolder+'/'+oldName+'_'+suffix+'.pyt'
print('dictName = ',dictName)

# map_location='cpu' lets the checkpoint load even if it was saved from a GPU index
# that is not visible on this machine; the model is moved to `device` in a later cell.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
pretrained_dict = torch.load(dictName, map_location='cpu')

# If the model was wrapped in DataParallel, its state-dict keys carry a "module."
# prefix and would match none of the checkpoint keys; work on the wrapped module.
target_model = model.module if isinstance(model, torch.nn.DataParallel) else model
model_dict = target_model.state_dict()

# Likewise tolerate a checkpoint that was itself saved from a DataParallel model.
pretrained_dict = {(k[len('module.'):] if k.startswith('module.') else k): v
                   for k, v in pretrained_dict.items()}

## mds 190725 for debugging
print("for model_dict")
for index, k in enumerate(model_dict):
    print("index, k =  ",index,"  ",k)

print(" \n","  for pretrained_dict")
for index, k in enumerate(pretrained_dict):
    print("index, k =  ",index,"  ",k)
## mds

print("model_dict instantiated")
# 1. filter out unnecessary keys (unknown name, or same name but different shape)
skipped_keys = [k for k, v in pretrained_dict.items()
                if k not in model_dict or v.shape != model_dict[k].shape]
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k not in skipped_keys}
# Report anything that is NOT being warm-started instead of skipping it silently.
not_loaded_keys = [k for k in model_dict if k not in pretrained_dict]
if skipped_keys:
    print("WARNING: checkpoint entries ignored (no match in model):", skipped_keys)
if not_loaded_keys:
    print("WARNING: model entries keeping their initial values:", not_loaded_keys)
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict)
# 3. load the new state dict
#   model_dict holds every key of the model, so the default strict check applies;
#   strict=False is no longer needed even when the two models do not agree exactly.
#   see https://pytorch.org/docs/master/_modules/torch/nn/modules/module.html#Module.load_state_dict
target_model.load_state_dict(model_dict)


dictName =  ML/17December2020_AllCNN8Layer_withPcnn_200epochs_moreDataIter1_lr_1em6_aymm_1p0_majorTesting/17December2020_AllCNN8Layer_withPcnn_200epochs_moreDataIter1_lr_1em6_aymm_1p0_majorTesting_final.pyt
for model_dict
index, k =   0    conv1.weight
index, k =   1    conv1.bias
index, k =   2    conv2.weight
index, k =   3    conv2.bias
index, k =   4    conv3.weight
index, k =   5    conv3.bias
index, k =   6    conv4.weight
index, k =   7    conv4.bias
index, k =   8    conv5.weight
index, k =   9    conv5.bias
index, k =   10    conv6.weight
index, k =   11    conv6.bias
index, k =   12    conv7.weight
index, k =   13    conv7.bias
index, k =   14    fc1.weight
index, k =   15    fc1.bias
index, k =   16    finalFilter.weight
index, k =   17    finalFilter.bias
index, k =   18    ppConv1.weight
index, k =   19    ppConv1.bias
index, k =   20    ppConv2.weight
index, k =   21    ppConv2.bias
index, k =   22    ppConv3.weight
index, k =   23    ppConv3.bias
index, k =   24    ppFc1

<All keys matched successfully>

In [14]:
# Move the model (now holding the pretrained weights) to the device selected earlier.
model = model.to(device)

In [15]:
# Count the trainable weights (parameters with requires_grad set); the total is
# logged to mlflow as 'Param: Parameters' by the training cell.
trainable_params = [p for p in model.parameters() if p.requires_grad]
parameters = sum(p.numel() for p in trainable_params)

# Name under which the mlflow run is recorded.
run_name = 'verify-paper'

## Train 



The body of this loop runs once per epoch. Results is a named tuple of values (loss per epoch for training and validation, time each). Start by setting up a plot first:

In [16]:
# Create the (initially empty) training plot.  The training cell updates it in place:
#   ax    - axis holding the training/validation cost lines
#   lax   - axis passed to replace_in_ax together with lines['eff'] (efficiency)
#   tax   - axis passed to replace_in_ax together with lines['fp']  (false-positive rate)
#   lines - dict of line handles, keyed 'train', 'val', 'eff' and 'fp'
ax, tax, lax, lines = dual_train_plots()
# Keep a handle on the figure so the training loop can redraw it.
fig = ax.figure
plt.tight_layout()

<IPython.core.display.Javascript object>

In [17]:
# Running sums of the validation efficiency and false-positive rate over the final
# epochs of this run; turned into averages on the last epoch.
avgEff = 0.0
avgFP = 0.0

# Number of final epochs to average over.  Capped at n_epochs so that runs shorter
# than 10 epochs divide by the number of epochs actually accumulated.
n_avg_epochs = min(10, n_epochs)

# trainNet yields one Results named tuple per epoch.  `i` counts epochs within THIS
# run (starting at 0), while epoch_start=len(results) tells trainNet how many epochs
# were already recorded, so the cell can be re-run to continue training.
train_iter = enumerate(trainNet(model, optimizer, loss,
                        train_loader, val_loader,
                        n_epochs, epoch_start=len(results),
                        notebook=True))

with mlflow.start_run(run_name=run_name) as run:
    for i, result in train_iter:
        print(result.cost)
        # Checkpoint the whole model every epoch (the file is overwritten each time)
        # and attach it to the mlflow run.
        torch.save(model, 'run_stats.pyt')
        mlflow.log_artifact('run_stats.pyt')

        # Accumulate over the last n_avg_epochs epochs, INCLUDING the last one
        if i >= n_epochs - n_avg_epochs:
            avgEff += result.eff_val.eff_rate
            avgFP += result.eff_val.fp_rate

        # If we are on the last epoch, finish and report the averages
        if i == n_epochs - 1:
            print('Averaging...\n')
            avgEff /= n_avg_epochs
            avgFP /= n_avg_epochs
            mlflow.log_metric('10 Eff Avg.', avgEff)
            mlflow.log_metric('10 FP Avg.', avgFP)
            print('Average Eff: ', avgEff)
            print('Average FP Rate: ', avgFP)

        save_to_mlflow({
            'Metric: Training loss':result.cost,
            'Metric: Validation loss':result.val,
            'Metric: Efficiency':result.eff_val.eff_rate,
            'Metric: False positive rate':result.eff_val.fp_rate,
            'Param: Parameters':parameters,
#            'Param: Events':events,
            'Param: Asymmetry':asymmetry_parameter,
            'Param: Epochs':n_epochs,
            'Param: Learning Rate':learning_rate,
        }, step=i)

        # Add this epoch as a new row of `results`.
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; building a
        # one-row frame and concatenating is what append did internally.
        epoch_row = pd.Series(result._asdict()).to_frame().T.infer_objects()
        results = pd.concat([results, epoch_row], ignore_index=True)

        xs = results.index

        # Update the plot above
        lines['train'].set_data(results.index,results.cost)
        lines['val'].set_data(results.index,results.val)

        #filter first cost epoch (can be really large)
        max_cost = max(max(results.cost if len(results.cost)<2 else results.cost[1:]), max(results.val))
        min_cost = min(min(results.cost), min(results.val))

        # The plot limits need updating too
        ax.set_ylim(min_cost*.9, max_cost*1.1)
        ax.set_xlim(-.5, len(results.cost) - .5)

        # eff_val holds a named tuple per epoch; pull out the two rates for plotting
        replace_in_ax(lax, lines['eff'], xs, results['eff_val'].apply(lambda x: x.eff_rate))
        replace_in_ax(tax, lines['fp'], xs, results['eff_val'].apply(lambda x: x.fp_rate))

        # Redraw the figure
        fig.canvas.draw()

Number of batches: train = 2032, val = 144


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  file=sys.stderr,


HBox(children=(FloatProgress(value=0.0, description='Epochs', layout=Layout(flex='2'), max=50.0, style=Progres…

HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

x0 =  tensor([[5.1178e-38, 7.3820e-38, 3.1840e-38,  ..., 1.1949e-15, 6.3839e-17,
         2.4993e-11],
        [6.3960e-17, 5.5380e-15, 5.1700e-15,  ..., 1.8993e-15, 2.8607e-15,
         4.3166e-16],
        [1.8671e-28, 1.9284e-29, 3.5981e-30,  ..., 1.1682e-24, 1.0033e-24,
         1.5000e-23],
        ...,
        [2.3222e-34, 4.0661e-35, 1.6487e-36,  ..., 4.0809e-22, 1.5747e-23,
         1.8473e-23],
        [1.7712e-33, 4.7844e-30, 3.5013e-28,  ..., 6.4070e-25, 7.2583e-25,
         1.2036e-24],
        [1.2967e-14, 1.5939e-12, 2.4780e-11,  ..., 2.3043e-24, 3.5471e-24,
         4.4920e-24]], device='cuda:0', grad_fn=<SoftplusBackward>)
x1 =  tensor([[ 0.0271, -0.0070, -0.0472,  ..., -0.0207, -0.0015,  0.0149],
        [ 0.0084, -0.0008, -0.0041,  ..., -0.0648, -0.0376, -0.0154],
        [-0.0388, -0.0710, -0.0983,  ..., -0.0492, -0.0154,  0.0104],
        ...,
        [ 0.0700,  0.0680,  0.0638,  ..., -0.0650, -0.0293, -0.0020],
        [ 0.0948,  0.0951,  0.0961,  ..., -0.0187,  0.

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 1: train=4.27086, val=3.91716, took 176.62 s
  Validation Found 91641 of 93880, added 3094 (eff 97.62%) (0.169 FP/event)
4.270859236528319


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 2: train=4.26906, val=3.9109, took 187.09 s
  Validation Found 91645 of 93880, added 3091 (eff 97.62%) (0.168 FP/event)
4.269063576058609


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 3: train=4.26711, val=3.90892, took 178.96 s
  Validation Found 91642 of 93880, added 3092 (eff 97.62%) (0.169 FP/event)
4.267113708311648


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 4: train=4.26732, val=3.9071, took 167.85 s
  Validation Found 91637 of 93880, added 3092 (eff 97.61%) (0.169 FP/event)
4.26731766179556


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 5: train=4.26577, val=3.9059, took 171.92 s
  Validation Found 91637 of 93880, added 3099 (eff 97.61%) (0.169 FP/event)
4.265765871705971


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 6: train=4.26337, val=3.9056, took 173.46 s
  Validation Found 91630 of 93880, added 3092 (eff 97.60%) (0.169 FP/event)
4.26336633334831


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 7: train=4.26459, val=3.90503, took 174.31 s
  Validation Found 91622 of 93880, added 3070 (eff 97.59%) (0.167 FP/event)
4.264587278793178


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 8: train=4.2606, val=3.90355, took 173.19 s
  Validation Found 91629 of 93880, added 3085 (eff 97.60%) (0.168 FP/event)
4.260602195901195


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 9: train=4.26237, val=3.9023, took 173.23 s
  Validation Found 91635 of 93880, added 3099 (eff 97.61%) (0.169 FP/event)
4.262370673631589


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 10: train=4.26217, val=3.90289, took 171.4 s
  Validation Found 91630 of 93880, added 3089 (eff 97.60%) (0.168 FP/event)
4.2621698513275055


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 11: train=4.26228, val=3.90234, took 177.49 s
  Validation Found 91634 of 93880, added 3095 (eff 97.61%) (0.169 FP/event)
4.262279232714589


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 12: train=4.26158, val=3.90183, took 171.03 s
  Validation Found 91634 of 93880, added 3102 (eff 97.61%) (0.169 FP/event)
4.261584374144322


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 13: train=4.2608, val=3.90102, took 181.3 s
  Validation Found 91630 of 93880, added 3097 (eff 97.60%) (0.169 FP/event)
4.260802920526407


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 14: train=4.25909, val=3.90096, took 173.4 s
  Validation Found 91629 of 93880, added 3099 (eff 97.60%) (0.169 FP/event)
4.259086210192658


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 15: train=4.25933, val=3.89974, took 171.51 s
  Validation Found 91634 of 93880, added 3114 (eff 97.61%) (0.17 FP/event)
4.25932861151775


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 16: train=4.25967, val=3.90057, took 169.91 s
  Validation Found 91625 of 93880, added 3082 (eff 97.60%) (0.168 FP/event)
4.259666149775813


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 17: train=4.26185, val=3.89986, took 172.75 s
  Validation Found 91628 of 93880, added 3085 (eff 97.60%) (0.168 FP/event)
4.261854164977008


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 18: train=4.25973, val=3.89912, took 171.07 s
  Validation Found 91627 of 93880, added 3081 (eff 97.60%) (0.168 FP/event)
4.259728419440468


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 19: train=4.26128, val=3.898, took 174.25 s
  Validation Found 91628 of 93880, added 3097 (eff 97.60%) (0.169 FP/event)
4.261275661273266


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 20: train=4.26025, val=3.89787, took 187.96 s
  Validation Found 91629 of 93880, added 3105 (eff 97.60%) (0.169 FP/event)
4.2602474051197685


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 21: train=4.25799, val=3.89893, took 184.4 s
  Validation Found 91631 of 93880, added 3097 (eff 97.60%) (0.169 FP/event)
4.257992053712447


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 22: train=4.25904, val=3.89837, took 170.05 s
  Validation Found 91632 of 93880, added 3103 (eff 97.61%) (0.169 FP/event)
4.259042694108692


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 23: train=4.26038, val=3.89665, took 169.71 s
  Validation Found 91633 of 93880, added 3101 (eff 97.61%) (0.169 FP/event)
4.2603804809371315


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 24: train=4.26026, val=3.89742, took 172.63 s
  Validation Found 91635 of 93880, added 3099 (eff 97.61%) (0.169 FP/event)
4.26025870369058


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 25: train=4.26068, val=3.89797, took 171.21 s
  Validation Found 91621 of 93880, added 3082 (eff 97.59%) (0.168 FP/event)
4.26067534790147


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 26: train=4.2578, val=3.89874, took 169.34 s
  Validation Found 91619 of 93880, added 3084 (eff 97.59%) (0.168 FP/event)
4.2577950969690415


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 27: train=4.25948, val=3.89821, took 168.58 s
  Validation Found 91626 of 93880, added 3088 (eff 97.60%) (0.168 FP/event)
4.259482985670407


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 28: train=4.25738, val=3.89794, took 171.02 s
  Validation Found 91617 of 93880, added 3084 (eff 97.59%) (0.168 FP/event)
4.257375090844988


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 29: train=4.25867, val=3.89808, took 168.53 s
  Validation Found 91623 of 93880, added 3078 (eff 97.60%) (0.168 FP/event)
4.258671944418292


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 30: train=4.2583, val=3.8971, took 170.53 s
  Validation Found 91634 of 93880, added 3105 (eff 97.61%) (0.169 FP/event)
4.2582979332509


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 31: train=4.2603, val=3.89788, took 168.74 s
  Validation Found 91632 of 93880, added 3099 (eff 97.61%) (0.169 FP/event)
4.2602964502385285


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 32: train=4.26001, val=3.89766, took 178.75 s
  Validation Found 91623 of 93880, added 3072 (eff 97.60%) (0.167 FP/event)
4.260013466275583


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 33: train=4.25624, val=3.89697, took 189.82 s
  Validation Found 91627 of 93880, added 3074 (eff 97.60%) (0.168 FP/event)
4.25623506248936


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 34: train=4.25706, val=3.89593, took 173.41 s
  Validation Found 91628 of 93880, added 3098 (eff 97.60%) (0.169 FP/event)
4.257057984572227


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 35: train=4.25954, val=3.89635, took 178.32 s
  Validation Found 91626 of 93880, added 3076 (eff 97.60%) (0.168 FP/event)
4.259540324957352


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 36: train=4.25825, val=3.89595, took 171.5 s
  Validation Found 91631 of 93880, added 3111 (eff 97.60%) (0.17 FP/event)
4.258251519595075


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 37: train=4.25782, val=3.89664, took 169.66 s
  Validation Found 91626 of 93880, added 3101 (eff 97.60%) (0.169 FP/event)
4.257816890415948


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 38: train=4.2595, val=3.89386, took 167.74 s
  Validation Found 91647 of 93880, added 3124 (eff 97.62%) (0.17 FP/event)
4.259501166348382


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 39: train=4.25585, val=3.89589, took 159.99 s
  Validation Found 91629 of 93880, added 3094 (eff 97.60%) (0.169 FP/event)
4.255846993484366


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 40: train=4.25752, val=3.89508, took 155.89 s
  Validation Found 91631 of 93880, added 3092 (eff 97.60%) (0.169 FP/event)
4.257523758908895


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 41: train=4.25738, val=3.89582, took 155.58 s
  Validation Found 91626 of 93880, added 3081 (eff 97.60%) (0.168 FP/event)
4.257376391880625


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 42: train=4.25872, val=3.89616, took 155.46 s
  Validation Found 91627 of 93880, added 3092 (eff 97.60%) (0.169 FP/event)
4.25872041862898


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 43: train=4.25632, val=3.89537, took 155.05 s
  Validation Found 91628 of 93880, added 3088 (eff 97.60%) (0.168 FP/event)
4.256317275480961


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 44: train=4.258, val=3.89533, took 154.82 s
  Validation Found 91626 of 93880, added 3095 (eff 97.60%) (0.169 FP/event)
4.257997542502373


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 45: train=4.25719, val=3.89543, took 154.65 s
  Validation Found 91629 of 93880, added 3090 (eff 97.60%) (0.168 FP/event)
4.257192259698402


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 46: train=4.25571, val=3.89615, took 154.65 s
  Validation Found 91632 of 93880, added 3104 (eff 97.61%) (0.169 FP/event)
4.255708003020662


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 47: train=4.2548, val=3.8958, took 154.67 s
  Validation Found 91626 of 93880, added 3074 (eff 97.60%) (0.168 FP/event)
4.254796795019015


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 48: train=4.25604, val=3.89554, took 154.6 s
  Validation Found 91633 of 93880, added 3085 (eff 97.61%) (0.168 FP/event)
4.256035415294368


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=2032.0, style=Pro…

Epoch 49: train=4.25681, val=3.89465, took 154.83 s
  Validation Found 91635 of 93880, added 3104 (eff 97.61%) (0.169 FP/event)
4.256813661613333
Averaging...

Average Eff:  0.9760257775884107
Average FP Rate:  0.16841961852861037



# Results

Let's save some results. (Unless you have changed the code above, the model is already saved every epoch.)

Go ahead and save the final model (even though it was also saved above):

In [18]:
#torch.save(model.state_dict(), output / f'{name}_final.pyt')

  and should_run_async(code)


Save the output results:

In [19]:
#results.to_hdf(f'{name}_stats.hdf5', 'results')

Save the plot above:

In [20]:
# Redraw the full training history as a fresh figure and write it to the output folder.
# Each entry of results['eff_val'] carries eff_rate and fp_rate; extract them per epoch.
eff_rates = results['eff_val'].apply(lambda x: x.eff_rate)
fp_rates = results['eff_val'].apply(lambda x: x.fp_rate)

dual_train_plots(results.index, results.cost, results.val, eff_rates, fp_rates)
plt.tight_layout()

stats_plot_path = output / f'{name}_stats_a.png'
plt.savefig(str(stats_plot_path))

<IPython.core.display.Javascript object>

PermissionError: [Errno 13] Permission denied: 'ML/18December2020_AllCNN8Layer_withPcnn_50epochs_lr_1em6_aymm_0p0/18December2020_AllCNN8Layer_withPcnn_50epochs_lr_1em6_aymm_0p0_stats_a.png'

In [None]:
##quit()