In [None]:
# imports

# external modules
import sys
import os
import numpy as np
import pandas as pd
import keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import importlib
# framework modules
# extend the module search path with the parent directory
# (presumably where the `plotting` and `training` packages live - confirm)
sys.path.append('../')
import plotting.plottools
# re-execute the module so that edits to it are picked up
# without restarting the kernel
importlib.reload(plotting.plottools)
from plotting.plottools import plot_histogram
import training.prepare_training_set
# same reload pattern as above for the data preparation helpers
importlib.reload(training.prepare_training_set)
from training.prepare_training_set import prepare_training_data_from_files

In [None]:
# load the evaluation set

# choose the monitoring element (ME) to evaluate;
# exactly one of the lines below should be uncommented
#me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1'
#me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+2'
me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+3'
#me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_-1'
#me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_-2'
#me = 'PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_-3'

# input file(s) for the chosen monitoring element
files = []
files.append('../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-{}_preprocessed.parquet'.format(me))

# options forwarded as keyword arguments to the loader
# (their exact meaning is defined in training.prepare_training_set)
kwargs = {}
kwargs['verbose'] = True
kwargs['entries_threshold'] = 10000
kwargs['skip_first_lumisections'] = 5

# the loader returns the data together with the run and lumisection
# number of every entry (used further down to label the plots)
loaded = prepare_training_data_from_files(files, **kwargs)
(eval_data, eval_runs, eval_lumis) = loaded

In [None]:
# make a mask of the cells that are zero in more than half of the entries

# per entry and per cell: is the value (in channel 0) exactly zero?
is_zero = (eval_data[:,:,:,0] == 0)
# per cell: in how many entries is it zero?
zero_counts = is_zero.sum(axis=0)
# keep the cells that are zero in the majority of entries
shape_mask = (zero_counts > len(eval_data)/2.)

# visual check of the resulting mask
fig, ax = plt.subplots()
plot_histogram(shape_mask, fig=fig, ax=ax, caxrange=(-0.01,1))
ax.text(0.02, 1.02, 'Shape mask', transform=ax.transAxes, fontsize=12)

In [None]:
# load the keras model that belongs to the selected monitoring element
modelname = '../models/model_20231115_Run2023C-v1_{}.keras'.format(me)
load_model = keras.models.load_model
model = load_model(modelname)

In [None]:
# load average occupancy or error of training set

# read the stored array for the selected monitoring element and square it,
# so that it is on the same scale as the squared errors it is later divided into
avgresponsename = '../models/model_20231115_Run2023C-v1_{}_avgoccupancy.npy'.format(me)
avgresponse = np.square(np.load(avgresponsename))

# plot the squared average response before it is modified below
fig, ax = plt.subplots()
plot_histogram(avgresponse, fig=fig, ax=ax)
ax.text(0.02, 1.02, 'Average response on training set', transform=ax.transAxes, fontsize=12)

# replace exact zeros by one, so that dividing by this array leaves those cells unchanged
empty_cells = (avgresponse == 0)
avgresponse[empty_cells] = 1
# insert a new axis at position 2 (for a 2D array: a trailing axis of length 1)
avgresponse = avgresponse[:, :, np.newaxis]

In [None]:
# evaluate the model

predictions = model.predict(eval_data)

# set negative predictions to zero
negative_cells = (predictions < 0)
predictions[negative_cells] = 0.

# set the cells selected by the shape mask to zero in every entry
predictions[:,shape_mask] = 0.

In [None]:
# calculate squared difference

errors = np.square(eval_data - predictions)

# space correction:
# divide by the (squared) average response of the training set
errors_space_corrected = errors/avgresponse

# time correction:
# multiply the space-corrected errors of each entry with those of the
# two preceding entries; the first two entries have no full window
# and keep the value zero
# NOTE(review): the window is taken over consecutive array entries and does not
# look at eval_runs / eval_lumis - confirm that crossing run boundaries is acceptable
window = 3
errors_time_corrected = np.zeros(errors_space_corrected.shape)
for last in range(window-1, len(errors)):
    first = last - (window-1)
    in_window = errors_space_corrected[first:last+1]
    errors_time_corrected[last] = np.prod(in_window, axis=0)

# the time-corrected errors are the final corrected errors
errors_corrected = errors_time_corrected

In [None]:
# make plots of a few randomly chosen entries

nplots = 5
# sample without replacement so that the same entry is not plotted twice,
# and never ask for more entries than are available
nplots = min(nplots, len(eval_data))
plotids = np.random.choice(len(eval_data), size=nplots, replace=False)

for i in plotids:
    # panels from left to right: input, prediction, squared error,
    # space-corrected error, time-corrected error
    fig,axs = plt.subplots(figsize=(30,6), ncols=5)
    plot_histogram(eval_data[i,:,:,0], fig=fig, ax=axs[0])
    plot_histogram(predictions[i,:,:,0], fig=fig, ax=axs[1])
    plot_histogram(errors[i,:,:,0], fig=fig, ax=axs[2], caxrange=(-0.0001, 0.01))
    plot_histogram(errors_space_corrected[i,:,:,0], fig=fig, ax=axs[3], caxrange=(-0.001, 0.5))
    plot_histogram(errors_time_corrected[i,:,:,0], fig=fig, ax=axs[4], caxrange=(-0.001, 0.1))
    # label the figure with the run and lumisection number of this entry
    axs[0].text(0.02, 1.02, 'Run: {}, lumi: {}'.format(eval_runs[i], eval_lumis[i]), transform=axs[0].transAxes, fontsize=12)

In [None]:
# check how many cells are above a certain threshold

thresholds = [0.01, 0.05, 0.1, 0.5, 1]
# one colour per threshold, taken at evenly spaced positions of the colormap
cmap = mpl.colormaps.get_cmap('jet')
colors = [cmap(val) for val in np.linspace(0.1, 0.9, num=len(thresholds))]

# for every threshold: per entry, the number of cells (in channel 0)
# whose corrected error exceeds the threshold
ncells = []
for threshold in thresholds:
    ncells.append( np.sum(errors_corrected>threshold, axis=(1,2))[:,0] )

fig, ax = plt.subplots()
for threshold, ncell, color in zip(thresholds, ncells, colors):
    # note: entries with more than 300 cells above threshold fall outside
    # the histogram range and are not shown
    ax.hist(ncell, histtype='step', linewidth=2,
            color=color, label='Threshold {}'.format(threshold),
            range=(0, 300), bins=30)

# axis decoration is independent of the threshold,
# so it is done once after all histograms are drawn
ax.set_yscale('log')
ax.legend()
ax.set_xlabel('Number of cells above threshold', fontsize=12)
ax.set_ylabel('Number of lumisections', fontsize=12)
plt.show()

In [None]:
# plot examples from across the range

threshold = 0.1
# per entry: number of cells (in channel 0) with corrected error above threshold
ncells = np.sum(errors_corrected>threshold, axis=(1,2))[:,0]
# inclusive (minimum, maximum) number of cells above threshold
# NOTE(review): the value 10 falls in both the second and the third range - confirm whether intended
ncellranges = ([
    (0, 3),
    (4, 10),
    (10, 100)
])
# maximum number of examples to plot per range
nplots = 3

all_indices = np.arange(len(eval_data))
for ncellrange in ncellranges:
    # select the entries whose number of cells above threshold lies in this range
    mask = ((ncells>=ncellrange[0]) & (ncells<=ncellrange[1]))
    candidate_indices = all_indices[mask]
    print('Running on cell range {}'.format(ncellrange))
    print('Found {} candidate lumisections'.format(len(candidate_indices)))

    # np.random.choice raises a ValueError on an empty set of candidates,
    # so skip ranges in which nothing was found
    if len(candidate_indices)==0: continue

    # sample without replacement so that the same entry is not plotted twice,
    # and never ask for more entries than there are candidates
    nsamples = min(nplots, len(candidate_indices))
    plotids = np.random.choice(candidate_indices, size=nsamples, replace=False)

    for i in plotids:
        # panels from left to right: input, prediction, squared error,
        # space-corrected error, time-corrected error
        fig,axs = plt.subplots(figsize=(24,6), ncols=5)
        plot_histogram(eval_data[i,:,:,0], fig=fig, ax=axs[0])
        plot_histogram(predictions[i,:,:,0], fig=fig, ax=axs[1])
        plot_histogram(errors[i,:,:,0], fig=fig, ax=axs[2], caxrange=(-0.0001, 0.01))
        # errors_corrected is the same array as errors_time_corrected (shown in the last panel),
        # so the fourth panel shows the space-corrected errors instead of a duplicate
        plot_histogram(errors_space_corrected[i,:,:,0], fig=fig, ax=axs[3], caxrange=(-0.001, 0.5))
        plot_histogram(errors_time_corrected[i,:,:,0], fig=fig, ax=axs[4], caxrange=(-0.001, 0.1))
        # label the figure with the run and lumisection number of this entry
        axs[0].text(0.02, 1.02, 'Run: {}, lumi: {}'.format(eval_runs[i], eval_lumis[i]), transform=axs[0].transAxes, fontsize=12)
    plt.show()