In [None]:
# imports

# external modules
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
# framework modules
sys.path.append('../')
import plotting.plottools
importlib.reload(plotting.plottools)
from plotting.plottools import plot_histogram
import models.modeldefs
importlib.reload(models.modeldefs)
from models.modeldefs import model_dummy
from models.modeldefs import model_ecal_endcap
# local modules
import prepare_training_set
importlib.reload(prepare_training_set)
from prepare_training_set import prepare_training_data_from_files

In [None]:
# load the training set

# input file (preprocessed parquet file, see file name for the dataset and monitoring element)
file = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1_preprocessed.parquet'

# selection settings, forwarded as keyword arguments to prepare_training_data_from_files
# (their exact meaning is defined in prepare_training_set.py;
#  presumably histograms matching any of the veto patterns are rejected - TODO confirm)
kwargs = ({
    'verbose': True,
    'entries_threshold': 10000,
    'skip_first_lumisections': 5,
    'veto_patterns': [np.zeros((2,2)), np.zeros((3,1)), np.zeros((1,3))]
})
# note: the data array must be bound to the name training_data,
# since that is the name read by all following cells
# (it was previously bound to train_data, giving a NameError on a fresh kernel)
(training_data, training_runs, training_lumis) = prepare_training_data_from_files([file], **kwargs)

In [None]:
# build a boolean mask that flags the bins which are zero
# in more than half of the histograms in the training set

# per-bin count of training histograms in which that bin is exactly zero
n_zero_per_bin = np.count_nonzero(training_data[:,:,:,0]==0, axis=0)
shape_mask = (n_zero_per_bin > len(training_data)/2.)

# visual check of the mask
fig,ax = plt.subplots()
plot_histogram(shape_mask, ax=ax, fig=fig, caxrange=(-0.01,1))
ax.text(0.02, 1.02, 'Shape mask', fontsize=12, transform=ax.transAxes)

In [None]:
# model definition and training settings
# (the input shape is the shape of a single instance, i.e. without the first axis)
input_shape = training_data.shape[1:]
model = model_dummy(input_shape)
loss = 'mse'
optimizer = 'adam'
batch_size = 32
epochs = 10
validation_split = 0.1

# compile the model with the chosen loss function and optimizer
model.compile(optimizer=optimizer, loss=loss)

# collect the training options in one place
fit_settings = {
    'batch_size': batch_size,
    'epochs': epochs,
    'verbose': True,
    'shuffle': True,
    'validation_split': validation_split,
}

# do the training;
# input and target are the same array, so the model learns to reproduce its input
history = model.fit(training_data, training_data, **fit_settings)

In [None]:
# run the trained model on the training set and post-process its output

training_predictions = model.predict(training_data)

# negative predicted values are replaced by zero
negative_bins = (training_predictions < 0)
training_predictions[negative_bins] = 0.

# bins flagged in the shape mask are set to zero for every instance
training_predictions[:, shape_mask] = 0.

In [None]:
# squared difference between input and prediction,
# both plain and normalized by the average response per bin

residuals = training_data - training_predictions
training_errors = np.square(residuals)

# normalization: square of the per-bin mean over the training set (average occupancy);
# an alternative choice would be the per-bin mean error, np.mean(training_errors, axis=0)
mean_occupancy = np.mean(training_data, axis=0)
avg_response = np.square(mean_occupancy)
# bins with zero average response are set to 1 to avoid dividing by zero
avg_response[avg_response==0] = 1
training_errors_corrected = np.divide(training_errors, avg_response)

# plot the normalization map
# (caxrange None passes no explicit color axis range; e.g. (-0.001, 0.01) can be used to zoom in)
caxrange = None
fig,ax = plt.subplots()
plot_histogram(avg_response[:,:,0], ax=ax, fig=fig, caxrange=caxrange)
ax.text(0.02, 1.02, 'Average response', fontsize=12, transform=ax.transAxes)

In [None]:
# plot a few randomly chosen instances of the training set
# (panels from left to right: input, prediction, squared error, normalized squared error)

nplots = 5
# note: indices are drawn with replacement (default of np.random.choice),
# so the same instance can show up more than once
plotids = np.random.choice(len(training_data), size=nplots)

for idx in plotids:
    fig,axs = plt.subplots(ncols=4, figsize=(24,6))
    (ax_input, ax_pred, ax_err, ax_err_corr) = axs
    plot_histogram(training_data[idx,:,:,0], ax=ax_input, fig=fig)
    plot_histogram(training_predictions[idx,:,:,0], ax=ax_pred, fig=fig)
    plot_histogram(training_errors[idx,:,:,0], ax=ax_err, fig=fig, caxrange=(-0.001, 0.01))
    plot_histogram(training_errors_corrected[idx,:,:,0], ax=ax_err_corr, fig=fig, caxrange=(-0.01,0.5))
    # label the first panel with the run and lumisection number of this instance
    runlumi = 'Run: {}, lumi: {}'.format(training_runs[idx], training_lumis[idx])
    ax_input.text(0.02, 1.02, runlumi, fontsize=12, transform=ax_input.transAxes)

In [None]:
# load an evaluation set that is the complement of the training set:
# the patterns that were vetoed for the training set are passed as required patterns here
# (exact selection semantics are defined in prepare_training_set.py)

kwargs = dict(
    verbose=True,
    entries_threshold=10000,
    skip_first_lumisections=5,
    required_patterns=[np.zeros((2,2)), np.zeros((3,1)), np.zeros((1,3))],
)
(eval_data, eval_runs, eval_lumis) = prepare_training_data_from_files([file], **kwargs)

# model prediction with the same post-processing as for the training set:
# negative values and bins flagged in the shape mask are set to zero
eval_predictions = model.predict(eval_data)
negative_bins = (eval_predictions < 0)
eval_predictions[negative_bins] = 0.
eval_predictions[:, shape_mask] = 0.

# squared difference, plain and normalized by the average response of the training set
eval_residuals = eval_data - eval_predictions
eval_errors = np.square(eval_residuals)
eval_errors_corrected = np.divide(eval_errors, avg_response)

In [None]:
# plot a few randomly chosen instances of the evaluation set
# (panels from left to right: input, prediction, squared error, normalized squared error)

nplots = 5
# note: indices are drawn with replacement (default of np.random.choice),
# so the same instance can show up more than once
plotids = np.random.choice(len(eval_data), size=nplots)

for idx in plotids:
    fig,axs = plt.subplots(ncols=4, figsize=(24,6))
    (ax_input, ax_pred, ax_err, ax_err_corr) = axs
    plot_histogram(eval_data[idx,:,:,0], ax=ax_input, fig=fig)
    plot_histogram(eval_predictions[idx,:,:,0], ax=ax_pred, fig=fig)
    plot_histogram(eval_errors[idx,:,:,0], ax=ax_err, fig=fig, caxrange=(-0.001, 0.01))
    plot_histogram(eval_errors_corrected[idx,:,:,0], ax=ax_err_corr, fig=fig, caxrange=(-0.01, 0.5))
    # label the first panel with the run and lumisection number of this instance
    runlumi = 'Run: {}, lumi: {}'.format(eval_runs[idx], eval_lumis[idx])
    ax_input.text(0.02, 1.02, runlumi, fontsize=12, transform=ax_input.transAxes)

In [None]:
# store the model
# (disabled by default: nothing is written unless dosave is set to True)

dosave = False
if dosave:
    # name of the output file passed to model.save
    modelname = 'test_model_20231109_pxdisk+1_era2023Cv1.keras'
    model.save(modelname)