In [None]:
# imports

# external modules
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
# framework modules
# (the parent directory is added to the module search path;
#  presumably that is where the `plotting` and `models` packages live)
sys.path.append('../')
import plotting.plottools
# re-execute the module so that the names imported below come from a fresh
# load even when the module was already imported in this kernel session
importlib.reload(plotting.plottools)
from plotting.plottools import plot_histogram
import models.modeldefs
importlib.reload(models.modeldefs)
from models.modeldefs import model_dummy
from models.modeldefs import model_ecal_endcap
# local modules
# (resolved without the path extension above, so presumably located next to
#  this notebook; same import / reload / from-import pattern as above)
import prepare_training_set
importlib.reload(prepare_training_set)
from prepare_training_set import prepare_training_data_from_files

In [None]:
# load the training set

# input file; exactly one of the assignments below should be active
# (the alternatives differ only in the PXDisk number in the file name)
#file = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1_preprocessed.parquet'
#file = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+2_preprocessed.parquet'
file = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+3_preprocessed.parquet'

# options forwarded as keyword arguments to the loader
kwargs = dict(
    verbose=True,
    entries_threshold=10000,
    skip_first_lumisections=5,
)

# the loader takes a list of files and returns three objects,
# which are indexed per training instance in the cells further down
training_data, training_runs, training_lumis = prepare_training_data_from_files(
    [file],
    **kwargs
)

In [None]:
# make a mask where values are always zero

# A bin is flagged only if it is exactly zero in every training instance.
# (Comparing the sum over instances against zero would additionally flag bins
#  whose non-zero entries happen to cancel out, e.g. if the input has been
#  shifted or normalized so that it contains negative values.)
# Only the first channel (last axis, index 0) is used, so the mask is 2D;
# training_data is indexed as (instance, row, column, channel) here.
shape_mask = np.all(training_data[:,:,:,0] == 0, axis=0)

In [None]:
# make model and training settings
loss = 'mse'
optimizer = 'adam'
batch_size = 32
epochs = 10
validation_split = 0.1

# the model is built for the shape of a single instance
# (i.e. the shape of the training set without its leading axis)
input_shape = training_data.shape[1:]
model = model_dummy(input_shape)

# compile model
model.compile(loss=loss, optimizer=optimizer)

# do training
# note: the same array is passed as first and second argument,
# i.e. (assuming the usual Keras fit convention) as input and as target
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=True,
    shuffle=True,
    validation_split=validation_split,
)
history = model.fit(training_data, training_data, **fit_options)

In [None]:
# evaluate the model

# run the trained model on the same data it was trained on
predictions = model.predict(training_data)

# post-process the model output in place:
# 1) negative values are replaced by zero
predictions[np.less(predictions, 0)] = 0.
# 2) bins flagged in shape_mask are set to zero for every instance
predictions[:, shape_mask] = 0.

In [None]:
# calculate squared difference

# element-wise squared difference between the input and the model output
errors = (training_data - predictions)**2

# average of the squared difference over all instances (leading axis)
avg_response = errors.mean(axis=0)

# where the average is exactly zero, use 1 instead
# so that the division below is well defined everywhere
zero_response = (avg_response == 0)
avg_response[zero_response] = 1

# squared difference relative to its average over instances
errors_corrected = np.divide(errors, avg_response)

In [None]:
# make plots

nplots = 5
# Pick random instances to display.
# Sampling is done without replacement so that the same instance cannot be
# drawn (and plotted) more than once; the number of draws is capped at the
# number of available instances so that this also works for small data sets.
plotids = np.random.choice(
    len(training_data),
    size=min(nplots, len(training_data)),
    replace=False
)

for i in plotids:
    # one figure per selected instance with four panels side by side:
    # input, model output, squared error, and squared error divided by its average
    fig,axs = plt.subplots(figsize=(24,6), ncols=4)
    plot_histogram(training_data[i,:,:,0], fig=fig, ax=axs[0])
    plot_histogram(predictions[i,:,:,0], fig=fig, ax=axs[1])
    # the two error panels use a fixed color axis range (caxrange)
    plot_histogram(errors[i,:,:,0], fig=fig, ax=axs[2], caxrange=(-0.01, 0.1))
    plot_histogram(errors_corrected[i,:,:,0], fig=fig, ax=axs[3], caxrange=(-0.01, 5.))
    # label the first panel (just above the axes) with the run and lumi of this instance
    axs[0].text(0.02, 1.02, 'Run: {}, lumi: {}'.format(training_runs[i], training_lumis[i]), transform=axs[0].transAxes, fontsize=12)

In [None]:
# store the model

def disk_tag_from_path(path):
    """Extract the disk tag (e.g. '+3') from an input file path.

    The tag is the (optionally signed) integer that directly follows
    'PXDisk_' in the base name of the file.

    Args:
        path: path to an input file.

    Returns:
        The disk tag as a string, including its sign if present.

    Raises:
        ValueError: if the file name does not contain 'PXDisk_<integer>'.
    """
    import re
    match = re.search(r'PXDisk_([+-]?\d+)', os.path.basename(path))
    if match is None:
        raise ValueError('could not find a PXDisk tag in file name: {}'.format(path))
    return match.group(1)

# saving is switched off by default; set to True to write the model to disk
dosave = False
if dosave:
    # The disk tag in the model name is taken from the input file that was
    # actually used for training, so that the stored model cannot be labeled
    # with (or overwrite the model of) a different disk than it was trained on.
    modelname = 'test_model_20231109_pxdisk{}_era2023Cv1.keras'.format(disk_tag_from_path(file))
    model.save(modelname)