In [None]:
# imports

import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
sys.path.append('../')
import plotting.plottools
importlib.reload(plotting.plottools)
from plotting.plottools import plot_histogram
# local modules
import models.modeldefs
importlib.reload(models.modeldefs)
from models.modeldefs import model_dummy
from models.modeldefs import model_ecal_endcap

In [None]:
# get some example histograms
# Reads the preprocessed parquet file and unpacks it into numpy arrays:
# - hists: one 2D histogram per row, reshaped to (xbins, ybins)
# - runs, lumis, entries: per-row values of the 'fromrun', 'fromlumi' and 'entries' columns

fname = '../data/data/ZeroBias-Run2023C-PromptReco-v1-DQMIO-PixelPhase1-Tracks-PXForward-clusterposition_xy_ontrack_PXDisk_+1_preprocessed.parquet'
df = pd.read_parquet(fname)
nhists = len(df)
# Use positional access (.iloc) for "the first row": plain [0] is a label lookup
# and only works when the dataframe index happens to be the default 0..n-1.
xbins = df['Xbins'].iloc[0]
ybins = df['Ybins'].iloc[0]
# Iterate over the column directly (row order) instead of looking up labels 0..nhists-1,
# so that hists stays aligned with runs/lumis/entries for any dataframe index.
hists = np.array([np.asarray(histo).reshape(xbins,ybins) for histo in df['histo']])
runs = np.array(df['fromrun'])
lumis = np.array(df['fromlumi'])
entries = np.array(df['entries'])
print('Shape of hists array: {}'.format(hists.shape))
print('Runs: {}'.format(runs))
print('Lumis: {}'.format(lumis))
print('Entries: {}'.format(entries))

In [None]:
# selections
# Keep only histograms with more than 1000 entries and build the training array from them.

entries_mask = (entries > 1000)
npassing = np.sum(entries_mask)
print('Passing lumisections: {} ({:.2f} %)'.format(npassing, npassing/nhists*100))

# the training selection is currently identical to the entries selection
training_mask = entries_mask
ntraining = np.sum(training_mask)
print('Training lumisections: {} ({:.2f} %)'.format(ntraining, ntraining/nhists*100))

# select the training histograms and append a trailing axis of length 1 (at position 3)
selected_hists = hists[training_mask]
training_data = np.expand_dims(selected_hists, axis=3)
print(training_data.shape)

In [None]:
# make a mask of the bins whose sum over all training histograms is zero

# sum over the first axis (the individual histograms), then take index 0 of the trailing axis
summed_occupancy = training_data.sum(axis=0)
shape_mask = (summed_occupancy[:,:,0] == 0)

In [None]:
# divide training data by its average (currently disabled: the normalization below is commented out)

#avg_occupancy = np.mean(training_data, axis=0)
#avg_occupancy[avg_occupancy==0] = 1
#training_data = training_data/avg_occupancy

In [None]:
# make model and training settings
# The model is built for the shape of a single training example (all axes but the first).
input_shape = training_data.shape[1:]
model = model_dummy(input_shape)
loss = 'mse'
optimizer = 'adam'
batch_size = 32
epochs = 5
validation_split = 0.1

# compile model with the chosen loss and optimizer
model.compile(optimizer=optimizer, loss=loss)

# do training
# input and target are the same array: the model is trained to reproduce its own input
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=True,
    shuffle=True,
    validation_split=validation_split,
)
history = model.fit(training_data, training_data, **fit_options)

In [None]:
# evaluate the model
# Predictions are made on the same array that was used for training.

predictions = model.predict(training_data)

# set negative predicted values to zero
is_negative = (predictions < 0)
predictions[is_negative] = 0.

# set the bins flagged by shape_mask to zero in every prediction
predictions[:,shape_mask] = 0.

In [None]:
# calculate squared difference

# element-wise squared difference between input and prediction
residuals = training_data - predictions
errors = residuals * residuals

# average squared difference per bin, taken over the first axis
avg_response = errors.mean(axis=0)

# replace zero averages by one so the division below never divides by zero
zero_response = (avg_response == 0)
avg_response[zero_response] = 1

# squared difference relative to the per-bin average
errors_corrected = errors / avg_response

In [None]:
# make plots
# For a few randomly chosen training examples, show side by side:
# the input histogram, the model prediction, the squared difference,
# and the squared difference divided by its per-bin average.

nplots = 5
# pick distinct examples (no repeated plots) and never more than are available
nsamples = min(nplots, len(training_data))
plotids = np.random.choice(len(training_data), size=nsamples, replace=False)

# plotids index into training_data, which is hists[training_mask];
# apply the same mask to runs and lumis so the labels refer to the plotted histogram
# (indexing the unmasked arrays gives the wrong run/lumi as soon as any histogram is rejected)
training_runs = runs[training_mask]
training_lumis = lumis[training_mask]

for i in plotids:
    fig,axs = plt.subplots(figsize=(24,6), ncols=4)
    plot_histogram(training_data[i,:,:,0], fig=fig, ax=axs[0])
    plot_histogram(predictions[i,:,:,0], fig=fig, ax=axs[1])
    plot_histogram(errors[i,:,:,0], fig=fig, ax=axs[2], caxrange=(-0.01, 0.1))
    plot_histogram(errors_corrected[i,:,:,0], fig=fig, ax=axs[3], caxrange=(-0.01, 5.))
    axs[0].text(0.02, 1.02, 'Run: {}, lumi: {}'.format(training_runs[i], training_lumis[i]), transform=axs[0].transAxes, fontsize=12)