In [None]:
# Step used by make_image when walking across a hit's x-extent, and the
# numerator when it scales a hit's charge by base_width / (2 * half width).
# NOTE(review): presumably in the same units as the hit x-coordinates - confirm.
base_width = 0.47

In [None]:
# balance_images.py
import cv2
import csv
import numpy as np
import os


# Integer labels for the hit classes, numbered 1..NUM_CLASSES.
# make_image sizes its per-class histogram with NUM_CLASSES + 1 slices so that
# a hit's label can be used directly as an index.
NUM_CLASSES = 5
MIP, HIP, SHOWER, DIFFUSE, MICHEL = range(1, NUM_CLASSES + 1)

class EventSummary:
    def __init__(self, index, xx, zz, ww, tt, qq):
        """Store the per-hit arrays of one event and derive its summary values.

            Args:
                index: The index of the event
                xx: The x-coordinates of the hits in this event
                zz: The z-coordinates of the hits in this event
                ww: The half widths of the hits in this event
                tt: The class label of each hit in this event
                qq: The hit energies in this event
        """
        self.index, self.xx, self.zz = index, xx, zz
        self.ww, self.tt, self.qq = ww, tt, qq
        self.n_hits = len(tt)

        # Tally how many hits carry each class label; labels that never occur
        # are simply absent from the mapping and count as zero below.
        labels, tallies = np.unique(self.tt, return_counts=True)
        hits_per_class = dict(zip(labels, tallies))

        # Signed difference between the number of MIP hits and SHOWER hits.
        self.delta_class = hits_per_class.get(MIP, 0) - hits_per_class.get(SHOWER, 0)


    def __lt__(self, other):
        """Order EventSummary objects by their delta_class value.

            Args:
                other: The EventSummary object to compare against

            Returns:
                True if self.delta_class < other.delta_class, False otherwise.
        """
        return self.delta_class < other.delta_class


    def num_hits(self):
        """Report how many hits the event contains.

            Returns:
                The number of hits, taken from the length of tt at construction.
        """
        return self.n_hits


class Binning:
    def __init__(self, x_min, x_max, z_min, z_max, block_size, image_width, image_height):
        """Set up tile and pixel bin edges for an image split into blocks.

            The coordinate range is covered by whole blocks of block_size; each
            block becomes one tile of image_width x image_height pixels.

            Args:
                x_min: The minimum x-coordinate
                x_max: The maximum x-coordinate
                z_min: The minimum z-coordinate
                z_max: The maximum z-coordinate
                block_size: The size of a block in cm
                image_width: The width of the image in pixels
                image_height: The height of the image in pixels
        """
        # Every range is widened by eps at both ends so that values sitting
        # exactly on an outer edge still fall inside a bin.
        eps = np.finfo(np.float32).eps

        def tile_count(lower, upper):
            # Number of whole blocks needed to span [lower, upper].
            return int(np.ceil(((upper + eps) - (lower - eps)) / block_size))

        def edges(upper, n_bins):
            # n_bins equal-width bins spanning [0, upper].
            return np.linspace(0 - eps, upper + eps, n_bins + 1)

        self.n_tiles_x = tile_count(x_min, x_max)
        self.n_tiles_z = tile_count(z_min, z_max)

        # Tile edges, expressed in units of blocks.
        self.tiles_x = edges(self.n_tiles_x, self.n_tiles_x)
        self.tiles_z = edges(self.n_tiles_z, self.n_tiles_z)

        # Pixel edges inside a single block, expressed in coordinate units.
        self.bins_x = edges(block_size, image_width)
        self.bins_z = edges(block_size, image_height)

        self.tile_width = image_width
        self.tile_height = image_height


from tqdm.notebook import tqdm
def preprocess_file(input_file):
    """Generate summary descriptions for events.

        Each CSV row describes one event. The first and last fields of a row
        are dropped and the remainder is handed to preprocess_event. Blank
        rows, and rows for which preprocess_event returns None (it does so for
        malformed rows), are left out of the result, so every returned element
        is a usable EventSummary. The index passed to preprocess_event is the
        row's position in the file, so it is unaffected by skipped rows.

        Args:
            input_file: a CSV file containing event information

        Returns:
            A list of EventSummary objects describing the events.
    """
    # First pass: count lines so the progress bar has a total. Iterating the
    # file avoids holding every line in memory as readlines() would.
    with open(input_file, 'r') as f:
        num_events = sum(1 for _ in f)

    events = []
    # newline='' is what the csv module asks for when handed a file object.
    with open(input_file, 'r', newline='') as f:
        reader = csv.reader(f)
        for i, row in enumerate(tqdm(reader, desc="Reading events", miniters=100, total=num_events)):
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            event = preprocess_event(i, row[1:-1])
            if event is not None:
                events.append(event)

    return events


def preprocess_event(index, data):
    """Construct summary event description for a single event.

        The input data has the format:
        N Hits,N*{x coord, z coord, half width, class, charge}

        The input sequence is not modified.

        Args:
            index: the index of the event
            data: the hit count followed by the per-hit values for the event

        Returns:
            An EventSummary describing the event, or None (after printing a
            diagnostic) when data is empty or the number of values does not
            match the declared hit count.
    """
    n_vals = 5  # values stored per hit: x, z, half width, class, charge

    if len(data) == 0:
        print("Missing information in input file")
        print("Expected a hit count, observed 0 values")
        return None

    # Slice rather than pop so the caller's sequence is left untouched.
    n_hits = int(data[0])
    values = data[1:]
    expected_vals = n_hits * n_vals
    observed_vals = len(values)

    if expected_vals != observed_vals:
        problem = "Missing" if expected_vals > observed_vals else "Excess"
        print(f"{problem} information in input file")
        print(f"Expected {expected_vals} values, observed {observed_vals} values")
        return None

    # The values are interleaved per hit, so each quantity is a strided slice.
    xx = np.array(values[0::n_vals], dtype=float)
    zz = np.array(values[1::n_vals], dtype=float)
    ww = np.array(values[2::n_vals], dtype=float)
    tt = np.array(values[3::n_vals], dtype=int)
    qq = np.array(values[4::n_vals], dtype=float)

    return EventSummary(index, xx, zz, ww, tt, qq)


def make_image(event, output_folder, image_size=(256, 256)):
    """Generate the training/validation set images for a single event.

        The event is split into square blocks of 128 coordinate units; each
        block becomes one tile of image_size pixels. Every hit is spread across
        the pixels between its low and high x-edges in steps of base_width, and
        each pixel it touches receives qq * base_width / (2 * ww). The hits
        image holds the mean contribution per pixel; the truth image holds, per
        pixel, the class with the largest summed charge (ties go to the earlier
        of MIP, HIP, SHOWER, DIFFUSE, MICHEL) or 0 where no class has positive
        charge.

        Only tiles with more than 10 labelled pixels are written. Images are
        output to <output_folder>/Hits and <output_folder>/Truth as
        Image_<event index>_<tile row>_<tile column>.png; the folders are
        created if they do not exist. An event without hits produces nothing.

        NOTE: the files are written with np.savez_compressed, so despite the
        .png extension they are NumPy .npz archives (the array is stored under
        the default key 'arr_0'). The names are kept as they are so existing
        consumers keep working.

        Args:
            event: the EventSummary object
            output_folder: the top-level folder for output images
            image_size: the output image size as a tuple (width, height)
    """
    if event.n_hits == 0:
        # Nothing to draw, and np.amin/np.amax below would raise on empty input.
        return

    image_width, image_height = image_size
    xx_lo, xx_hi = event.xx - event.ww, event.xx + event.ww
    x_min, x_max = np.amin(xx_lo), np.amax(xx_hi)
    z_min, z_max = np.amin(event.zz), np.amax(event.zz)

    # Pad small events symmetrically so each axis spans at least half the
    # image dimension.
    x_range = x_max - x_min
    if 2 * x_range < image_width:
        padding = 0.5 * (image_width / 2. - x_range)
        x_min -= padding
        x_max += padding
    z_range = z_max - z_min
    if 2 * z_range < image_height:
        padding = 0.5 * (image_height / 2. - z_range)
        z_min -= padding
        z_max += padding

    block_size = 128
    binning = Binning(x_min, x_max, z_min, z_max, block_size, image_width, image_height)

    # Tile row of each hit, and pixel indices inside the tile.
    ptz = np.digitize((event.zz - z_min) / block_size, binning.tiles_z) - 1
    px_lo = np.digitize((xx_lo - x_min) % block_size, binning.bins_x) - 1
    px_hi = np.digitize((xx_hi - x_min) % block_size, binning.bins_x) - 1
    pz = np.digitize((event.zz - z_min) % block_size, binning.bins_z) - 1
    # Flip vertically so that increasing z runs up the image.
    pixel_rows = (image_height - 1) - pz

    tile_shape = (binning.n_tiles_z, binning.n_tiles_x, binning.tile_height, binning.tile_width)
    type_histogram = np.zeros((NUM_CLASSES + 1,) + tile_shape, dtype=float)
    input_histogram = np.zeros(tile_shape, dtype=float)
    count_histogram = np.zeros_like(input_histogram)

    for idx in range(event.n_hits):
        # Charge deposited per base_width step across the hit's full width.
        q_local = event.qq[idx] * (base_width / (2 * event.ww[idx]))
        hit_class, tile_row, pixel_row = event.tt[idx], ptz[idx], pixel_rows[idx]
        # One step per pixel between the hit's low and high x-edges; the range
        # is empty when px_hi < px_lo.
        for step in range(px_hi[idx] - px_lo[idx] + 1):
            x_local = min(xx_lo[idx] + step * base_width, xx_hi[idx])
            ptx_local = np.digitize((x_local - x_min) / block_size, binning.tiles_x) - 1
            px_local = np.digitize((x_local - x_min) % block_size, binning.bins_x) - 1
            type_histogram[hit_class, tile_row, ptx_local, pixel_row, px_local] += q_local
            input_histogram[tile_row, ptx_local, pixel_row, px_local] += q_local
            count_histogram[tile_row, ptx_local, pixel_row, px_local] += 1

    # Label each pixel with the class holding the most charge. argmax returns
    # the first maximum, so listing the classes in priority order reproduces
    # the tie-breaking rule; pixels with no positive class charge stay 0.
    priority = np.array([MIP, HIP, SHOWER, DIFFUSE, MICHEL])
    class_charge = type_histogram[priority]
    winner = priority[np.argmax(class_charge, axis=0)]
    truth_histogram = np.where(np.max(class_charge, axis=0) > 0, winner, 0).astype(int)

    # Normalise input histograms
    count_histogram[count_histogram == 0] = 1
    input_histogram /= count_histogram

    truth_output_folder = os.path.join(output_folder, "Truth")
    hits_output_folder = os.path.join(output_folder, "Hits")
    os.makedirs(truth_output_folder, exist_ok=True)
    os.makedirs(hits_output_folder, exist_ok=True)

    for tr in range(binning.n_tiles_z):
        for tc in range(binning.n_tiles_x):
            truth_tile = truth_histogram[tr, tc, ...]
            if np.count_nonzero(truth_tile) <= 10:
                # Too few labelled pixels to be worth keeping.
                continue
            tile_name = f"Image_{event.index}_{tr}_{tc}.png"
            with open(os.path.join(truth_output_folder, tile_name), 'wb') as file:
                np.savez_compressed(file, truth_tile)
            with open(os.path.join(hits_output_folder, tile_name), 'wb') as file:
                np.savez_compressed(file, input_histogram[tr, tc, ...])

In [None]:
from tqdm.notebook import tqdm
# For each view: make sure the output folders exist, read the view's CSV into
# event summaries, then write the image tiles for every usable event.
for view in ['C']:
    file_prefix = "csv/HitTagTS_CaloHitList"
    output_path = f"Images{view}"
    for path in [os.path.join(output_path, "Hits"), os.path.join(output_path, "Truth")]:
        # exist_ok makes this a no-op on reruns and avoids a check-then-create race.
        os.makedirs(path, exist_ok=True)
    events = preprocess_file(f"{file_prefix}{view}.csv")
    for event in tqdm(events, desc="Making images", miniters=100):
        # preprocess_event returns None for malformed rows; skip those as well
        # as events without hits.
        if event is not None and len(event.xx) > 0:
            make_image(event, output_path)