# DL track/shower tagging image generation

This notebook is designed to take CSV files generated by the <code>Train</code> function of the <code>DlHitTrackShowerIdAlgorithm</code>. This algorithm generates CSV files for each of the U, V and W views and the code below will run over each of those files.
    
Most of the cells below will not need any editing, but at the very bottom of the notebook you will find some additional markdown that describes what you may need to edit (essentially just some file locations).

In [None]:
# Automatically reload external libraries that change
%reload_ext autoreload
%autoreload 2

# If a matplotlib plot command is issued, display the results in the notebook
%matplotlib inline

In [None]:
import cv2
import csv
import numpy as np
import os
import matplotlib.pyplot as plt

In [None]:
# Pixel size for each view, keyed by the view label that also appears in the CSV file names.
# NOTE(review): presumably in cm per pixel, since image_size * pixel_size is passed on as a
# block size documented in cm - confirm.
# Earlier per-plane values, kept for reference:
#pixel_size = { "U": 0.46669998765, "V": 0.46669998765, "W": 0.479000002146 }
pixel_size = dict(I=0.46669998765, C=0.479000002146)

# Integer class labels for hits and for truth-image pixels
SHOWER, TRACK, DIFFUSE = 1, 2, 3

In [None]:
class EventSummary:
    """Container for the per-hit values belonging to a single event."""

    def __init__(self, index, xx, zz, tt, pp, qq):
        """Store the hit data for one event.

            Args:
                index: The index of the event
                xx: The x-coordinates of the hits in this event
                zz: The z-coordinates of the hits in this event
                tt: The class label of each hit in this event
                pp: The penalty value of each hit in this event
                qq: The hit energies in this event
        """
        self.index = index
        self.xx, self.zz = xx, zz
        self.tt, self.pp, self.qq = tt, pp, qq
        # The hit count is taken from the length of the class-label sequence
        self.n_hits = len(self.tt)


class Binning:
    """Tile edges and within-tile pixel edges used to cut an event into fixed-size images."""

    def __init__(self, x_min, x_max, z_min, z_max, block_size, image_width, image_height):
        """Construct the binning for an image.

            The coordinate extent is covered by a whole number of square blocks (tiles) of side
            block_size, and each tile is divided into image_width x image_height pixels. All
            edge arrays are widened by the float32 machine epsilon at both ends, presumably so
            that values lying exactly on an outer boundary still fall inside a bin.

            Args:
                x_min: The minimum x-coordinate
                x_max: The maximum x-coordinate
                z_min: The minimum z-coordinate
                z_max: The maximum z-coordinate
                block_size: The size of a block in cm
                image_width: The width of the image in pixels
                image_height: The height of the image in pixels
        """
        margin = np.finfo(np.float32).eps

        def count_tiles(lower, upper):
            # Number of blocks needed to span the (slightly widened) coordinate range
            span = (upper + margin) - (lower - margin)
            return int(np.ceil(span / block_size))

        def make_edges(upper, n_bins):
            # n_bins equal-width bins from zero to upper, widened by the margin at both ends
            return np.linspace(0 - margin, upper + margin, n_bins + 1)

        self.n_tiles_x = count_tiles(x_min, x_max)
        self.n_tiles_z = count_tiles(z_min, z_max)
        self.tile_width = image_width
        self.tile_height = image_height

        # Tile edges are expressed in units of blocks, pixel edges in coordinate units
        self.tiles_x = make_edges(self.n_tiles_x, self.n_tiles_x)
        self.tiles_z = make_edges(self.n_tiles_z, self.n_tiles_z)
        self.bins_x = make_edges(block_size, image_width)
        self.bins_z = make_edges(block_size, image_height)

In [None]:
def preprocess_file(input_file):
    """Generate summary descriptions for events.

        Each CSV row describes one event. The first and last columns of every row are
        discarded and the remainder is handed to preprocess_event. Rows with nothing left
        after that (including blank lines) and rows for which preprocess_event returns None
        are skipped, so the returned list only ever holds usable events. The event index is
        the row number within the file, so skipped rows leave gaps in the numbering.

        Args:
            input_file: a CSV file containing event information

        Returns:
            A list of EventSummary objects describing the events.
    """
    events = []
    # newline='' is the csv module's documented way of opening a file for csv.reader
    with open(input_file, 'r', newline='') as f:
        for i, row in enumerate(csv.reader(f)):
            data = row[1:-1]
            if not data:
                # Nothing to parse: not even a hit count is present
                continue
            event = preprocess_event(i, data)
            if event is not None:
                events.append(event)

    return events


def preprocess_event(index, data):
    """Construct summary event description for a single event.

        The input data has the format:
        N Hits,N*{x coord, z coord, class, penalty, charge}

        The input sequence is left unmodified.

        Args:
            index: the index of the event
            data: the hit count followed by the flattened per-hit values for the event

        Returns:
            An EventSummary describing the event, or None (after printing a diagnostic) if
            the data is empty or the number of values does not match the declared hit count.
    """
    n_vals = 5
    if not data:
        print("Missing information in input file")
        return None

    # Read the hit count without popping it, so the caller's sequence is not altered
    n_hits = int(data[0])
    values = data[1:]
    expected_vals = n_hits * n_vals
    observed_vals = len(values)

    if expected_vals != observed_vals:
        problem = "Missing" if expected_vals > observed_vals else "Excess"
        print(f"{problem} information in input file")
        print(f"Expected {expected_vals} values, observed {observed_vals} values")
        return None

    # The per-hit values are interleaved, so each quantity is every n_vals-th entry
    xx = np.array(values[0::n_vals], dtype=float)
    zz = np.array(values[1::n_vals], dtype=float)
    tt = np.array(values[2::n_vals], dtype=int)
    pp = np.array(values[3::n_vals], dtype=float)
    qq = np.array(values[4::n_vals], dtype=float)

    return EventSummary(index, xx, zz, tt, pp, qq)


def make_image(event, view, output_folder, image_size = 256):
    """Generate the training/validation set images for a single event.

        The event is divided into square tiles of image_size x image_size pixels. For every
        tile holding more than 10 classified pixels, one array of per-pixel class labels and
        one array of charge (normalised over the whole event and scaled to 0-255) are written.

        Images are output to <output_folder>/Hits and <output_folder>/Truth. Although the
        file names end in .png, the contents are written with np.savez_compressed.

        Args:
            event: the EventSummary object; None or an event without hits produces no output
            view: the key into pixel_size selecting the pixel size for this view
            output_folder: the top-level folder for output images
            image_size: the output image width and height in pixels
    """
    if event is None or event.n_hits == 0:
        return

    image_width = image_height = image_size
    x_min, x_max = np.amin(event.xx), np.amax(event.xx)
    z_min, z_max = np.amin(event.zz), np.amax(event.zz)

    # Events whose extent is below half the image size are padded symmetrically so that the
    # extent becomes image_size / 2.
    # NOTE(review): this compares a coordinate extent with a pixel count - confirm intended.
    x_range = x_max - x_min
    if 2 * x_range < image_width:
        padding = 0.5 * (image_width / 2. - x_range)
        x_min -= padding
        x_max += padding
    z_range = z_max - z_min
    if 2 * z_range < image_height:
        padding = 0.5 * (image_height / 2. - z_range)
        z_min -= padding
        z_max += padding

    block_size = image_size * pixel_size[view]
    binning = Binning(x_min, x_max, z_min, z_max, block_size, image_width, image_height)

    # Tile indices come from the position in units of blocks, pixel indices from the
    # position within the block
    tile_col = np.digitize((event.xx - x_min) / block_size, binning.tiles_x) - 1
    tile_row = np.digitize((event.zz - z_min) / block_size, binning.tiles_z) - 1
    pixel_col = np.digitize((event.xx - x_min) % block_size, binning.bins_x) - 1
    z_bin = np.digitize((event.zz - z_min) % block_size, binning.bins_z) - 1
    # Rows are flipped so that increasing z maps to decreasing row index
    pixel_row = (image_height - 1) - z_bin

    shape = (binning.n_tiles_z, binning.n_tiles_x, binning.tile_height, binning.tile_width)
    pixels = (tile_row, tile_col, pixel_row, pixel_col)
    labels = np.asarray(event.tt)
    charges = np.asarray(event.qq, dtype=float)

    def accumulate(selection):
        # Sum the charge of the selected hits per pixel. The histogram is float so that
        # fractional charge is kept, and np.add.at handles several hits in the same pixel.
        histogram = np.zeros(shape, dtype=float)
        np.add.at(histogram, tuple(axis[selection] for axis in pixels), charges[selection])
        return histogram

    is_shower = labels == SHOWER
    is_track = labels == TRACK
    shower_histogram = accumulate(is_shower)
    track_histogram = accumulate(is_track)
    # Any label that is neither shower nor track is treated as diffuse
    diffuse_histogram = accumulate(~(is_shower | is_track))
    input_histogram = accumulate(np.ones(labels.shape, dtype=bool))
    # NOTE: the per-hit penalty values (event.pp) do not contribute to either output.

    # Each pixel takes the class with the most charge; track wins ties, then shower
    track_mask = (track_histogram >= shower_histogram) & (track_histogram >= diffuse_histogram) & (track_histogram > 0)
    shower_mask = (shower_histogram > track_histogram) & (shower_histogram >= diffuse_histogram)
    diffuse_mask = (diffuse_histogram > track_histogram) & (diffuse_histogram > shower_histogram)

    truth_histogram = np.zeros(shape, dtype=int)
    truth_histogram[diffuse_mask] = DIFFUSE
    truth_histogram[shower_mask] = SHOWER
    truth_histogram[track_mask] = TRACK

    # Normalise the input histogram to [0, 1] and scale to the full uint8 range; casting the
    # [0, 1] values directly would send every pixel except the maximum to zero
    q_min, q_max = np.min(input_histogram), np.max(input_histogram)
    q_range = q_max - q_min
    if q_range > 0:
        input_histogram = (input_histogram - q_min) / q_range
    else:
        # A flat histogram carries no contrast; avoid dividing by zero
        input_histogram = np.zeros_like(input_histogram)
    input_histogram = (255 * input_histogram).astype(np.uint8)

    truth_output_folder = os.path.join(output_folder, "Truth")
    hits_output_folder = os.path.join(output_folder, "Hits")
    for path in (hits_output_folder, truth_output_folder):
        os.makedirs(path, exist_ok=True)

    for tr in range(binning.n_tiles_z):
        for tc in range(binning.n_tiles_x):
            truth_tile = truth_histogram[tr, tc, ...]
            # Skip tiles with too few classified pixels
            if np.count_nonzero(truth_tile) <= 10:
                continue
            filename = f"Image_{event.index}_{tr}_{tc}.png"
            with open(os.path.join(truth_output_folder, filename), 'wb') as file:
                np.savez_compressed(file, truth_tile)
            with open(os.path.join(hits_output_folder, filename), 'wb') as file:
                np.savez_compressed(file, input_histogram[tr, tc, ...])

In [None]:
# Build the image set for each view: one input CSV file and one output folder per view
for view in ['I', 'C']:
    events = preprocess_file(f'csv/HitTagTS_CaloHitList{view}.csv')
    for event in events:
        # preprocess_event yields None for a malformed row; there is nothing to draw for it
        if event is None:
            continue
        make_image(event, view, f'Images{view}')