# Todo:

- plots for cross-validation

# 0. Setup & Imports

In [None]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import h5py
import os
import models
import numpy as np
from tensorflow.keras.models import Model
import tensorflow as tf
from random import randint
import data_generators
import train_network
from etmiss_utils import get_etmiss

# Ask the HDF5 library not to use file locking for the files opened below.
# NOTE(review): this is set after `import h5py`; depending on the HDF5 version
# the variable may only be read when the library initialises — confirm that it
# still takes effect here, or move it above the h5py import.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# 1. Load data

In [None]:
# Open the sample file read-only. The handle is intentionally left open:
# later cells read further datasets from `f`.
f = h5py.File("data/sample_00_fixed.h5", "r")

In [None]:
number_to_test = 100

# Read the first `number_to_test` entries of each input dataset.
cluster = f["cluster"][:number_to_test]
sk = f["SK"][:number_to_test]
vsk = f["VorSK"][:number_to_test]
cssk = f["CSSK"][:number_to_test]

# Stack the four inputs along a new trailing axis in the order
# (cluster, SK, VorSK, CSSK), then insert a length-1 axis at position 1 so that
# iterating over X yields one single-event batch at a time.
# NOTE(review): the single-event cell further down stacks the same datasets as
# (SK, CSSK, VorSK, cluster) — confirm which order the trained model expects.
X = np.stack([cluster, sk, vsk, cssk], axis=-1)
X = X[:, np.newaxis]

# One get_etmiss value per truth entry; each entry gets a new axis at
# position 1 before it is passed in.
truth_entries = f["truth_nm_barcode"][:number_to_test]
Y = [get_etmiss(entry[:, np.newaxis]) for entry in truth_entries]

In [None]:
# Quick look at the distribution of the truth values computed with get_etmiss.
plt.hist(Y)

# 2. Load model

In [None]:
%%capture
# Build the network with tracks=False (presumably the variant without track
# inputs — confirm in models.BB_model) and restore its trained weights from disk.
mdl = models.BB_model(tracks=False)
mdl.load_weights("trained_models/2604_tracksFalse_2020-11-01__weights.h5")

In [None]:
# Run the network once over the whole batch instead of calling predict() once
# per event: predict() is designed for batched inputs and is slow (and noisy)
# when invoked inside a Python loop.
# X carries a length-1 axis at position 1, so X[:, 0] is the plain batch of
# events. Output channel 0 is kept, as before.
# NOTE(review): assumes the model has a single output of rank 4
# (event, dim1, dim2, channel) — confirm against mdl.summary().
network_output = np.asarray(mdl.predict(X[:, 0]))
met_predictions = network_output[:, :, :, 0]

# One get_etmiss value per predicted map, computed the same way as for the
# truth values (new axis inserted at position 1).
met_predictions = [get_etmiss(x[:, np.newaxis]) for x in met_predictions]

In [None]:
# Per-event residual: truth value minus network prediction.
delta_etmiss = np.asarray(Y) - np.asarray(met_predictions)

In [None]:
# Distribution of the values obtained from the network output via get_etmiss.
plt.hist(met_predictions)

# 3. Performance Plot

In [None]:
fig = plt.figure()
# Four stacked rows with no vertical gap: one row for the truth histogram,
# two rows for the residuals and one row for the SK/NN ratio.
spec = gridspec.GridSpec(ncols=1, nrows=4, figure=fig, hspace=0)
ax1 = fig.add_subplot(spec[0, 0])
ax2 = fig.add_subplot(spec[1:3, 0])
ax3 = fig.add_subplot(spec[3, 0])

# Only the bottom panel keeps its x ticks and tick labels.
for upper_ax in (ax1, ax2):
    upper_ax.tick_params(axis="x", bottom=False, labelbottom=False)

# Raw strings keep the LaTeX backslashes literal. Sequences such as "\D" or
# "\m" in a normal string are invalid escapes and trigger a warning.
ax1.set_ylabel("#Events")
ax2.set_ylabel(r"$\Delta E_T^\mathrm{miss}$ $[GeV]$")
ax3.set_xlabel(r"$E_T^\mathrm{miss}$ $[GeV]$")
ax3.set_ylabel(r"$\frac{\mathrm{SK}}{\mathrm{NN}}$")

ax1.hist(Y)
# NOTE(review): the two line plots below are drawn against the event index,
# while the histogram above and the x label refer to the ETmiss value — confirm
# whether the panels are meant to share an ETmiss axis.
ax2.plot(delta_etmiss)
# NOTE(review): get_etmiss is called on the raw SK entries here, without the
# `[:, np.newaxis]` used for the truth and network values — confirm this is
# intended.
ax3.plot(np.asarray([get_etmiss(s) for s in sk]) / met_predictions)

# Extract weights of first layer

In [None]:
# First array in the weight list of the layer at index 2.
# NOTE(review): presumably the kernel of the first convolution — confirm with
# mdl.summary() that index 2 is the intended layer.
w = mdl.layers[2].get_weights()[0]

In [None]:
# Show the 2-D slice of the weights at index 0 of both trailing axes.
# NOTE(review): the other image cells transpose the array before labelling
# x as eta and y as phi; no transpose is applied here — confirm the axis
# labels match the array orientation.
plt.imshow(w[:,:,0,0])
plt.ylabel("phi bins")
plt.xlabel("eta bins")

# Visualise activation of first layer

In [None]:
index = 1

# Read the four input datasets for the single event at position `index`.
sk = f["SK"][index]
cssk = f["CSSK"][index]
vsk = f["VorSK"][index]
cluster = f["cluster"][index]

# Stack the inputs along a new trailing axis in the order
# (SK, CSSK, VorSK, cluster) and prepend a batch axis of length 1.
# NOTE(review): the batch-loading cell near the top stacks the same datasets
# as (cluster, SK, VorSK, CSSK) — confirm which order the trained model expects.
X = np.stack([sk, cssk, vsk, cluster], axis=-1)[np.newaxis]

In [None]:
# Sub-model that maps the network input to the output of the layer named
# "conv2d", evaluated on the single-event input X.
conv_layer = mdl.get_layer("conv2d")
first_layer_model = Model(inputs=mdl.input, outputs=conv_layer.output)
first_layer_activation = first_layer_model.predict(X)

In [None]:
# Channel 0 of the activation for the first (only) event, transposed so that
# the first array axis runs along x.
plt.imshow(np.transpose(first_layer_activation[0, :, :, 0]))
cb = plt.colorbar()

# Raw strings keep the LaTeX backslashes literal. "\e" and "\p" in a normal
# string are invalid escape sequences and trigger a warning.
# NOTE(review): the colour bar is labelled as ET in GeV although the image is
# a layer activation — confirm the unit.
cb.ax.set_ylabel("ET [GeV]")
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

# Tick positions are hard-coded bin indices.
# NOTE(review): presumably for a 50 x 64 (eta x phi) grid — confirm.
plt.yticks([0, 31, 63], [r"$-\pi$", "$0$", r"$\pi$"])
plt.xticks([0, 24, 49], ["$-2.5$", "$0$", "$2.5$"])

# Gradient ascent input image

In [None]:
# Hyper-parameters of the gradient-ascent procedure.
step = 0.01  # step size of a single ascent update
num_octave = 3  # number of image scales that are processed
octave_scale = 1.4  # size ratio between consecutive scales
iterations = 20  # ascent updates per scale
max_loss = 15.0  # handed to gradient_ascent_loop as its early-stop threshold

# Symbolic output of every layer of the model, keyed by layer name.
outputs_dict = {layer.name: layer.output for layer in mdl.layers}

# Model that returns all of those layer outputs at once, as a dict with the
# same keys.
feature_extractor = Model(inputs=mdl.inputs, outputs=outputs_dict)

In [None]:
def compute_loss(input_image):
    """Return the summed, size-normalised squared activation of all layers.

    The image is passed through ``feature_extractor``. For every returned
    activation the squares of the interior values (two entries trimmed from
    each side of axes 1 and 2) are summed and divided by the total number of
    elements of the full activation. The per-layer terms are added up with
    equal weight.

    The slicing requires every activation to have four axes.
    """
    layer_weight = 1
    total = tf.zeros(shape=())
    for activation in feature_extractor(input_image).values():
        # The normalisation uses the element count of the untrimmed activation.
        n_elements = tf.reduce_prod(tf.cast(tf.shape(activation), "float32"))
        # Border entries are left out to avoid border artifacts.
        interior = activation[:, 2:-2, 2:-2, :]
        total += layer_weight * tf.reduce_sum(tf.square(interior)) / n_elements
    return total

In [None]:
def gradient_ascent_step(img, learning_rate):
    """Perform one gradient-ascent update of ``img`` on ``compute_loss``.

    Returns a ``(loss, img)`` pair: the loss evaluated at the incoming image
    and the image after the update.
    """
    with tf.GradientTape() as tape:
        # img is not a variable, so it has to be watched explicitly.
        tape.watch(img)
        loss = compute_loss(img)
    gradient = tape.gradient(loss, img)
    # Scale the gradient by its mean absolute value. The lower bound of 1e-6
    # guards against division by (almost) zero.
    mean_abs_gradient = tf.reduce_mean(tf.abs(gradient))
    gradient /= tf.maximum(mean_abs_gradient, 1e-6)
    img += learning_rate * gradient
    return loss, img

def gradient_ascent_loop(img, iterations, learning_rate, max_loss=None):
    """Apply up to ``iterations`` ascent steps to ``img`` and return the result.

    After every step the loss is printed. If ``max_loss`` is given and the
    loss of a step exceeds it, the loop stops right after that step, without
    printing, and the image produced by that step is returned.
    """
    for step_index in range(iterations):
        loss, img = gradient_ascent_step(img, learning_rate)
        if max_loss is not None:
            if loss > max_loss:
                break
        print("... Loss value at step %d: %.2f" % (step_index, loss))
    return img

In [None]:
from copy import deepcopy

# Work on a copy so that X itself is left untouched.
original_img = deepcopy(X)
original_shape = original_img.shape[1:3]

# Sizes of axes 1 and 2 for every octave, smallest first; the last entry is
# the original size.
downscaled_shapes = [
    tuple(int(dim / (octave_scale ** octave)) for dim in original_shape)
    for octave in range(1, num_octave)
]
successive_shapes = list(reversed([original_shape] + downscaled_shapes))
shrunk_original_img = tf.image.resize(original_img, successive_shapes[0])

img = tf.identity(original_img)  # Make a copy
for i, shape in enumerate(successive_shapes):
    print("Processing octave %d with shape %s" % (i, shape))
    # Bring the current image to this octave's size and run the ascent on it.
    img = gradient_ascent_loop(
        tf.image.resize(img, shape),
        iterations=iterations,
        learning_rate=step,
        max_loss=max_loss,
    )
    # Detail lost by the previous, smaller octave: the original at this size
    # minus the previous octave's original resized to this size.
    same_size_original = tf.image.resize(original_img, shape)
    lost_detail = same_size_original - tf.image.resize(shrunk_original_img, shape)

    img += lost_detail
    # The original at this size is the reference for the next octave.
    shrunk_original_img = same_size_original

In [None]:
# Channel 3 of the gradient-ascent image for the first (only) event,
# transposed so that the first array axis runs along x.
plt.imshow(np.transpose(img[0, :, :, 3]))
cb = plt.colorbar()

# Raw strings keep the LaTeX backslashes literal. "\e" and "\p" in a normal
# string are invalid escape sequences and trigger a warning.
cb.ax.set_ylabel("ET [GeV]")
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

# Tick positions are hard-coded bin indices.
# NOTE(review): presumably for a 50 x 64 (eta x phi) grid — confirm.
plt.yticks([0, 31, 63], [r"$-\pi$", "$0$", r"$\pi$"])
plt.xticks([0, 24, 49], ["$-2.5$", "$0$", "$2.5$"])