### For plotting the results of the GAN training
First run signal_GAN_train.py and then signal_GAN_test.py.

In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

# my preferred plotting settings
# `pal` is the seaborn "colorblind" palette; later cells pick line colors from it.
pal = sns.color_palette("colorblind")
# NOTE(review): 'plot_style.txt' is presumably a matplotlib style sheet in the working directory - confirm.
plt.style.use('plot_style.txt')
from matplotlib.ticker import AutoMinorLocator
from NuRadioReco.utilities import units, fft

# Restrict this process to the CUDA device with index 1.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on every visible GPU.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as exc:
        # Raised when memory growth is requested after the GPUs were initialized.
        print(exc)

# Pass the XLA device flag to TensorFlow through the environment.
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'

In [None]:
# Results table of the random search (presumably one row per trained model).
results_path = 'GAN_losses/signal_gan_results_transconv-incept-m14-10deg-05split-fixed.pkl'
models = pd.read_pickle(results_path)
energy_err = models['energy_err']
# Row position of the smallest energy error.
index_best = np.argmin(energy_err)
best_row = models.iloc[index_best]
print('best model:', best_row['name'])
# NOTE: the best model differs slightly from the one in the report because the
# test was re-run and the generated signals were different that time.
# The difference is only about 1%.

### Load and normalize the data
The normalization changes depending on the range of angles used

In [None]:
# Row layout used below: columns 0-1 are the conditions (energy, viewing angle),
# columns 3.. are the signal samples.
data = np.load('/mnt/md0/aholmberg/data/signal_had_14_10deg.npy')
condition = data[:, :2]
# Column 2 is the only column not claimed by `condition` or `signals`, so the
# shower number is read from there. The previous index 3 aliased the first
# signal sample. NOTE(review): confirm the column layout against the data writer.
shower_n = data[:, 2]
signals = data[:, 3:]
signals_filtered = np.load('/mnt/md0/aholmberg/data/signal_had_14_filtered_10deg.npy')

latent_dim = 112  # length of the latent vector fed to the generator
N = 896  # number of samples per signal
n_index = 1.78  # refractive index used for the Cherenkov angle
cherenkov_angle = np.arccos(1. / n_index)

# Normalize the conditions to the range (0, 1) on a copy, leaving `condition` untouched.
condition_norm = condition.copy()
# log10(energy) is mapped linearly from [15, 19] onto [0, 1].
condition_norm[:, 0] = (np.log10(condition_norm[:, 0]) - 15)/(19 - 15)
# The angle offset from the Cherenkov angle is mapped from [-5, 5] deg onto [0, 1].
# For the narrower data set use "+ 2.5) / 5" instead:
#condition_norm[:, 1] = ((condition_norm[:, 1] - cherenkov_angle) / units.deg + 2.5)/ 5
condition_norm[:, 1] = ((condition_norm[:, 1] - cherenkov_angle) / units.deg + 5)/ 10

### Load a model from a random grid search from train_signal_GAN.py
Specify the name and directory of the saved models

In [None]:
# Run identifier of the saved model; it is part of the directory name on disk.
name = 'run4-lr=5e-05-critic_filters=24-generator_filters=48-generator_k_size=15'
generator_dir = f'/mnt/md0/aholmberg/GAN_models/transconv-incept-m14-10deg-05split-fixed/gen_{name}/'
g_model = keras.models.load_model(generator_dir, compile=False)
g_model.compile()

# Everything from row `ind` onwards is used as the test set.
test_split = 0.5
ind = int(signals_filtered.shape[0]*test_split)

test_signals = signals_filtered[ind:, :]
test_conditions = condition[ind:, :]
test_conditions_norm = condition_norm[ind:, :]

# The start index depends on the test split: choose it so that the ten rows
# taken below all share the same conditions.
i = 0
first_ten = slice(i, i + 10)

test_signal = test_signals[first_ten, :]
test_condition = test_conditions[first_ten, :]
test_condition_norm = test_conditions_norm[first_ten, :]

# One latent vector per signal to generate.
latent_vec = tf.random.normal((10, latent_dim))

### Predict ten signals

In [None]:
# Call the generator directly (training=False) on the ten latent vectors and
# their normalized conditions.
pred_signals = g_model([latent_vec, test_condition_norm], training=False)
# Show the shape of the generated batch as the cell output.
pred_signals.shape

### Scale those ten signals depending on the scaling used when training the model

In [None]:
# Undo the per-signal scaling applied during training. The scale is built
# from the energy (1e19 / E) and the fourth power of the angle offset from the
# Cherenkov angle in degrees. The final divisor is 6 here; an earlier variant
# of the scaling used 3, and another variant used only the energy factor.
energy_factor = np.expand_dims(1e19 / test_condition[:, 0], axis=-1)
angle_offset_deg = test_condition[:, 1] / units.deg - cherenkov_angle / units.deg
angle_factor = np.expand_dims(angle_offset_deg ** 4, axis=-1) + 1
training_scale = energy_factor * angle_factor / 6
pred_signals_scaled = pred_signals / training_scale

print(f'E: {test_condition[:,0]},\n theta: {test_condition[:,1]/units.deg},\n norm: {test_condition_norm}')

### Plot a signal and a generated signal

In [None]:
# Time axis in ns for the 896 samples. endpoint=False gives a step of exactly
# 89.6 / 896 = 0.1 ns (0.0 ... 89.5), matching the 1e-10 s sampling interval
# used for the FFT; including the endpoint would stretch the step to 89.6 / 895.
x = np.linspace(0, 89.6, 896, endpoint=False)
# Compare generated and true trace number 1 over samples 100-769.
plt.plot(x[100:770], pred_signals_scaled[1, 100:770], label='generated signal')
plt.plot(x[100:770], test_signal[1, 100:770], '--', label='true signal')
plt.legend()
plt.xlabel('time [ns]')
plt.ylabel('amplitude [V/m]')
#plt.savefig('gensig.png')

### Fourier transform the real and predicted signals

In [None]:
# Sampling interval, sampling rate and the matching real-FFT frequency axis.
dt = 1e-10 * units.second
sr = 1/dt
ff = np.fft.rfftfreq(N, dt)

# Ten signals with 449 frequency bins each (449 = 896 // 2 + 1).
spectrum_shape = (10, 449)
pred_spectrum = np.zeros(spectrum_shape)
real_spectrum = np.zeros(spectrum_shape)
for row in range(spectrum_shape[0]):
    generated_trace = pred_signals_scaled[row, :]
    real_trace = test_signal[row, :]
    # Only the magnitude of the spectrum is kept.
    pred_spectrum[row, :] = np.abs(fft.time2freq(generated_trace, sampling_rate=sr))
    real_spectrum[row, :] = np.abs(fft.time2freq(real_trace, sampling_rate=sr))

### Plot the mean and bounds of generated signals and their spectra

In [None]:
# Mean, minimum and maximum over the signals (axis 0), for traces and spectra.
stat_funcs = (np.mean, np.max, np.min)
mean_pred, max_pred, min_pred = (f(pred_signals_scaled, axis=0) for f in stat_funcs)
mean_real, max_real, min_real = (f(test_signal, axis=0) for f in stat_funcs)

mean_pred_spectrum, max_pred_spectrum, min_pred_spectrum = (f(pred_spectrum, axis=0) for f in stat_funcs)
mean_real_spectrum, max_real_spectrum, min_real_spectrum = (f(real_spectrum, axis=0) for f in stat_funcs)

# 2x2 grid: top row time domain, bottom row frequency domain;
# left column means, right column min/max bands.
fig, ax = plt.subplots(2,2, figsize=(10,10), sharey='row')
(ax_trace_mean, ax_trace_band), (ax_spec_mean, ax_spec_band) = ax

shown = slice(100, 770)  # part of the trace that is drawn
t_shown = x[shown]

ax_trace_mean.plot(t_shown, mean_pred[shown], 'r', label='mean of generated signals')
ax_trace_mean.plot(t_shown, mean_real[shown], '--b', label='mean of true signals')

ax_trace_band.fill_between(t_shown, min_pred[shown], max_pred[shown], label='min/max bounds for generated signals', color='r')
ax_trace_band.fill_between(t_shown, min_real[shown], max_real[shown], label='min/max bounds for real signals', color='b')

ax_spec_mean.plot(ff, mean_pred_spectrum, 'r', label='mean spectrum of generated signals')
ax_spec_mean.plot(ff, mean_real_spectrum, 'b', label='mean spectrum of real signals')

ax_spec_band.fill_between(ff, min_pred_spectrum, max_pred_spectrum, label='min/max bounds of the spectrum for generated signals', color='r')
ax_spec_band.fill_between(ff, min_real_spectrum, max_real_spectrum, label='min/max bounds of the spectrum for real signals', color='b')

# Legends in the upper right corner of every panel.
for panel in ax.flat:
    panel.legend(loc=1)

for panel in (ax_trace_mean, ax_trace_band):
    panel.set_xlabel('time [ns]')
for panel in (ax_spec_mean, ax_spec_band):
    panel.set_xlabel('frequency [GHz]')
# The rows share their y axis, so only the left panels are labelled.
ax_trace_mean.set_ylabel('amplitude [V/m]')
ax_spec_mean.set_ylabel('amplitude [V/m/GHz]')

theta = r'$\theta$'
deg = r'$^\circ$'
fig.suptitle(f'Signal and spectrum for 10 generated signals and 10 real signals \n with different shower profiles, E={test_condition[0,0]:.2e} [eV] {theta}={test_condition[0, 1]/ units.deg :.2f} [{deg}]')
#fig.savefig('plots/40deg_gen_sig_E181e17_theta6839.png')

### Now predict all of the test signals

In [None]:
# Start of the test set, shifted by (i % 10) rows past the split index.
i = 0
start = ind + (i % 10)
test_signals = signals_filtered[start:, :]
test_conditions = condition[start:, :]
test_conditions_norm = condition_norm[start:, :]
# One latent vector for every test condition.
n_test = test_conditions.shape[0]
latent_vec = tf.random.normal((n_test, latent_dim))

In [None]:
# Generate one signal per test condition from the latent vectors drawn in the previous cell.
pred_signals = g_model.predict([latent_vec, test_conditions_norm])

### Scale those test signals

In [None]:
# Undo the per-signal training scale for every generated test signal. The
# scale combines the energy (1e19 / E) with the fourth power of the angle
# offset from the Cherenkov angle in degrees; the final divisor is 6 here
# (an earlier variant used 3, another used only the energy factor).
# The division allocates a new array, so `pred_signals` itself is left unchanged.
energy_factor = np.expand_dims(1e19 / test_conditions[:, 0], axis=-1)
angle_offset_deg = test_conditions[:, 1] / units.deg - cherenkov_angle / units.deg
angle_factor = np.expand_dims(angle_offset_deg ** 4, axis=-1) + 1
training_scale = energy_factor * angle_factor / 6
pred_signals_scaled = pred_signals / training_scale

### Load the critic and compile it

In [None]:
# Load the critic of the same run as the generator: the directory is built
# from `name` so that changing the run in one place cannot leave generator and
# critic pointing at different runs.
critic_dir = f'/mnt/md0/aholmberg/GAN_models/transconv-incept-m14-10deg-05split-fixed/crit_{name}/'
c_model = keras.models.load_model(critic_dir, compile=False)
c_model.compile()

### Normalize test signals so the critic can compare generated with real

In [None]:
# Apply the training scale to the real test signals. The factor is the same
# one the generated signals are divided by: energy term (1e19 / E) times the
# fourth power of the angle offset from the Cherenkov angle in degrees plus
# one, divided by 6 (an earlier variant used 3, another only the energy term).
energy_factor = np.expand_dims(1e19 / test_conditions[:, 0], axis=-1)
angle_offset_deg = test_conditions[:, 1] / units.deg - cherenkov_angle / units.deg
angle_factor = np.expand_dims(angle_offset_deg ** 4, axis=-1) + 1
training_scale = energy_factor * angle_factor / 6
normalized_signals = test_signals * training_scale



In [None]:
# Critic output for the generator output (still in the training scale) and for
# the real signals brought to that scale, each paired with the same normalized conditions.
fake_logits = c_model.predict([pred_signals, test_conditions_norm])
real_logits = c_model.predict([normalized_signals, test_conditions_norm])

### Scatter plot of the estimated Wasserstein distance

In [None]:
# Absolute gap between the critic output for generated and real signals, one
# point per test signal. (An earlier variant evaluated
# cWGANGP_model_def.critic_loss on blocks of ten logits instead.)
logit_gap = np.abs(fake_logits-real_logits)
signal_index = range(logit_gap.size)
plt.scatter(signal_index, logit_gap, s=5)

### Plot signal with the lowest w-distance

In [None]:
# Signed gap between the critic output for generated and real signals.
diff = fake_logits-real_logits
abs_diff = np.abs(diff)
# NOTE: `min` and `max` shadow the builtins; the names are kept because the
# following cells index with `min` and `max2`.
min = abs_diff.argmin() #  min w-dist
max = abs_diff.argmax() #  max w-dist

# Second-largest |diff|. The flattened argsort returns positions in the
# original array; the previous argmax over a filtered copy returned a position
# in the filtered array, which did not map back to the rows of the test set.
max2 = np.argsort(abs_diff, axis=None)[-2] #  second max w-dist

# Real and generated trace with the smallest gap, drawn for samples 400-559 in mV/m.
fig, ax = plt.subplots(1, 1, figsize=(8,5))
ax.plot(x[400:560], test_signals[min, 400:560]/units.millivolt, label='real signal')
ax.plot(x[400:560], pred_signals_scaled[min, 400:560]/units.millivolt, '--', label='pred signal')
ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.legend(frameon=False)
ax.set_xlabel('Time [ns]')
ax.set_ylabel('Amplitude [mV/m]')
#fig.savefig('thesis/Exjobb-rapport/figures/c_score_best.pdf')

### Plot the signal with the highest w-distance

In [None]:
# Real and generated trace at index `max2`, drawn for samples 350-499 in mV/m.
fig, ax = plt.subplots(1, 1, figsize=(8,5))

shown = slice(350, 500)
t_shown = x[shown]
real_trace_mv = test_signals[max2, shown]/units.millivolt
pred_trace_mv = pred_signals_scaled[max2, shown]/units.millivolt
ax.plot(t_shown, real_trace_mv, label='real signal')
ax.plot(t_shown, pred_trace_mv, '--', label='pred signal')

for axis in (ax.xaxis, ax.yaxis):
    axis.set_minor_locator(AutoMinorLocator(5))
ax.legend(frameon=False)
ax.set_xlabel('Time [ns]')
ax.set_ylabel('Amplitude [mV/m]')
# This figure is written to disk on every run.
fig.savefig('thesis/Exjobb-rapport/figures/c_score_worst.pdf')

# Report the critic outputs and the conditions of the two selected signals.
print(f'max diff={diff[max2]}, real logit={real_logits[max2]}, fake logits={fake_logits[max2]}, given condition: {test_conditions[max2,0]:.3E}, {test_conditions[max2,1]/units.deg:.2f}')
print(f'min diff: {diff[min]}, real logit={real_logits[min]}, fake logits={fake_logits[min]}, given condition: {test_conditions[min,0]:.3E}, {test_conditions[min,1]/units.deg:.2f}')

### See if there is a clear pattern in the w-distance

In [None]:
# One row per test signal: critic gap, angle offset from the Cherenkov angle
# in degrees, and log10 of the energy.
w_dist_column = diff.squeeze()
angle_column = (test_conditions[:,1] - cherenkov_angle)/units.deg
log_energy_column = np.log10(test_conditions[:,0])
table = np.column_stack((w_dist_column, angle_column, log_energy_column))
df = pd.DataFrame(table, columns=['w-dist', 'angle', 'log-E'])
sns.pairplot(df)

### compute fluence

In [None]:
from scipy import integrate as quad

In [None]:
# Integrate the squared amplitude of every trace over the time axis `x` with
# Simpson's rule (`quad` is the alias given to scipy.integrate in the import cell).
real_energy = quad.simpson(np.power(test_signals, 2), x=x, axis=-1)
gen_energy = quad.simpson(np.power(pred_signals_scaled, 2), x=x, axis=-1)

### Compute error in peak to peak amplitude and fluence

In [None]:
# The test signals are processed in consecutive groups of ten rows; reference
# statistics of the real signals are computed once per group.
group_size = 10
n_groups = real_energy.shape[0]//group_size
avg_real_energy = np.zeros((n_groups, ))
std_real_energy = np.zeros((n_groups, ))
avg_peak2peak = np.zeros((n_groups, ))
std_peak2peak = np.zeros((n_groups, ))
for i in range(n_groups):
    rows = slice(i*group_size, i*group_size+group_size)
    avg_real_energy[i] = np.mean(real_energy[rows], axis=0)
    std_real_energy[i] = np.std(real_energy[rows], axis=0)
    # Peak-to-peak amplitude of each real trace in the group. Local names are
    # used so the `min`/`max` names set in earlier cells are not overwritten.
    real_peak2peak = np.max(test_signals[rows], axis=-1) - np.min(test_signals[rows], axis=-1)
    avg_peak2peak[i] = np.mean(real_peak2peak)
    std_peak2peak[i] = np.std(real_peak2peak)

# Per generated signal: squared deviation from the group mean, counted only
# when the deviation exceeds the group's standard deviation (otherwise 0).
energy_err = np.zeros((pred_signals_scaled.shape[0], ))
peak_err = np.zeros((pred_signals_scaled.shape[0], ))
for i in range(pred_signals_scaled.shape[0]):
    group = i//group_size

    err = np.abs(gen_energy[i] - avg_real_energy[group])
    energy_err[i] = err**2 if err > std_real_energy[group] else 0

    gen_peak2peak = np.max(pred_signals_scaled[i]) - np.min(pred_signals_scaled[i])
    p_err = np.abs(avg_peak2peak[group] - gen_peak2peak)
    # Compare the peak-to-peak deviation itself with its threshold; the
    # energy error was tested here before, so the wrong quantity gated this term.
    peak_err[i] = p_err**2 if p_err > std_peak2peak[group] else 0


# Peak-to-peak error divided by the group's mean peak-to-peak amplitude,
# plotted against that mean (repeated once per signal in the group).
avg_peak2peak_per_signal = np.repeat(avg_peak2peak, group_size)
plt.scatter(avg_peak2peak_per_signal, peak_err/avg_peak2peak_per_signal)

### Load and plot losses

In [None]:
# Training history of the run selected by `name`, so the loss curves always
# belong to the model loaded earlier in the notebook.
history = pd.read_pickle(f'GAN_losses/history_{name}.pkl')
# Show the last rows as the cell output.
history.tail()

In [None]:
# Block average: the mean of every consecutive, non-overlapping run of
# `window` loss values. Values left over after the last full block are dropped.
window = 16
loss_length = history['g_loss'].shape[0]//window
mean_g_loss = np.zeros((loss_length,))
mean_d_loss = np.zeros((loss_length,))
mean_d_cost = np.zeros((loss_length,))
mean_gp = np.zeros((loss_length,))
for i in range(loss_length):
    block = slice(i*window, i*window+window)
    mean_g_loss[i] = np.mean(history['g_loss'][block])
    mean_d_loss[i] = np.mean(history['c_loss'][block])
    mean_d_cost[i] = np.mean(history['c_cost'][block])
    # Index by block number like the other curves; the previous `i//window`
    # wrote into the wrong entries and left most of the array at zero.
    mean_gp[i] = np.mean(history['gp'][block])

    

In [None]:
# generator loss
fig, ax = plt.subplots(1, 1, figsize=(8,5))
# Epoch axis from 1 to 100 with one point per averaged loss value. The length
# follows the data instead of a fixed 2000*4, so a history of another length
# still plots. NOTE: this rebinds `x`, which held the time axis of the signal
# plots; the critic-loss cell reads this epoch axis.
x = np.linspace(1, 100, mean_g_loss.shape[0])
ax.xaxis.set_minor_locator(AutoMinorLocator(5))
ax.yaxis.set_minor_locator(AutoMinorLocator(5))
ax.plot(x, mean_g_loss, label='generator loss', color=pal[0])
#ax.legend(frameon=False)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
#fig.savefig('thesis/Exjobb-rapport/figures/gan_g_loss_best.pdf')

In [None]:
# Critic loss curves on one axis, with a dotted zero line for reference.
fig, ax = plt.subplots(figsize=(8,5))
x_limits = (-5, 105)
ax.plot(x_limits, [0, 0], ':', color=pal[3], alpha=1)
#ax.yaxis.grid(True)
ax.set_xlim(*x_limits)

# (values, legend label, color) in drawing order; the gradient penalty is
# multiplied by 10 before plotting.
curves = (
    (mean_d_loss, 'total critic loss', pal[0]),
    (mean_gp*10, 'gradient penalty', pal[1]),
    (mean_d_cost, 'wasserstein loss', pal[2]),
)
for values, curve_label, curve_color in curves:
    ax.plot(x, values, label=curve_label, color=curve_color)

for axis in (ax.xaxis, ax.yaxis):
    axis.set_minor_locator(AutoMinorLocator(5))
ax.legend(frameon=False)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
#fig.savefig('thesis/Exjobb-rapport/figures/gan_c_loss_best.pdf')

### Plot the generator

In [None]:
#keras.utils.plot_model(g_model, rankdir='LR', show_layer_names=False, show_shapes=False, dpi=300, to_file='g_model_example.png')

### Load results from random search

In [None]:
# Reload the random-search results and locate the best and worst run for each metric.
results_path = 'GAN_losses/signal_gan_results_transconv-incept-m14-10deg-05split-fixed.pkl'
models = pd.read_pickle(results_path)
peak_err = models['peak_err']
energy_err = models['energy_err']
w_dist = models['w_dist']

metrics = (peak_err, energy_err, w_dist)
p_max_run, e_max_run, w_max_run = (np.argmax(metric) for metric in metrics)
p_min_run, e_min_run, w_min_run = (np.argmin(metric) for metric in metrics)

# Row positions of the worst runs first, then of the best runs.
print(p_max_run, e_max_run, w_max_run)
print(p_min_run, e_min_run, w_min_run)
# Show the table ordered by energy error as the cell output.
models.sort_values('energy_err')

In [None]:
# Show the name of the model stored at row position 13 of the results table.
models.iloc[13]['name']

### Create a LaTeX table of the ten models with the lowest energy error

In [None]:
# Ten rows with the lowest energy error. The explicit copy makes `tmp` an
# independent frame, so the column assignments below do not act on a slice of
# another DataFrame (which triggers pandas' SettingWithCopyWarning).
tmp = models.sort_values('energy_err', ascending=True).head(10).copy()
# Errors are shown as percentages with two decimals, the distance with three.
tmp['energy_err'] = (tmp['energy_err']*100).map("{:.2f}".format) + '%'
tmp['peak_err'] = (tmp['peak_err']*100).map("{:.2f}".format) + '%'
tmp['w_dist'] = (tmp['w_dist']).map("{:.3f}".format)
# Convert the learning rate to float so the table's float format applies to it.
tmp['lr'] = (tmp['lr']).map(float)
tmp

In [None]:
# Columns to export and the heading printed for each of them, in the same order.
table_columns = ['energy_err', 'peak_err', 'w_dist', 'lr', 'critic_filters', 'generator_filters', 'generator_k_size']
table_header = ['Energy error', 'p2p error', 'Wasserstein', 'learning rate', 'C filters', 'G filters', 'G kernel size']
# Float cells are written in scientific notation without decimals; the index is left out.
latex_table = tmp.to_latex(
    index=False,
    float_format='%.0E',
    columns=table_columns,
    header=table_header,
)
print(latex_table)