In [122]:
import sys
sys.path.append('../')

import numpy as np
import matplotlib.pyplot as plt
import importlib
import pickle
from scipy.stats import wasserstein_distance

import utils.metrics as metrics

In [123]:
# Directory holding the merged per-sample metric dictionaries.
pickle_path = '/beegfs/desy/user/buhmae/6_PointCloudDiffusion/output/metrics/'

# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this cell at files you produced yourself or otherwise trust.
# Each file is opened in a `with` block so the handle is closed right after
# loading instead of being left open until garbage collection.
with open(pickle_path + 'merge_dict_10-90GeV_500000_g4.pickle', 'rb') as handle:
    dict_real = pickle.load(handle)
with open(pickle_path + 'merge_dict_10-90GeV_500000_ddpm.pickle', 'rb') as handle:
    dict_ddpm = pickle.load(handle)
with open(pickle_path + 'merge_dict_10-90GeV_500000_edm.pickle', 'rb') as handle:
    dict_edm = pickle.load(handle)
with open(pickle_path + 'merge_dict_10-90GeV_500000_cm.pickle', 'rb') as handle:
    dict_cm = pickle.load(handle)

# Show which observables are stored in a loaded dictionary.
print(dict_cm.keys())

dict_keys(['e_sum', 'hits', 'occ', 'hits_noThreshold', 'binned_layer_e', 'binned_radial_e', 'incident_energy', 'cog_x', 'cog_y', 'cog_z'])


In [124]:
# Turn each metrics dictionary into a single observable array, processing the
# samples in the order: Geant4 reference, DDPM, EDM, CM.
obs_real, obs_ddpm, obs_edm, obs_cm = (
    metrics.get_event_observables_from_dict(sample_dict)
    for sample_dict in (dict_real, dict_ddpm, dict_edm, dict_cm)
)

print(obs_real.shape)

# Statistics of the reference sample along axis 0, reshaped to a single row so
# they broadcast against the observable arrays.
mean_real = np.mean(obs_real, axis=0).reshape(1, -1)
std_real = np.std(obs_real, axis=0).reshape(1, -1)

print(mean_real.shape)

(500000, 26)
(1, 26)


In [125]:
# standardise the data
def standardize(ary, mean, std):
    """Shift ``ary`` by ``mean`` and scale it by ``std``.

    Entries of ``std`` that are exactly zero are replaced by one before the
    division, so a feature without any spread is only shifted instead of
    turning into NaN/inf values.

    Parameters
    ----------
    ary : array_like
        Values to standardise.
    mean : array_like or scalar
        Offset subtracted from ``ary``; must broadcast against it.
    std : array_like or scalar
        Scale ``ary - mean`` is divided by; must broadcast against it.

    Returns
    -------
    The result of ``(ary - mean) / std`` with zero scales treated as one.
    """
    # ones_like keeps the dtype of `std`, so non-zero entries divide exactly as before.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (ary - mean) / safe_std

# Every sample is standardised with the statistics of the reference sample
# (mean_real / std_real), not with its own.
obs_std_real, obs_std_ddpm, obs_std_edm, obs_std_cm = (
    standardize(sample, mean=mean_real, std=std_real)
    for sample in (obs_real, obs_ddpm, obs_edm, obs_cm)
)

In [126]:
# plot all features

# for i in range(obs_std_real.shape[1]):
#     h = plt.hist(obs_std_real[:,i], bins=50, alpha=0.5, label='g4')
#     plt.hist(obs_std_ddpm[:,i], bins=h[1], label='ddpm', histtype='step')
#     plt.hist(obs_std_edm[:,i], bins=h[1], label='edm', histtype='step')
#     plt.hist(obs_std_cm[:,i], bins=h[1], label='cm', histtype='step')
#     plt.legend(loc='best')
#     plt.xlabel('feature {}'.format(i))
#     plt.yscale('log')
#     plt.show()

In [127]:
# Re-import utils.metrics so the current on-disk version of the module is used.
importlib.reload(metrics)

# Compare the standardised reference observables with the DDPM ones.
means, stds = metrics.calc_wdist(
    obs_std_real, obs_std_ddpm, iterations=10, batch_size=50_000
)

print(means.shape)

print('scores for ddpm: ')

# Entries 0-5 are reported one by one under these labels.
for col, label in enumerate(['cog_x', 'cog_y', 'cog_z', 'occ', 'sampling_fraction', 'hits']):
    centre = str(means[col].round(2))
    # The occ spread is printed unrounded; every other spread uses two decimals.
    spread = str(stds[col]) if label == 'occ' else str(stds[col].round(2))
    print(label + ': ' + centre + ' +- ' + spread)

# Entries 6-15 and 16-25 are each merged into one score by combine_scores.
for label, cols in (('binned_layer_e', slice(6, 16)), ('binned_radial_e', slice(16, 26))):
    mean, std = metrics.combine_scores(means[cols], stds[cols])
    print(label + ': ' + str(mean.round(2)) + ' +- ' + str(std.round(2)))

(26,)
scores for ddpm: 
cog_x: 0.04 +- 0.0
cog_y: 0.01 +- 0.01
cog_z: 0.09 +- 0.0
occ: 0.02 +- 0.003202603641419365
sampling_fraction: 0.11 +- 0.0
hits: 0.16 +- 0.01
binned_layer_e: 0.02 +- 0.01
binned_radial_e: 0.39 +- 0.01


In [128]:
# Compare the standardised reference observables with the EDM ones.
means, stds = metrics.calc_wdist(
    obs_std_real, obs_std_edm, iterations=10, batch_size=50_000
)

print(means.shape)

print('scores for edm: ')

# Entries 0-5 are reported one by one under these labels.
for col, label in enumerate(['cog_x', 'cog_y', 'cog_z', 'occ', 'sampling_fraction', 'hits']):
    centre = str(means[col].round(2))
    # The occ spread is printed unrounded; every other spread uses two decimals.
    spread = str(stds[col]) if label == 'occ' else str(stds[col].round(2))
    print(label + ': ' + centre + ' +- ' + spread)

# Entries 6-15 and 16-25 are each merged into one score by combine_scores.
for label, cols in (('binned_layer_e', slice(6, 16)), ('binned_radial_e', slice(16, 26))):
    mean, std = metrics.combine_scores(means[cols], stds[cols])
    print(label + ': ' + str(mean.round(2)) + ' +- ' + str(std.round(2)))

(26,)
scores for edm: 
cog_x: 0.02 +- 0.0
cog_y: 0.04 +- 0.01
cog_z: 0.08 +- 0.0
occ: 0.04 +- 0.0028397902746659007
sampling_fraction: 0.26 +- 0.01
hits: 0.15 +- 0.01
binned_layer_e: 0.04 +- 0.01
binned_radial_e: 0.12 +- 0.01


In [129]:
# Compare the standardised reference observables with the CM ones.
means, stds = metrics.calc_wdist(
    obs_std_real, obs_std_cm, iterations=10, batch_size=50_000
)

print(means.shape)

print('scores for cm: ')

# Entries 0-5 are reported one by one under these labels.
for col, label in enumerate(['cog_x', 'cog_y', 'cog_z', 'occ', 'sampling_fraction', 'hits']):
    centre = str(means[col].round(2))
    # The occ spread is printed unrounded; every other spread uses two decimals.
    spread = str(stds[col]) if label == 'occ' else str(stds[col].round(2))
    print(label + ': ' + centre + ' +- ' + spread)

# Entries 6-15 and 16-25 are each merged into one score by combine_scores.
for label, cols in (('binned_layer_e', slice(6, 16)), ('binned_radial_e', slice(16, 26))):
    mean, std = metrics.combine_scores(means[cols], stds[cols])
    print(label + ': ' + str(mean.round(2)) + ' +- ' + str(std.round(2)))

(26,)
scores for cm: 
cog_x: 0.03 +- 0.0
cog_y: 0.01 +- 0.0
cog_z: 0.1 +- 0.01
occ: 0.06 +- 0.006442048164576228
sampling_fraction: 0.1 +- 0.01
hits: 0.16 +- 0.01
binned_layer_e: 0.02 +- 0.01
binned_radial_e: 0.08 +- 0.02


In [130]:
# Re-import utils.metrics so the current on-disk version of the module is used.
importlib.reload(metrics)

# Baseline: compare the standardised reference sample with a reversed copy of
# itself, which gives the score level expected for a sample drawn from the
# same distribution.
means, stds = metrics.calc_wdist(obs_std_real, obs_std_real[::-1], iterations=10, batch_size=50_000)

print(means.shape)

# Header text fixed: "paird" -> "paired".
print('scores for truth (Geant4 with itself reversed (so no event is paired with itself)): ')
print('cog_x: ' + str(means[0].round(2)) + ' +- ' + str(stds[0].round(2)))
print('cog_y: ' + str(means[1].round(2)) + ' +- ' + str(stds[1].round(2)))
print('cog_z: ' + str(means[2].round(2)) + ' +- ' + str(stds[2].round(2)))
# The occ spread is printed unrounded, unlike the other spreads.
print('occ: ' + str(means[3].round(2)) + ' +- ' + str(stds[3]))
print('sampling_fraction: ' + str(means[4].round(2)) + ' +- ' + str(stds[4].round(2)))
print('hits: ' + str(means[5].round(2)) + ' +- ' + str(stds[5].round(2)))

# Entries 6-15 are merged into a single binned_layer_e score.
mean, std = metrics.combine_scores(means[6:16], stds[6:16])
print('binned_layer_e: ' + str(mean.round(2)) + ' +- ' + str(std.round(2)))

# Entries 16-25 are merged into a single binned_radial_e score.
mean, std = metrics.combine_scores(means[16:26], stds[16:26])
print('binned_radial_e: ' + str(mean.round(2)) + ' +- ' + str(std.round(2)))

(26,)
scores for truth (Geant4 with itself reversed (so no event is paird with itself)): 
cog_x: 0.01 +- 0.0
cog_y: 0.01 +- 0.0
cog_z: 0.01 +- 0.0
occ: 0.01 +- 0.002199245915907843
sampling_fraction: 0.01 +- 0.0
hits: 0.01 +- 0.0
binned_layer_e: 0.01 +- 0.01
binned_radial_e: 0.01 +- 0.01
