In [1]:
import pyprob
import numpy as np
import torch

from showerSim import invMass_ginkgo

import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as mpl_cm

import sklearn as skl
from sklearn.linear_model import LinearRegression
%matplotlib inline

Cannot import dbm.gnu: No module named '_gdbm'




# Define initial conditions for the simulator

In [2]:
# Kinematics of the parent jet that is handed to the simulator.
jetM = 80.  # parent mass -> W
jetdir = np.array([1,1,1])  # direction of flight (not normalised; rescaled below)
jetP = 400.  # magnitude of the parent 3-momentum
jetvec = jetP * jetdir / np.linalg.norm(jetdir)  # 3-momentum of length jetP along jetdir

# Parent 4-vector: energy sqrt(|p|^2 + m^2) first, followed by the three momentum components.
jet4vec = np.hstack((np.sqrt(jetM**2 + jetP**2), jetvec))

## Define a function that takes `(self, jet)` and returns `True` when the jet satisfies the condition we want to observe

In [6]:
# Condition on the number of leaves
def num_leaves_cut(self, jet):
    """Condition on the number of leaves: True when the jet has at least 27.

    Args:
        self: unused here; part of the (self, jet) call signature.
        jet: mapping whose "leaves" entry supports len().

    Returns:
        bool: whether len(jet["leaves"]) is 27 or more.
    """
    leaf_count = len(jet["leaves"])
    return leaf_count >= 27

In [5]:
def get_subjet_pT(jet, side="left"):
    """Return sqrt(v[1]**2 + v[2]**2) for one of the two entries of jet["tree"][0].

    Args:
        jet: mapping with a "tree" entry (whose first element holds two indices)
            and a "content" entry indexed by those indices.
            Presumably each content entry is a 4-vector (E, px, py, pz), making the
            result the transverse momentum -- confirm against the simulator.
        side: "left" selects jet["tree"][0][0]; "right" selects jet["tree"][0][1].

    Returns:
        The computed value, or None when `side` is neither "left" nor "right".
    """
    if side not in ("left", "right"):
        return None
    child_slot = 0 if side == "left" else 1
    four_vec = jet["content"][jet["tree"][0][child_slot]]
    return np.sqrt(four_vec[1]**2 + four_vec[2]**2)

In [4]:
def subjet_pT_cut(self, jet):
    """Condition on the two entries referenced by jet["tree"][0].

    For each of the two indices in jet["tree"][0] the value
    sqrt(v[1]**2 + v[2]**2) of the matching jet["content"] entry is computed
    (presumably the subjet transverse momentum -- confirm the component order).

    Args:
        self: unused here; part of the (self, jet) call signature.
        jet: mapping with "tree" and "content" entries as described above.

    Returns:
        Truthy when the first ("left") value is <= 40 and the second ("right")
        value is >= 270.
    """
    def transverse(node_idx):
        vec = jet["content"][node_idx]
        return np.sqrt(vec[1]**2 + vec[2]**2)

    children = jet["tree"][0]
    left_pT = transverse(children[0])
    right_pT = transverse(children[1])
    # An earlier variant of this cut accepted either value lying in [275, 400].
    return (left_pT <= 40) and (270 <= right_pT)

## Make instance of the simulator

In [7]:
# Instantiate the Ginkgo simulator model with the parent-jet kinematics defined above
# and `num_leaves_cut` as the boolean condition function.
simulator = invMass_ginkgo.SimulatorModel(
    rate=[3, 1.5],  # exponential distribution rate
    jet_p=jet4vec,  # parent particle 4-vector
    pt_cut=10.,  # minimum pT for resulting jet
    Delta_0=torch.tensor(jetM**2),  # parent particle mass squared -> needs tensor
    M_hard=jetM,  # parent particle mass
    minLeaves=30,  # minimum number of jet constituents
    maxLeaves=40,  # maximum number of jet constituents
    bool_func=num_leaves_cut,  # condition function taking (self, jet)
    suppress_output=True,
)

In [9]:
# Smoke test: call the simulator once with no arguments to make sure the forward
# pass works before generating many traces. The result is kept in `jet`.
jet = simulator()

## Generate traces for the prior distribution

In [10]:
# Draw 5000 traces via `simulator.prior`; later cells filter and plot them as the
# prior sample that the posterior is compared against.
prior = simulator.prior(num_traces=5000)

Time spent  | Time remain.| Progress             | Trace     | Traces/sec
0d:00:00:55 | 0d:00:04:32 | ###----------------- |  845/5000 | 15.27       

  P = np.sqrt(tp)/2 * np.sqrt( 1 - 2 * (t_child+t_sib)/tp + (t_child - t_sib)**2 / tp**2 )


0d:00:05:39 | 0d:00:00:00 | #################### | 5000/5000 | 14.72       


## Train the NN for inference compilation

In [None]:
# Train the inference network on 5000 traces, using 3 proposal mixture components
# and an observation embedding for the 'bool_func' observable (dim 32, depth 3).
# NOTE(review): the recorded output starts with "Continuing to train existing
# inference network...", so this cell appears to have been run on a simulator that
# already had a network -- results may differ on a fresh kernel; confirm.
simulator.learn_inference_network(
    num_traces=5000,
    proposal_mixture_components=3,
    observe_embeddings={'bool_func': {'dim': 32, 'depth': 3}}
)

Continuing to train existing inference network...
Total number of parameters: 132,008
Train. time | Epoch| Trace     | Init. loss| Min. loss | Curr. loss| T.since min | Learn.rate| Traces/sec
New layers, address: 186__forward___traverse___traverse_rec___traverse_..., distribution: Uniform
New layers, address: 230__forward___traverse___traverse_rec___traverse_..., distribution: Uniform
New layers, address: 442__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
New layers, address: 496__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
New layers, address: 186__forward___traverse___traverse_rec___traverse_..., distribution: Uniform
New layers, address: 230__forward___traverse___traverse_rec___traverse_..., distribution: Uniform
New layers, address: 442__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
New layers, address: 496__forward___traverse___traverse_rec___traverse_..., distrib

  logger.debug(f"pR inv mass from p^2 in lab  frame: {np.sqrt(pR_mu[0] ** 2 - np.linalg.norm(pR_mu[1::]) ** 2)}")


------------------------------ | +2.87e+01 | [1m[32m+2.79e+01[0m | [1m[32m+2.79e+01[0m | [1m[32m0d:00:00:00[0m | +1.00e-03 | 2.2  
New layers, address: 496__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
Total addresses: 165, parameters: 143,045
New layers, address: 186__forward___traverse___traverse_rec___traverse_..., distribution: Uniform1.00e-03 | 2.4                            
New layers, address: 230__forward___traverse___traverse_rec___traverse_..., distribution: Uniform
New layers, address: 442__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
New layers, address: 496__forward___traverse___traverse_rec___traverse_..., distribution: TruncatedExponential
Total addresses: 169, parameters: 146,441
New layers, address: 186__forward___traverse___traverse_rec___traverse_..., distribution: Uniforme-03 | 2.4                                
New layers, address: 230__forward___traverse___traverse_rec___tra

## Generate traces for the posterior distribution

In [None]:
# Draw posterior traces with importance sampling guided by the trained inference
# network, conditioning on the 'bool_func' observable being 1.
#
# The original call also passed `inputs=rate`, but no cell in this notebook defines a
# variable named `rate` (it only appears as a keyword when constructing `simulator`),
# so the cell raised NameError on a fresh kernel. The forward pass and the prior above
# are both run without `inputs`, so the posterior is generated the same way here to
# keep prior and posterior comparable.
# NOTE(review): assumes the model falls back to the constructor's `rate` when no
# inputs are given -- confirm against SimulatorModel.forward.
posterior = simulator.posterior(
    num_traces=15*5000,  # 15x the number of prior traces
    inference_engine=pyprob.InferenceEngine.IMPORTANCE_SAMPLING_WITH_INFERENCE_NETWORK,
    observe={"bool_func": 1},
)

## Investigate latent variables

In [None]:
# Node index of the latent to inspect; later cells build the variable name
# 'L_decay' + str(index) + 'True' from it and use it in the plot legend.
index = 1

In [None]:
def _has_decay_latent(trace):
    """True when the trace contains the named variable 'L_decay<index>True'."""
    return trace.named_variables.get('L_decay' + str(index) + 'True') is not None


def _decay_latent(trace):
    """Look up the 'L_decay<index>True' entry of a trace."""
    return trace['L_decay' + str(index) + 'True']


# Keep only traces that contain the selected latent, then project each remaining
# trace onto that latent, for both the prior and the posterior.
exp_prior_dist = prior.filter(_has_decay_latent).map(_decay_latent)
exp_post_dist = posterior.filter(_has_decay_latent).map(_decay_latent)

In [None]:
def _effective_sample_size(weights):
    """Return (sum of weights)**2 / (sum of squared weights)."""
    return weights.sum()**2 / (weights**2).sum()


# Report the effective sample size of the prior and posterior weight arrays.
for ess_label, ess_dist in (
    ("Prior effective sample size:", exp_prior_dist),
    ("Posterior effective sample size:", exp_post_dist),
):
    w = ess_dist.weights.numpy()
    print(ess_label, _effective_sample_size(w))

In [None]:
# Overlay weighted, density-normalised histograms of the selected latent for the
# prior and the posterior, using the same bin edges for both.
bins = np.linspace(0,1,35)  # 35 edges on [0, 1]; previously computed twice in this cell

fig, ax = plt.subplots(figsize=(8, 8))
c1,_,_ = ax.hist([x.item() for x in exp_prior_dist.values],
                 weights=exp_prior_dist.weights.numpy(),
                 bins=bins, alpha = 0.5, label='prior (node index '+str(index)+')', density=True);
c2,_,_ = ax.hist([x.item() for x in exp_post_dist.values],
                 weights=exp_post_dist.weights.numpy(),
                 bins=bins, alpha = 0.5, label = 'posterior (node index '+str(index)+')', density=True);
ax.legend()
# The simulator in this notebook is built with bool_func=num_leaves_cut (leaves >= 27),
# so the title names that condition; the previous title described the subjet-pT cut,
# which is defined but not the one in use. Keep this in sync with `bool_func`.
ax.set_title("Ginkgo + PyProb Test Conditioned on (num_leaves >= 27)")
ax.set_xlabel('Truncated Exponential Samples')
ax.set_ylabel("Normalized Bin Count");