In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch
import pprint
import matplotlib.pyplot as plt
import sys
import pickle
import argparse
import logging
import os

# from ginkgo import invMass_ginkgo
# from invMass_ginkgo import *
from invMass_ginkgo_node import *
from ginkgo.utils import get_logger


# Logger obtained from ginkgo's helper, requested at WARNING level.
logger = get_logger(level = logging.WARNING)
# Additionally route WARNING-and-above records to 'spam.log' (relative to the
# kernel's working directory).
# NOTE(review): every execution of this cell creates and attaches a fresh
# FileHandler. If get_logger hands back the same logger object each time,
# re-running the cell will make each record appear once per attached handler
# -- confirm, and guard against duplicates if so.
fh = logging.FileHandler('spam.log')
fh.setLevel(logging.WARNING)
logger.addHandler(fh)


"""
create node class
understand original output
simplify recursion logic further

define typology of a tree
access data on each node

call algo to generate tree
have a fully defined tree with 4D vectors for each node

generate likelihood for each tree 
compute eq (12)
eq(14) simplifies to eq(8), apply 8 repeatedly when matching l and r

"""

  from .autonotebook import tqdm as notebook_tqdm


'\ncreate node class\nunderstand original output\nsimplify recursion logic further\n\ndefine typology of a tree\naccess data on each node\n\ncall algo to generate tree\nhave a fully defined tree with 4D vectors for each node\n\ngenerate likelihood for each tree \ncompute eq (12)\neq(14) simplifies to eq(8), apply 8 repeatedly when matching l and r\n\n'

In [3]:
"""Parameters"""
# NOTE(review): none of the names defined in this group (rate2, pt_min, W_rate,
# QCD_rate, QCD_mass) is referenced by the cells visible in this notebook; the
# generation cells define their own rate / pt_cut / M2start. They look like
# reference presets -- confirm before removing.


# Alternative rate value, stored as a 0-d torch tensor.
rate2 = torch.tensor(8.)

# Parameters to get ~<10 constituents to test the trellis algorithm
# (written as a square, 4.**2, and stored as a 0-d torch tensor).
pt_min = torch.tensor(4.**2)

### Physics inspired parameters to get ~ between 20 and 50 constituents
# Rates for W-type and QCD-type jets respectively (plain Python floats).
W_rate = 3.
QCD_rate = 1.5

# Presumably the starting jet mass for QCD jets -- TODO confirm units/usage.
QCD_mass = 30.
class ginkgo_simulator():
    """Convenience wrapper that configures a ``Simulator`` and runs it.

    The constructor stores the generation settings, builds the initial jet
    4-vector and assembles the two-entry rate tensor handed to the simulator.

    Args:
        rate: Decay rate. For "QCD" jets it fills both entries of the rate
            tensor; for "W" jets it fills only the second entry.
        pt_cut: Cut-off value, forwarded to ``Simulator`` as a float.
        M2start: Starting mass squared. Stored as a tensor in ``M2start`` and
            its square root is kept as ``jetM``.
        Nsamples: Number of samples, forwarded as an int.
        minLeaves: Lower bound on the number of leaves, forwarded as an int.
        maxLeaves: Upper bound on the number of leaves, forwarded as an int.
        maxNTry: Maximum number of tries, forwarded as an int.
        jetType: Either "W" or "QCD".
        jetP: Magnitude of the jet 3-momentum; the direction is fixed to
            (1, 1, 1) normalised.
        root_rate: First entry of the rate tensor for "W" jets.

    Raises:
        ValueError: If ``jetType`` is neither "W" nor "QCD".
    """

    def __init__(self,
                 rate,
                 pt_cut,
                 M2start,
                 Nsamples,
                 minLeaves,
                 maxLeaves,
                 maxNTry,
                 jetType,
                 jetP,
                 root_rate= 1.5,
                ):
        # Plain settings, stored as given.
        self.root_rate = root_rate
        self.rate = rate
        self.pt_cut = pt_cut
        self.M2start = torch.tensor(M2start)  # starting mass squared
        self.Nsamples = Nsamples
        self.minLeaves = minLeaves
        self.maxLeaves = maxLeaves
        self.maxNTry = maxNTry
        self.jetType = jetType  # "W" or "QCD"
        self.jetM = np.sqrt(M2start)  # starting mass

        # Initial jet kinematics: momentum of magnitude jetP along (1, 1, 1),
        # with the leading component sqrt(jetP^2 + jetM^2).
        self.jetdir = np.array([1,1,1])
        self.jetP = jetP
        self.jetvec = self.jetP * self.jetdir / np.linalg.norm(self.jetdir)
        leading_component = np.sqrt(self.jetP ** 2 + self.jetM ** 2)
        self.jet4vec = np.concatenate(([leading_component], self.jetvec))
        logger.debug(f"jet4vec = {self.jet4vec}")

        # The first entry of the rate pair depends on the jet type: W jets
        # use a dedicated root rate, QCD jets reuse the same rate throughout.
        if jetType == "W":
            first_rate = self.root_rate
        elif jetType == "QCD":
            first_rate = self.rate
        else:
            raise ValueError("Choose a valid jet type between W or QCD")
        self.rate = torch.tensor([first_rate, self.rate])

    def simulator(self):
        """Return a new ``Simulator`` configured from the stored settings."""
        settings = dict(
            jet_p = self.jet4vec,
            pt_cut = float(self.pt_cut),
            Delta_0 = self.M2start,
            M_hard = self.jetM,
            num_samples = int(self.Nsamples),
            minLeaves = int(self.minLeaves),
            maxLeaves = int(self.maxLeaves),
            maxNTry = int(self.maxNTry),
        )
        return Simulator(**settings)

    def generate(self):
        """Build a simulator, call it with the rate tensor and return its result."""
        run = self.simulator()
        jet_list = run(self.rate)

        logger.debug("---" * 10)
        logger.debug(f"jet_list = {jet_list}")

        return jet_list

In [4]:
# Accepted range for the number of leaves; passed to ginkgo_simulator as
# minLeaves / maxLeaves.
minLeaves = 3
maxLeaves = 100

# number of jets you wish to generate
Nsamples = 1

# exponential rate parameter (for "W" jets the root entry of the rate pair is
# taken from root_rate instead)
rate = 1.5

# mass squared cut off to yield leaves; written as a square (1.1**2) and
# stored as a 0-d torch tensor
pt_cut =  torch.tensor(1.1**2)

# mass squared to start with (the larger value is kept as an alternative)
# M2start = 80.**2
M2start = 5.**2

# the maximum times you are willing to try to get Nsamples
maxNTry = 1

# magnitude of the initial jet 3-momentum (the larger value is kept as an
# alternative)
# jetP=400.
jetP=4.

In [5]:
# Generate QCD-type jets: both entries of the rate pair use `rate`, and
# root_rate keeps the class default.
jetType = "QCD"

ginkgo = ginkgo_simulator(
    rate=rate,
    pt_cut=pt_cut,
    M2start=M2start,
    Nsamples=Nsamples,
    minLeaves=minLeaves,
    maxLeaves=maxLeaves,
    maxNTry=maxNTry,
    jetType=jetType,
    jetP=jetP,
)

QCD_jets = ginkgo.generate()

Node 0
 Vec4: [6.40312424 2.30940108 2.30940108 2.30940108]
 Decay Rate: 1
 Mass Squared: tensor(25.)
 Log Likelihood: -6.7095221514246655
 DIJ List: -6.7095221514246655 0.23277160797200752 2.181810397924136 0.18819615069106774

Node 1
 Vec4: [ 0.43585504  0.00507162  0.29365142 -0.18978641]
 Decay Rate: tensor(0.2418)
 Mass Squared: tensor(0.0677)
 Log Likelihood: 0
 DIJ List:

Node 2
 Vec4: [5.96726919 2.30432946 2.01574966 2.49918748]
 Decay Rate: tensor(0.7996)
 Mass Squared: tensor(19.9892)
 Log Likelihood: -6.491612225332328
 DIJ List: -6.491612225332328 0.004446976506679298 0.03499581593659533 0.002336933582067466

Node 3
 Vec4: [5.62359385 2.1321367  1.82306563 2.34298392]
 Decay Rate: tensor(0.9138)
 Mass Squared: tensor(18.2657)
 Log Likelihood: -6.5000160034085
 DIJ List: -6.5000160034085 0.04507537556494515 0.34541334394099155 0.0009524282247785871

Node 4
 Vec4: [0.34367547 0.17219281 0.19268407 0.15620362]
 Decay Rate: tensor(0.6934)
 Mass Squared: tensor(0.0269)
 Log Lik

In [6]:
# Generate W-type jets: the first entry of the rate pair is `root_rate`, the
# second is `rate`.
root_rate = 4.
jetType = "W"

ginkgo = ginkgo_simulator(
    rate=rate,
    pt_cut=pt_cut,
    M2start=M2start,
    Nsamples=Nsamples,
    minLeaves=minLeaves,
    maxLeaves=maxLeaves,
    maxNTry=maxNTry,
    jetType=jetType,
    jetP=jetP,
    root_rate=root_rate,
)

W_jets = ginkgo.generate()

Node 0
 Vec4: [6.40312424 2.30940108 2.30940108 2.30940108]
 Decay Rate: 1
 Mass Squared: tensor(25.)
 Log Likelihood: -7.424617607053604
 DIJ List: -7.424617607053604 0.27142606055655144 2.746521346214231 2.277166215625276

Node 1
 Vec4: [ 2.87295134 -0.47731217  0.77542397  2.54672488]
 Decay Rate: tensor(0.0772)
 Mass Squared: tensor(0.9389)
 Log Likelihood: 0
 DIJ List:

Node 2
 Vec4: [ 3.5301729   2.78671324  1.53397711 -0.23732381]
 Decay Rate: tensor(0.0915)
 Mass Squared: tensor(2.2869)
 Log Likelihood: -3.5746102626705847
 DIJ List: -3.5746102626705847 0.051458010189774826 0.35452474254238914 0.13197287498004287

Node 3
 Vec4: [0.63551152 0.3861836  0.47235067 0.14126178]
 Decay Rate: tensor(0.0941)
 Mass Squared: tensor(0.0117)
 Log Likelihood: 0
 DIJ List:

Node 4
 Vec4: [ 2.8946613   2.40052959  1.0616264  -0.37858559]
 Decay Rate: tensor(0.5886)
 Mass Squared: tensor(1.3461)
 Log Likelihood: -2.6162346895123205
 DIJ List: -2.6162346895123205 0.13395956106005538 0.715673012

In [7]:
def llh(pL, pR, t_cut, lam):
    """
    Take two nodes and return the splitting log likelihood.

    Parameters
    ----------
    pL, pR : array-like 4-vectors of the two children. Component 0 is squared
        and the squared norm of the remaining components is subtracted to get
        each invariant mass squared. The two are combined with ``+``, so they
        are expected to support element-wise addition (e.g. numpy arrays).
    t_cut : threshold on the mass squared; a child with ``t > t_cut`` is
        scored with the density branch, otherwise with the leaf branch.
    lam : rate parameter used in the exponential terms.

    Returns
    -------
    (logLH, tp) : the splitting log likelihood and the parent invariant mass
        squared. ``logLH`` is ``-np.inf`` when the pairing is rejected.
        Every code path returns this 2-tuple, so callers can always unpack
        ``logLH, tp = llh(...)``.

    Notes
    -----
    ``logsumexp`` must already be in scope; this notebook does not import it
    explicitly (presumably it arrives through a star import -- TODO confirm).
    """
    def inv_mass_sq(p):
        # Component 0 squared minus the squared norm of the other components.
        return p[0] ** 2 - np.linalg.norm(p[1::]) ** 2

    tL = inv_mass_sq(pL)
    tR = inv_mass_sq(pR)

    pP = pR + pL ## eq (5)

    # Parent invariant mass squared
    tp = inv_mass_sq(pP)

    if tp<=0 or tL<0 or tR<0:
        # Rejected pairing. Return the same (logLH, tp) pair as every other
        # path; a bare scalar here would break callers that unpack two values.
        return - np.inf, tp

    # We add a normalization factor -np.log(1 - np.exp(- lam))
    # because we need the mass squared to be strictly decreasing.
    # This way the likelihood integrates to 1 for 0<t<t_p.
    # All leaves should have t=0, this is a convention we are
    # taking (instead of keeping their value for t given that
    # it is below the threshold t_cut)
    def get_logp(tP_local, t, t_cut, lam):
        # The code applies a (1 - 1e-3) factor to lam inside the normalization
        # term; it matches the (1 - 1e-3) * tp bound used further below.
        log_norm = -np.log(1 - np.exp(- (1. - 1e-3)*lam))
        if t > t_cut:
            # Above the threshold: log density of t given the parent scale
            return log_norm + np.log(lam) - np.log(tP_local) - lam * t / tP_local

        else: # For leaves we have t<t_cut
            # Probability of the shower to stop F_s
            t_upper = min(tP_local,t_cut) #There are cases where tp2 < t_cut
            log_F_s = log_norm + np.log(1 - np.exp(-lam * t_upper / tP_local))
            return log_F_s


    if tp <= t_cut:
        #If the pairing is not allowed
        logLH = - np.inf

    elif tL >=(1 - 1e-3)* tp or tR >=(1 - 1e-3)* tp:
        # The pairing is not allowed because tL or tR are greater than tP
        logLH = - np.inf

    elif np.sqrt(tL) + np.sqrt(tR) > np.sqrt(tp):
        print("Breaking invariant mass inequality condition")
        logLH = - np.inf


    else:
        # We sample a unit vector uniformly over the 2-sphere, so the angular likelihood is 1/(4*pi)

        # Upper scale left for the second child once the first one is fixed.
        tpLR = (np.sqrt(tp) - np.sqrt(tL)) ** 2
        tpRL = (np.sqrt(tp) - np.sqrt(tR)) ** 2

        # Both sampling orders are weighted by 1/2 and combined.
        logpLR = np.log(1/2)+ get_logp(tp, tL, t_cut, lam) + get_logp(tpLR, tR, t_cut, lam) #First sample tL
        logpRL = np.log(1/2)+ get_logp(tp, tR, t_cut, lam) + get_logp(tpRL, tL, t_cut, lam) #First sample tR

        logp_split = logsumexp(np.asarray([logpLR, logpRL]))

        logLH = (logp_split + np.log(1 / (4 * np.pi)) ) ## eq (8)

    return logLH, tp

In [8]:
import pickle

def leafToDict(leaf):
    """Return a plain dict holding the node's ``vec4`` and ``delta`` attributes."""
    return {
        "vec4": leaf.vec4,
        "delta": leaf.delta,
    }
def nodeListToDictList(nodeList):
    """Convert each node in ``nodeList`` with ``leafToDict``, keeping the order."""
    return [leafToDict(node) for node in nodeList]
def pickleDictList(dictList, path="data.p"):
    """Pickle ``dictList`` to a file.

    Args:
        dictList: Object to serialise (here, the list of node dicts).
        path: Destination file. Defaults to "data.p" in the working
            directory, which is the location this function always used
            before the parameter existed. The file is overwritten.
    """
    with open(path, "wb") as f:
        pickle.dump(dictList, f)


In [None]:
# Round-trip check: dump the QCD node list as plain dicts, then read it back.
qcd_dicts = nodeListToDictList(QCD_jets)
pickleDictList(qcd_dicts)

# The file was written just above, so unpickling it here is trusted input.
with open('data.p', 'rb') as f:
    output = pickle.load(f)
print(output)

[{'vec4': array([6.40312424, 2.30940108, 2.30940108, 2.30940108]), 'delta': tensor(25.)}, {'vec4': array([5.22164106, 1.63163427, 2.54118569, 2.76262546]), 'delta': tensor(10.5136)}, {'vec4': array([ 1.18148318,  0.6777668 , -0.23178461, -0.45322438]), 'delta': tensor(0.6774)}, {'vec4': array([ 0.80623597,  0.29277151, -0.33386672,  0.43932248]), 'delta': tensor(0.2598)}, {'vec4': array([4.41540528, 1.33886282, 2.8750525 , 2.32330307]), 'delta': tensor(4.0396)}, {'vec4': array([4.01252078, 1.20019124, 2.56538149, 2.14296604]), 'delta': tensor(3.4864)}, {'vec4': array([0.4028843 , 0.13867152, 0.30967088, 0.18033693]), 'delta': tensor(0.0147)}, {'vec4': array([ 0.81839637,  0.11743826,  0.67010929, -0.01895218]), 'delta': tensor(0.2066)}, {'vec4': array([3.19412441, 1.08275298, 1.8952722 , 2.16191822]), 'delta': tensor(0.7641)}]


In [None]:
from node import *
import numpy as np

# finish function
# test function against actual values
# modify code to output 

"""


why is it left leaning???

3 cases

2 leaves
1 leaf, 1 internal
2 internal


TensorFlow version (1.15 or something)



"""
# Reuse the pt_cut value from the parameter cell as the threshold passed to
# llh. That cell describes it as a mass-squared cut-off (it is not a rate).
cut_off = pt_cut

# Checking leaves
# leaves = [(index, i) for index, i in enumerate(QCD_jets) if i.left is None and i.right is None]
# print(*sorted([i[0] for i in leaves]))

# Getting Leaves: nodes of the generated QCD jet that have neither child set.
leaves = [i for i in QCD_jets if i.left is None and i.right is None]


# Emits an empty line in the cell output.
print()

def testRecontruct(parent, left, right, cut_off):
    """Rebuild ``parent`` from its two children and print old vs. new values.

    The rebuilt node gets the summed child 4-vectors, the log likelihood and
    mass squared returned by ``llh``, and the original parent's decay rate
    (reused as-is, as a template value).

    Prints one line per compared attribute (vec4, decay_rate, delta, logLH),
    original first, followed by an empty line.

    Returns:
        The rebuilt ``jetNode``.
    """
    rate = parent.decay_rate

    summed_vec4 = left.vec4 + right.vec4
    log_lh, delta = llh(left.vec4, right.vec4, cut_off, rate)
    rebuilt = jetNode(vec4 = summed_vec4,
                      left = left,
                      right = right,
                      decay_rate = rate,
                      delta = delta,
                      logLH = log_lh
                      )

    for attr in ("vec4", "decay_rate", "delta", "logLH"):
        print(getattr(parent, attr), getattr(rebuilt, attr))
    print()
    return rebuilt

# testRecontruct(QCD_jets[0], QCD_jets[1], QCD_jets[2], cut_off)

def rec_test(node, cut_off):
    """Run ``testRecontruct`` on ``node`` and, recursively, on its subtrees.

    Nodes lacking a (truthy) right or left child are skipped, which ends the
    recursion. The left subtree is visited before the right one.
    """
    if not (node.right and node.left):
        return
    testRecontruct(node, node.left, node.right, cut_off)
    for child in (node.left, node.right):
        rec_test(child, cut_off)
# rec_test(QCD_jets[0], cut_off)



    

2 3 6 7 8



In [1]:
from llhTF import *
def testRecontructTF(parent, left, right, cut_off):
    """Rebuild ``parent`` from its children using ``llhTF`` and print old vs. new.

    Same procedure as the numpy-based check, except that the log likelihood
    and mass squared come from ``llhTF``. The original parent's decay rate is
    reused as-is (template value).

    Prints one line per compared attribute (vec4, decay_rate, delta, logLH),
    original first, followed by an empty line.

    Returns:
        The rebuilt ``jetNode``.
    """
    rate = parent.decay_rate

    summed_vec4 = left.vec4 + right.vec4
    log_lh, delta = llhTF(left.vec4, right.vec4, cut_off, rate)
    rebuilt = jetNode(vec4 = summed_vec4,
                      left = left,
                      right = right,
                      decay_rate = rate,
                      delta = delta,
                      logLH = log_lh
                      )

    for attr in ("vec4", "decay_rate", "delta", "logLH"):
        print(getattr(parent, attr), getattr(rebuilt, attr))
    print()
    return rebuilt

# testRecontruct(QCD_jets[0], QCD_jets[1], QCD_jets[2], cut_off)

def rec_test(node, cut_off):
    """Run ``testRecontructTF`` on ``node`` and, recursively, on its subtrees.

    NOTE(review): this reuses the name ``rec_test`` from the earlier cell, so
    once this cell has run the name refers to this TF-based variant.

    Nodes lacking a (truthy) right or left child are skipped, which ends the
    recursion. The left subtree is visited before the right one.
    """
    if not (node.right and node.left):
        return
    testRecontructTF(node, node.left, node.right, cut_off)
    for child in (node.left, node.right):
        rec_test(child, cut_off)
rec_test(QCD_jets[0], cut_off)

: 

: 