In [2]:
from __future__ import absolute_import, division, print_function
import uproot
import numpy as np
import matplotlib.pyplot as plt

# energyflow imports
import energyflow as ef
from energyflow.archs import *
from energyflow.utils import data_split, remap_pids, to_categorical

from sklearn.metrics import roc_auc_score, roc_curve

In [7]:
h2gg_dir = '/home/sambt/pythia-samples/optimal-classifiers/H2gg-ee/'
h2qq_dir = '/home/sambt/pythia-samples/optimal-classifiers/H2qqbar-ee/'
kern = -1
#CFs = [0.0001,1/3,2/3,1,4/3,5/3,2,7/3,8/3,3]
CFs = [4/3]
CA = 3.0

In [27]:
f = uproot.open(h2qq_dir+'kernel-1/H2qqbar-ee-100k_CF_0.7_CA_3.0/total.root')['EventTree']
gjet_pt = f.array('plead_constit_pt')
gjet_pt[0,:]

array([ 1.19879227,  2.72662809,  4.27254895,  7.4201559 ,  9.65858289,
        9.65858289,  3.70542392,  5.01987503,  4.76586242,  4.76586242,
        3.42240189, 17.53696167, 15.0663135 , 33.60739495, 33.60739495,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [29]:
for CF in CFs:
    fname_gg = 'kernel{0}/H2gg-ee-100k_CF_{1:.1f}_CA_{2:.1f}/total.root'.format(kern,CF,CA)
    fname_qq = 'kernel{0}/H2qqbar-ee-100k_CF_{1:.1f}_CA_{2:.1f}/total.root'.format(kern,CF,CA)
    
    f_gg = uproot.open(h2gg_dir+fname_gg)['EventTree']
    f_qq = uproot.open(h2qq_dir+fname_qq)['EventTree']
    nev_gg = f_gg.numentries
    nev_qq = f_qq.numentries
    
    gjet_pt = f_gg.array("plead_constit_pt")
    gjet_eta = f_gg.array("plead_constit_eta")
    gjet_phi = f_gg.array("plead_constit_phi")

    qjet_pt = f_qq.array("plead_constit_pt")
    qjet_eta = f_qq.array("plead_constit_eta")
    qjet_phi = f_qq.array("plead_constit_phi")
    
    #remove events where there is no leading quark or gluon jet for some reason
    #gjet_pt = gjet_pt[gjet_pt.any()]
    #gjet_eta = gjet_eta[gjet_eta.any()]
    #gjet_phi = gjet_phi[gjet_phi.any()]

    #qjet_pt = qjet_pt[qjet_pt.any()]
    #qjet_eta = qjet_eta[qjet_eta.any()]
    #qjet_phi = qjet_phi[qjet_phi.any()]
    
    pad_size = 100
    
    quarks = np.array([[[qjet_pt[i,j],qjet_eta[i,j],qjet_phi[i,j]] for j in range(pad_size)] for i in range(nev_qq)])
    gluons = np.array([[[gjet_pt[i,j],gjet_eta[i,j],gjet_phi[i,j]] for j in range(pad_size)] for i in range(nev_gg)])
    
    #make vectors with truth labels, combine q & g samples, shuffle
    quark_labs = np.ones(np.size(quarks,axis=0))
    glu_labs = np.zeros(np.size(gluons,axis=0))

    X = np.concatenate((quarks,gluons))
    y = np.concatenate((quark_labs,glu_labs))

    shuf = np.arange(np.size(X,axis=0))
    np.random.shuffle(shuf)

    X = X[shuf]
    y = y[shuf]
    
    #network parameters
    train, test, val = 75000, 40000, 60000
    Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
    num_epoch = 5
    batch_size = 500

    #convert quark/gluon labels to categorical
    Y = to_categorical(y,num_classes=2)
    
    # preprocess by centering jets and normalizing pts
    for x in X:
        mask = x[:,0] > 0
        yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
        x[mask,1:3] -= yphi_avg
        x[mask,0] /= x[:,0].sum()

    print('Finished preprocessing')
    
    # do train/val/test split 
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    print('Done train/val/test split')
    
    print('Model summary:')

    # build architecture
    pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train model
    pfn.fit(X_train, Y_train,
              epochs=num_epoch,
              batch_size=batch_size,
              validation_data=(X_val, Y_val),
              verbose=1)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

ZeroDivisionError: Weights sum to zero, can't be normalized