# Four Muon Spectrum

This notebook is another showcase of the awkward-array toolset, using FCAT (`fnal_column_analysis_tools`) histograms together with some of its more advanced functionality.
It demonstrates the analysis-object syntax implemented by FCAT's `JaggedCandidateArray`, a multi-tiered physics selection, and the use of an accumulator class provided by FCAT.  We now also introduce the concept of corrections for the case of a Monte Carlo sample.

In [1]:
import time

from fnal_column_analysis_tools import hist
from fnal_column_analysis_tools.hist import plot
from fnal_column_analysis_tools.analysis_objects import JaggedCandidateArray
import fnal_column_analysis_tools.processor as processor
from awkward import JaggedArray
import numpy as np

In [2]:
from parsl.providers import LocalProvider
from parsl.channels import LocalChannel
from parsl.executors import HighThroughputExecutor
from parsl.addresses import address_by_hostname
from parsl.config import Config
# Address the executor advertises to its workers (resolved from the hostname).
htex_address = address_by_hostname()

# Resource provider: a single local block whose workers are set up by
# sourcing the py36 virtualenv before they start.
local_provider = LocalProvider(
    channel=LocalChannel(),
    worker_init='source $HOME/testing/py36/bin/activate',
    init_blocks=1,
    max_blocks=1,
    nodes_per_block=6,
)

# High-throughput executor bound to the local provider above.
htex = HighThroughputExecutor(
    label="coffea_parsl_default",
    address=htex_address,
    max_workers=1,
    provider=local_provider,
)

# Final parsl configuration: one executor, no scaling strategy.
parsl_config = Config(
    executors=[htex],
    strategy=None,
)

In [3]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class FancyDimuonProcessor(processor.ProcessorABC):
    """Histogram dimuon candidates from events with exactly two selected pairs.

    The selection in :meth:`process` keeps muons passing the soft ID, builds
    muon pairs per event with ``distincts()``, requires opposite charge and a
    pair mass above 35 (GeV, per the mass axis label), and finally keeps only
    events with exactly two surviving pairs.  A cutflow counter is updated
    along the way.

    Accumulator contents (every histogram carries the "dataset" category axis):
      * ``mass``      -- mass of the summed four-momentum of the selected pairs
      * ``mass_near`` -- mass of the pair closest to the reference mass 91.118
      * ``mass_far``  -- mass of the pair farthest from that reference mass
      * ``pt_lead``   -- the larger of the two muon pT values in each pair
      * ``pt_trail``  -- the smaller of the two muon pT values in each pair
      * ``cutflow``   -- integer counters keyed by selection-stage name
    """

    def __init__(self, columns=None):
        """Create the processor and its empty accumulator.

        Parameters
        ----------
        columns : list, optional
            Value returned by the ``columns`` property.  Defaults to a fresh
            empty list for every instance (a ``[]`` default in the signature
            would be shared between all instances).
        """
        self._columns = [] if columns is None else columns
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        mass_axis = hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 600, 0.25, 300)
        pt_axis = hist.Bin("pt", r"$p_{T,\mu}$ [GeV]", 3000, 0.25, 300)

        self._accumulator = processor.dict_accumulator({
            'mass': hist.Hist("Counts", dataset_axis, mass_axis),
            'mass_near': hist.Hist("Counts", dataset_axis, mass_axis),
            'mass_far': hist.Hist("Counts", dataset_axis, mass_axis),
            'pt_lead': hist.Hist("Counts", dataset_axis, pt_axis),
            'pt_trail': hist.Hist("Counts", dataset_axis, pt_axis),
            'cutflow': processor.defaultdict_accumulator(int),
        })

    @property
    def accumulator(self):
        """The template accumulator; ``process`` fills a fresh ``identity()`` of it."""
        return self._accumulator

    @property
    def columns(self):
        """The ``columns`` value supplied at construction time."""
        return self._columns

    def process(self, df):
        """Run the selection on one chunk and return a filled accumulator.

        Parameters
        ----------
        df : mapping
            Must provide ``'dataset'``, ``'nMuon'`` and the ``'Muon_pt'``,
            ``'Muon_eta'``, ``'Muon_phi'``, ``'Muon_mass'``, ``'Muon_charge'``,
            ``'Muon_softId'`` and ``'Muon_tightId'`` columns.

        Returns
        -------
        A new accumulator (``self.accumulator.identity()``) with the
        histograms and cutflow counters filled for this chunk.
        """
        # Selection constants, kept in one place so the names cannot drift
        # from the values actually applied.
        min_dimuon_mass = 35
        # NOTE(review): the PDG Z mass is 91.1876 GeV; confirm that 91.118 is
        # the intended reference value.  Left unchanged to preserve results.
        z_ref_mass = 91.118

        output = self.accumulator.identity()

        dataset = df['dataset']
        muons = JaggedCandidateArray.candidatesfromcounts(
            df['nMuon'],
            pt=df['Muon_pt'],
            eta=df['Muon_eta'],
            phi=df['Muon_phi'],
            mass=df['Muon_mass'],
            charge=df['Muon_charge'],
            softId=df['Muon_softId'],
            tightId=df['Muon_tightId'],
        )

        output['cutflow']['all events'] += muons.size

        # Keep only soft-ID muons; the counter records events that retain at
        # least one such muon.
        soft_id = (muons.softId > 0)
        muons = muons[soft_id]
        output['cutflow']['soft id'] += soft_id.any().sum()

        twomuons = (muons.counts >= 2)
        output['cutflow']['two muons'] += twomuons.sum()

        # Build the muon pairs within each event that has at least two muons.
        dimuons = muons[twomuons].distincts()

        twodimuons = (dimuons.counts >= 2)
        output['cutflow']['>= two dimuons'] += twodimuons.sum()
        dimuons = dimuons[twodimuons]

        # Product of the two charges is -1 only for an opposite-sign pair
        # (assuming unit charges -- TODO confirm for the input format).
        opposite_charge = (dimuons.i0['charge'] * dimuons.i1['charge'] == -1)
        dimuons = dimuons[opposite_charge]
        output['cutflow']['opposite charge'] += opposite_charge.any().sum()

        # Pair-mass threshold; no cutflow entry is recorded for this step.
        pass_min_mass = (dimuons.mass > min_dimuon_mass)
        dimuons = dimuons[pass_min_mass]

        exactlytwodimuons = (dimuons.counts == 2)
        output['cutflow']['== two dimuons'] += exactlytwodimuons.sum()
        # compact() so that .offsets and .content line up for the flat
        # np.where operations below.
        dimuons = dimuons[exactlytwodimuons].compact()

        # Per pair, pick the larger / smaller of the two muon pT values and
        # re-wrap the flat result with the pairs' event offsets.
        i0_pt = dimuons.i0.pt.content
        i1_pt = dimuons.i1.pt.content
        leading_mu = (i0_pt > i1_pt)
        pt_lead = JaggedArray.fromoffsets(dimuons.offsets,
                                          np.where(leading_mu, i0_pt, i1_pt))
        pt_trail = JaggedArray.fromoffsets(dimuons.offsets,
                                           np.where(~leading_mu, i0_pt, i1_pt))

        # Rank the pairs of each event by distance from the reference mass;
        # the distance is computed once and reused for both extremes.
        dist_to_ref = np.abs(dimuons.mass - z_ref_mass)
        near_z = dist_to_ref.argmin()
        far_z = dist_to_ref.argmax()

        output['mass'].fill(dataset=dataset,
                            mass=dimuons.p4.sum().mass)
        output['mass_near'].fill(dataset=dataset,
                                 mass=dimuons.mass[near_z].flatten())
        output['mass_far'].fill(dataset=dataset,
                                mass=dimuons.mass[far_z].flatten())
        output['pt_lead'].fill(dataset=dataset,
                               pt=pt_lead.flatten())
        output['pt_trail'].fill(dataset=dataset,
                                pt=pt_trail.flatten())
        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged (no post-processing needed)."""
        return accumulator

In [4]:
from fnal_column_analysis_tools.processor.parsl.detail import (_parsl_initialize,
                                                               _parsl_stop)


In [5]:
from fnal_column_analysis_tools.processor import run_parsl_job
from fnal_column_analysis_tools.processor.parsl.parsl_executor import parsl_executor

# Bring up the parsl DataFlowKernel described by ``parsl_config``.
dfk = _parsl_initialize(parsl_config)

# The clock starts after parsl initialisation, so ``elapsed`` covers only
# the job itself (preprocessing + processing).
tstart = time.time()

# Dataset name -> list of input ROOT files for that dataset.
fileset = {
    'DoubleMuon': [
        '/mnt/hdfs/store/user/lgray/scale_out/root/Run2012B_DoubleMuParked.root',
        '/mnt/hdfs/store/user/lgray/scale_out/root/Run2012C_DoubleMuParked.root',
    ],
    'ZZto4mu': [
        '/mnt/hdfs/store/user/lgray/scale_out/root/ZZTo4mu.root',
    ],
}
treename = 'Events'

proc = FancyDimuonProcessor()

# Run the processor over every file of every dataset through parsl.
output = run_parsl_job(
    fileset,
    treename,
    processor_instance=proc,
    executor=parsl_executor,
    data_flow=dfk,
)

elapsed = time.time() - tstart
print(output)

Preprocessing:   0%|          | 0/3 [00:00<?, ?files/s]

parsl version: 0.7.2


Preprocessing: 100%|██████████| 3/3 [00:05<00:00,  2.64s/files]
Processing:  25%|██▌       | 34/136 [03:13<08:00,  4.71s/items]Process Process-1:
Traceback (most recent call last):
  File "/usr/lib64/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib64/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/cms.lgray/testing/py36/lib/python3.6/site-packages/parsl/executors/high_throughput/interchange.py", line 539, in starter
    ic.start()
  File "/home/cms.lgray/testing/py36/lib/python3.6/site-packages/parsl/executors/high_throughput/interchange.py", line 348, in start
    self.socks = dict(poller.poll(timeout=poll_period))
  File "/home/cms.lgray/testing/py36/lib/python3.6/site-packages/zmq/sugar/poll.py", line 99, in poll
    return zmq_poll(self.sockets, timeout=timeout)
  File "zmq/backend/cython/_poll.pyx", line 123, in zmq.backend.cython._poll.zmq_poll
  File "zmq/backend/cyth

KeyboardInterrupt: 

In [None]:
# Stop the parsl DataFlowKernel that was created with _parsl_initialize.
_parsl_stop(dfk)

In [None]:
# Draw the 'mass' histogram, overlaying one curve per 'dataset' category,
# then restrict the view to 70-150 on x and 0-3000 on y.
fig, ax, _ = plot.plot1d(output['mass'], overlay='dataset')
ax.set_xlim(70,150)
ax.set_ylim(0, 3000)

In [None]:
# Draw the 'mass_near' histogram (pair closest to the reference mass),
# overlaying one curve per 'dataset' category.
fig, ax, _ = plot.plot1d(output['mass_near'], overlay='dataset')
# Optional log axes, currently disabled.
#ax.set_xscale('log')
#ax.set_yscale('log')
# Restrict the view to 60-120 on x and 0.1-7500 on y.
ax.set_xlim(60,120)
ax.set_ylim(0.1, 7500)

In [None]:
# Draw the 'mass_far' histogram (pair farthest from the reference mass),
# overlaying one curve per 'dataset' category.
fig, ax, _ = plot.plot1d(output['mass_far'], overlay='dataset')
# Optional log axes, currently disabled.
#ax.set_xscale('log')
#ax.set_yscale('log')
# Only the y-range is constrained here; the x-range is left at its default.
ax.set_ylim(0.1, 8000)

In [None]:
# Draw the 'pt_lead' histogram, overlaying one curve per 'dataset' category.
fig, ax, _ = plot.plot1d(output['pt_lead'], overlay='dataset')
# Log x-axis is available but disabled; the y-axis is logarithmic.
#ax.set_xscale('log')
ax.set_yscale('log')
# Lower y-limit is 0.1 rather than 0 because the y-axis is logarithmic.
ax.set_ylim(0.1, 5e3)

In [None]:
# Draw the 'pt_trail' histogram, overlaying one curve per 'dataset' category.
fig, ax, _ = plot.plot1d(output['pt_trail'], overlay='dataset')
# Log x-axis is available but disabled; the y-axis is logarithmic.
#ax.set_xscale('log')
ax.set_yscale('log')
# Lower y-limit is 0.1 rather than 0 because the y-axis is logarithmic.
ax.set_ylim(0.1, 2e4)

In [None]:
# Throughput: the 'all events' cutflow count divided by the wall-clock
# seconds measured around run_parsl_job.
print("Events/s:", output['cutflow']['all events']/elapsed)