In [1]:
import os

print(os.environ['PYTHONPATH'])

/home/matteoc/decaf/grinder:


In [2]:
import parsl
import os
from parsl.app.app import python_app, bash_app
from parsl.configs.local_threads import config

from parsl.providers import LocalProvider,CondorProvider,SlurmProvider
from parsl.channels import LocalChannel,SSHChannel
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.launchers import SrunLauncher

from parsl.addresses import address_by_hostname

x509_proxy = 'x509up_u%s'%(os.getuid())
year = '2018'

wrk_init = '''
export X509_USER_PROXY=${HOME}/x509up_u45169
export X509_CERT_DIR=${HOME}/certs/
export XRD_RUNFORKHANDLER=1
'''#%(x509_proxy)

twoGB = 2048
nproc = 48

sched_opts = '''
#SBATCH --cpus-per-task=%d
#SBATCH --mem-per-cpu=%d
''' % (nproc, twoGB, ) 

slurm_htex = Config(
    executors=[
        HighThroughputExecutor(
            label="coffea_parsl_slurm",
            address=address_by_hostname(),
            prefetch_capacity=0,  
            max_workers=nproc,
            provider=SlurmProvider(
                channel=LocalChannel(),
                launcher=SrunLauncher(),
                init_blocks=72,
                max_blocks=72,
                nodes_per_block=1,
                partition='general',
                scheduler_options=sched_opts,   # Enter scheduler_options if needed
                worker_init=wrk_init,         # Enter worker_init if needed
                walltime='02:00:00'
            ),
        )
    ],
    retries=10,
    strategy=None,
)

#parsl.set_stream_logger() # <-- log everything to stdout

dfk = parsl.load(slurm_htex)

chunksize=500000


In [3]:
lumis = {}
#Values from https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVAnalysisSummaryTable
lumis['2016']=35.92
lumis['2017']=41.53
lumis['2018']=59.97
lumi = 1000.*float(lumis[year])

In [4]:
samples = {
    "iszeroL":('ZJets','WJets','DY','TT','ST','WW','WZ','ZZ','QCD','HToBB','MET'),
    "isoneM":('WJets','DY','TT','ST','WW','WZ','ZZ','QCD','HToBB','MET'),
    "isoneE":('WJets','DY','TT','ST','WW','WZ','ZZ','QCD','HToBB','SingleElectron','EGamma'),
    "istwoM":('WJets','DY','TT','ST','WW','WZ','ZZ','HToBB','MET'),
    "istwoE":('WJets','DY','TT','ST','WW','WZ','ZZ','HToBB','SingleElectron','EGamma'),
    "isoneA":('GJets','QCD','SinglePhoton','EGamma')
}

In [5]:
import json

with open("../harvester/beans/"+year+".json") as fin:
    samplefiles = json.load(fin)
xsec = {k: v['xs'] for k,v in samplefiles.items()}

print(xsec)

{'MET____0_': -1, 'EGamma____0_': -1, 'ZJetsToNuNu_HT-100To200_13TeV-madgraph____0_': 280.5, 'ZJetsToNuNu_HT-200To400_13TeV-madgraph____0_': 77.7, 'ZJetsToNuNu_HT-400To600_13TeV-madgraph____0_': 10.71, 'ZJetsToNuNu_HT-600To800_13TeV-madgraph____0_': 2.562, 'ZJetsToNuNu_HT-800To1200_13TeV-madgraph____0_': 1.183, 'ZJetsToNuNu_HT-1200To2500_13TeV-madgraph____0_': 0.286, 'ZJetsToNuNu_HT-2500ToInf_13TeV-madgraph____0_': 0.006945, 'DYJetsToLL_M-50_HT-100to200_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 147.4, 'DYJetsToLL_M-50_HT-200to400_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 40.99, 'DYJetsToLL_M-50_HT-400to600_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 5.678, 'DYJetsToLL_M-50_HT-600to800_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 1.367, 'DYJetsToLL_M-50_HT-800to1200_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 0.6304, 'DYJetsToLL_M-50_HT-1200to2500_TuneCP5_PSweights_13TeV-madgraphMLM-pythia8____0_': 0.1514, 'DYJetsToLL_M-50_HT-2500toInf_TuneCP5_

In [6]:
#get the analysis worker from the cloudpickle file
#import cloudpickle as cpkl
#import lz4.frame as lz4f
from analysis.darkhiggs import AnalysisProcessor

#processor_pkl = 'AnalysisProcessor.cpkl.lz4'
#AnalysisProcessor = None
#with lz4f.open(processor_pkl, mode="rb") as fin:
#    AnalysisProcessor = cpkl.load(fin)
print(AnalysisProcessor)

<class 'analysis.darkhiggs.AnalysisProcessor'>


In [8]:
import time
from coffea import hist, processor
from coffea.processor import run_parsl_job
from coffea.processor.parsl.parsl_executor import parsl_executor
import gzip
import pickle
import cloudpickle
import numpy as np
import lz4.frame as lz4f

filelist = {}
for dataset, info in samplefiles.items():
    #if your_wanted_dataset not in dataset: continue
    #dataset = dt.strip().split("____")[0]
    if not dataset in filelist: filelist[dataset] = []
    fileslice = slice(None)
    for file in info['files'][fileslice]:
        filelist[dataset].append(file)

        
selections = {}
for dataset in filelist:
    if not dataset in selections: selections[dataset] = []
    for selection,v in samples.items():
        for i in range (0,len(v)):
            if v[i] not in dataset: continue
    fileset = {}
    fileset[dataset] = filelist[dataset]
    processor_instance=AnalysisProcessor(selected_regions=selections[dataset], year=year, xsec=xsec, lumi=lumi)
    tstart = time.time()
    output = run_parsl_job(fileset,
                           treename='Events',
                           processor_instance=processor_instance,
                           executor=parsl_executor,
                           executor_args={'config':None, 'flatten': False},
                           data_flow=dfk,
                           chunksize=500000,
                          )

    # Pickle is not very fast or memory efficient, will be replaced by something better soon
    with lz4f.open("pods/"+year+"/"+dataset+".pkl.gz", mode="wb", compression_level=5) as fout:
        cloudpickle.dump(output, fout)
        
    dt = time.time() - tstart
    nbins = sum(sum(arr.size for arr in h._sumw.values()) for h in output.values() if isinstance(h, hist.Hist))
    nfilled = sum(sum(np.sum(arr > 0) for arr in h._sumw.values()) for h in output.values() if isinstance(h, hist.Hist))
    print("Filled %.1fM bins" % (nbins/1e6, ))
    print("Nonzero bins: %.1f%%" % (100*nfilled/nbins, ))


parsl version: 0.8.0


Preprocessing: 100%|██████████| 5125/5125 [00:04<00:00, 1258.47files/s]
Processing: 100%|██████████| 5125/5125 [01:13<00:00,  9.82items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 21656/21656 [00:04<00:00, 5143.90files/s]
Processing: 100%|██████████| 21656/21656 [04:55<00:00, 73.26items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 479/479 [00:05<00:00, 95.33files/s]
Processing: 100%|██████████| 479/479 [01:01<00:00,  7.82items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 469/469 [00:04<00:00, 116.61files/s]
Processing: 100%|██████████| 469/469 [01:15<00:00,  6.19items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 167/167 [00:05<00:00, 33.26files/s]
Processing: 100%|██████████| 167/167 [02:34<00:00,  2.19items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 116/116 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 116/116 [01:42<00:00,  1.97s/items]
Preprocessing:   0%|          | 0/42 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 42/42 [00:06<00:00,  6.98files/s]
Processing: 100%|██████████| 42/42 [01:47<00:00,  2.56s/items]
Preprocessing:   0%|          | 0/7 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 7/7 [00:04<00:00,  1.75files/s]
Processing: 100%|██████████| 7/7 [01:53<00:00, 17.84s/items]
Preprocessing:   0%|          | 0/8 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 8/8 [00:04<00:00,  1.99files/s]
Processing: 100%|██████████| 8/8 [02:01<00:00, 15.17s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 235/235 [00:04<00:00, 58.53files/s]
Processing: 100%|██████████| 235/235 [01:18<00:00,  2.98items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 225/225 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 225/225 [01:36<00:00,  1.75s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 194/194 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 194/194 [02:01<00:00,  1.62items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 178/178 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 178/178 [02:06<00:00,  1.40items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 63/63 [00:05<00:00, 12.56files/s]
Processing: 100%|██████████| 63/63 [02:11<00:00,  1.19s/items]  
Preprocessing:   0%|          | 0/6 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 6/6 [00:05<00:00,  1.20files/s]
Processing: 100%|██████████| 6/6 [07:03<00:00, 69.82s/items]
Preprocessing:   0%|          | 0/9 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 9/9 [00:05<00:00,  1.79files/s]
Processing: 100%|██████████| 9/9 [02:23<00:00, 15.93s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 590/590 [00:06<00:00, 97.82files/s]
Processing: 100%|██████████| 590/590 [01:11<00:00,  8.26items/s]  


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 513/513 [00:05<00:00, 102.05files/s]
Processing: 100%|██████████| 513/513 [01:27<00:00,  3.79items/s]  


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 119/119 [00:05<00:00, 23.70files/s]
Processing: 100%|██████████| 119/119 [01:44<00:00,  2.66s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 398/398 [00:05<00:00, 79.19files/s]
Processing: 100%|██████████| 398/398 [01:56<00:00,  2.22items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 169/169 [00:05<00:00, 33.67files/s]
Processing: 100%|██████████| 169/169 [02:01<00:00,  1.09s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 153/153 [00:05<00:00, 30.49files/s]
Processing: 100%|██████████| 153/153 [02:11<00:00,  1.27s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 66/66 [00:05<00:00, 13.15files/s]
Processing: 100%|██████████| 66/66 [02:18<00:00,  1.61s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 198/198 [00:05<00:00, 39.46files/s]
Processing: 100%|██████████| 198/198 [01:00<00:00,  2.95s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 386/386 [00:04<00:00, 96.02files/s]
Processing: 100%|██████████| 386/386 [01:18<00:00,  4.94items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 94/94 [00:04<00:00, 23.42files/s]
Processing: 100%|██████████| 94/94 [01:39<00:00,  1.19s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 101/101 [00:04<00:00,  1.00files/s]
Processing: 100%|██████████| 101/101 [01:47<00:00,  1.71s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1893/1893 [00:04<00:00, 468.47files/s] 
Processing: 100%|██████████| 1893/1893 [00:59<00:00, 31.75items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1100/1100 [00:04<00:00, 155.43files/s]
Processing: 100%|██████████| 1100/1100 [01:07<00:00, 16.18items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1108/1108 [00:04<00:00, 275.01files/s]
Processing: 100%|██████████| 1108/1108 [01:27<00:00, 12.72items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1093/1093 [00:04<00:00, 99.43files/s] 
Processing: 100%|██████████| 1093/1093 [01:49<00:00, 10.01items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 976/976 [00:04<00:00, 242.26files/s]
Processing: 100%|██████████| 976/976 [01:50<00:00,  8.83items/s]  


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 312/312 [00:04<00:00, 77.59files/s]
Processing: 100%|██████████| 312/312 [01:56<00:00,  2.67items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 222/222 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 222/222 [01:58<00:00,  1.37s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 111/111 [00:04<00:00, 27.65files/s]
Processing: 100%|██████████| 111/111 [02:02<00:00,  1.89s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 153/153 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 153/153 [01:42<00:00,  1.02s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 192/192 [00:04<00:00, 47.78files/s]
Processing: 100%|██████████| 192/192 [01:35<00:00,  2.60s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1578/1578 [00:04<00:00, 191.64files/s] 
Processing: 100%|██████████| 1578/1578 [01:25<00:00, 18.51items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 1197/1197 [00:04<00:00, 163.28files/s]
Processing: 100%|██████████| 1197/1197 [05:28<00:00,  8.36s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 2848/2848 [00:05<00:00, 564.16files/s] 
Processing: 100%|██████████| 2848/2848 [02:22<00:00, 20.01items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 169/169 [00:04<00:00, 42.09files/s]
Processing: 100%|██████████| 169/169 [01:04<00:00,  2.61items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 78/78 [00:04<00:00, 19.42files/s]
Processing: 100%|██████████| 78/78 [01:05<00:00,  1.19items/s]
Preprocessing:   0%|          | 0/41 [00:00<?, ?files/s]

Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 41/41 [00:04<00:00, 10.21files/s]
Processing: 100%|██████████| 41/41 [01:03<00:00,  3.49s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 92/92 [00:04<00:00,  1.00files/s]
Processing: 100%|██████████| 92/92 [01:14<00:00,  1.61s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 142/142 [00:05<00:00, 28.29files/s]
Processing: 100%|██████████| 142/142 [01:34<00:00,  1.50items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 106/106 [00:04<00:00, 26.40files/s]
Processing: 100%|██████████| 106/106 [01:16<00:00,  1.13items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 88/88 [00:04<00:00, 21.92files/s]
Processing: 100%|██████████| 88/88 [01:43<00:00,  1.65s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 73/73 [00:04<00:00,  1.00s/files]
Processing: 100%|██████████| 73/73 [01:22<00:00,  3.05s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 73/73 [00:04<00:00,  1.00files/s]
Processing: 100%|██████████| 73/73 [01:25<00:00,  2.23s/items]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 237/237 [00:05<00:00, 47.27files/s]
Processing: 100%|██████████| 237/237 [02:16<00:00,  1.67s/items] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 292/292 [00:04<00:00, 72.62files/s]
Processing: 100%|██████████| 292/292 [01:05<00:00,  4.44items/s] 


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 152/152 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 152/152 [01:15<00:00,  2.01items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 300/300 [00:05<00:00, 59.72files/s]
Processing: 100%|██████████| 300/300 [00:08<00:00, 34.81items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 300/300 [00:05<00:00, 59.81files/s]
Processing: 100%|██████████| 300/300 [00:09<00:00, 32.38items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 300/300 [00:05<00:00, 59.75files/s]
Processing: 100%|██████████| 300/300 [00:07<00:00, 39.47items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 300/300 [00:05<00:00, 59.71files/s]
Processing: 100%|██████████| 300/300 [00:07<00:00,  2.87items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 299/299 [00:04<00:00,  1.01files/s]
Processing: 100%|██████████| 299/299 [00:07<00:00, 39.36items/s]


Filled 0.0M bins
Nonzero bins: 33.3%
parsl version: 0.8.0


Preprocessing: 100%|██████████| 300/300 [00:05<00:00, 59.75files/s]
Processing: 100%|██████████| 300/300 [00:07<00:00, 39.46items/s]

Filled 0.0M bins
Nonzero bins: 33.3%





In [9]:
parsl.dfk().cleanup()
parsl.clear()
