Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DQM: Remove more DQMStore features #28247

Merged
merged 14 commits into from Nov 24, 2019
Expand Up @@ -477,24 +477,6 @@ void TrackerOfflineValidationSummary::fillTree(TTree& tree,
treeMem.histNameNormY = h->GetName();
}

// Delete module level hists if set in cfg
const bool removeModuleLevelHists(parSet_.getParameter<bool>("removeModuleLevelHists"));
if (removeModuleLevelHists) {
dbe_->setCurrentFolder("");
if (it->second.ResHisto)
dbe_->removeElement(histDir + "/" + it->second.ResHisto->GetName());
if (it->second.NormResHisto)
dbe_->removeElement(histDir + "/" + it->second.NormResHisto->GetName());
if (it->second.ResXprimeHisto)
dbe_->removeElement(histDir + "/" + it->second.ResXprimeHisto->GetName());
if (it->second.NormResXprimeHisto)
dbe_->removeElement(histDir + "/" + it->second.NormResXprimeHisto->GetName());
if (it->second.ResYprimeHisto)
dbe_->removeElement(histDir + "/" + it->second.ResYprimeHisto->GetName());
if (it->second.NormResYprimeHisto)
dbe_->removeElement(histDir + "/" + it->second.NormResYprimeHisto->GetName());
}

tree.Fill();
}
}
Expand Down
Expand Up @@ -64,7 +64,6 @@
######################################################################
######################################################################
offlineDqmFileOutputTemplate = """
process.TrackerOfflineValidationSummary.oO[offlineValidationMode]Oo..removeModuleLevelHists = .oO[offlineModuleLevelHistsTransient]Oo.
process.DqmSaverTkAl.workflow = '.oO[workflow]Oo.'
process.DqmSaverTkAl.dirName = '.oO[workdir]Oo./.'
process.DqmSaverTkAl.forceRunNumber = .oO[firstRunNumber]Oo.
Expand Down
Expand Up @@ -4,7 +4,6 @@
moduleDirectoryInOutput = cms.string("Alignment/Tracker"), # has to be the same as in TrackerOfflineValidation_Dqm_cff
useFit = cms.bool(False),
stripYDmrs = cms.bool(False), # should be the same as for stripYResiduals in TrackerOfflineValidation_Dqm_cff
removeModuleLevelHists = cms.bool(False), # Remove module level hists after extracting the necessary information
minEntriesPerModuleForDmr = cms.uint32(100),

# DMR (distribution of median of residuals per module) of X coordinate (Strip)
Expand Down
Expand Up @@ -32,14 +32,13 @@
TrackerOfflineValidationDqm = TrackerOfflineValidationBinned.clone(
useInDqmMode = True,
moduleDirectoryInOutput = "Alignment/Tracker",
Tracks = 'TrackRefitterForOfflineValidation',
Tracks = 'TrackRefitterForOfflineValidation'
)

##
## TrackerOfflineValidationSummary
##
TrackerOfflineValidationSummaryDqm = TrackerOfflineValidationSummaryBinned.clone(
removeModuleLevelHists = True,
minEntriesPerModuleForDmr = 100
)

Expand Down
Expand Up @@ -175,7 +175,6 @@
#process.TrackerOfflineValidationDqm.Tracks = 'TrackRefitterP5'
#process.TrackerOfflineValidationDqm.trajectoryInput = 'TrackRefitterP5'
# Harvesting
#process.TrackerOfflineValidationSummaryDqm.removeModuleLevelHists = False
process.TrackerOfflineValidationSummaryDqm.minEntriesPerModuleForDmr = 1 # to allow tests with few statistics
# Output File
process.DqmSaverTkAl.workflow = "/Cosmics/TkAl09-335patch1_CRAFT09_R_V4_TestFile_CRAFT09reprocessing_WithoutCuts_R000109011_R000109624_ValSkim-v1/ALCARECO"
Expand Down
Expand Up @@ -27,7 +27,6 @@
# 2 provide more detailed output
Frequency=cms.untracked.int32(50),
MEPathToSave=cms.untracked.string('AlCaReco/EcalPedestalsPCL'),
deleteAfterCopy=cms.untracked.bool(True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tocheng @christopheralanwest @schneiml this can be tested using wf 1010.0 . In its step3 the log file is flooded with

Begin processing the 100th record. Run 299149, Event 579095302, LumiSection 408 on stream 3 at 20-Nov-2019 17:35:47.296 CET
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_0' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_1' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_10' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_11' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_12' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_13' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_14' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_15' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_16' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_17' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_18' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_19' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_2' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_20' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_21' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_22' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_23' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_24' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_25' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_26' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_27' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_28' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_29' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_3' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_30' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_31' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_32' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_33' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_34' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_35' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_36' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_37' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_38' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_39' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_4' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_40' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_41' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_42' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_43' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_44' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_45' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_46' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_47' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_48' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_49' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_5' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_50' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_51' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_52' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_53' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_54' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_55' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_56' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_57' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_58' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_59' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_6' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_60' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_61' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_62' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_63' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_64' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_65' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_66' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_67' in 'AlCaReco/EcalPedestalsPCL/EB/0'
DQMStore: WARNING: attempt to remove non-existent monitor element 'eb_68' in 'AlCaReco/EcalPedestalsPCL/EB/0'
(...)

Please comment

Copy link
Contributor

@fabiocos fabiocos Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Repeating the test of @mmusich with wf 1010 step3 running 10000 events:

ecal.pdf

Copy link
Contributor Author

@schneiml schneiml Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Re the message see the comment I made in another location: the code that used to produce them is deleted in this PR.

Re the RSS graph: We see the usual MEtoEDM endJob spike, before as after the PR, doubling the memory use as all histograms are copied into a EDM product. Removing the delete-calls makes it an exact doubling, before it might have been a bit less, as some histos where deleted before saving.

This spike is why "normal" reco jobs switched to DQMIO saving in LS1 (which removes this spike alltogether), but for the ALCA workflows memory use was not considered a problem and they where never migrated. (Edit: This is just for context, this PR does not affect anything about the MEtoEDM procedure.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schneiml the message is in the default version, this PR removes it, sorry for the possible confusion.

Concerning the RSS plot I do not see anything worrying, the behaviour old vs new looks quite similar.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fabiocos re the message: ok, that is expected, there is nothing deleted any more and ofc there is no reason to warn about deleting non-existing things then.

This might explain why the memory use is effectively unaffected: The delete calls were not doing anything to begin with.

)

ALCALRECOEcalTCDSDigis = cms.EDProducer('TcdsRawToDigi')
Expand Down
Expand Up @@ -98,7 +98,6 @@
# 2 provide more detailed output
Frequency = cms.untracked.int32(50),
MEPathToSave = cms.untracked.string('AlCaReco/SiStripGainsAAG'),
deleteAfterCopy = cms.untracked.bool(True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tocheng @christopheralanwest @mmusich please suggest how to test this ALCAPROMPT, using --dasquery=file dataset=/ZeroBias/Run2016C-SiStripCalMinBiasAfterAbortGap-18Apr2017-v1/ALCARECO run=276097 in a similar way as done for SiStrpGains does not work:

Begin processing the 1st record. Run 276092, Event 32587086, LumiSection 74 on stream 0 at 22-Nov-2019 10:43:57.009 CET
----- Begin Fatal Exception 22-Nov-2019 10:43:57 CET-----------------------
An exception of category 'Configuration' occurred while
   [0] Processing  Event run: 276092 lumi: 74 event: 32587086 stream: 0
   [1] Running path 'pathALCARECOPromptCalibProdSiStripGainsAAG'
   [2] Calling method for module HLTHighLevel/'ALCARECOCalMinBiasFilterForSiStripGainsAAG'
Exception Message:
requested HLT path "pathALCARECOSiStripCalMinBiasAAG" does not exist
----- End Fatal Exception -------------------------------------------------

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you change the ALCA producer?

Copy link
Contributor

@fabiocos fabiocos Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cmsDriver.py step3 --datatier ALCARECO --conditions auto:run2_data -s ALCA:PromptCalibProdSiStripGainsAAG --eventcontent ALCARECO -n 1000 --dasquery=file dataset=/ZeroBias/Run2016C-SiStripCalMinBiasAfterAbortGap-18Apr2017-v1/ALCARECO run=276097 

Copy link
Contributor

@mmusich mmusich Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fabiocos, the command is correct but the input dataset is not.
In 2017 we changed SiStripCalMinBiasAfterAbortGap to SiStripCalMinBiasAAG due to limitations in the output dataset name length in dataset storage system.
Because of this the PD you chose is not processable tout-court in recent releases, but needs to have this line https://github.com/cms-sw/cmssw/blob/master/Calibration/TkAlCaRecoProducers/python/ALCARECOPromptCalibProdSiStripGainsAAG_cff.py#L9 adjusted.
To make it work out of the box just switch to a more recent dataset like
/StreamExpress/Run2018C-SiStripCalMinBiasAAG-Express-v1/ALCARECO (is at T2_CH_CERN)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mmusich with step3 --processName ALCA2 --datatier ALCARECO --conditions auto:run2_data -s ALCA:PromptCalibProdSiStripGainsAAG --eventcontent ALCARECO -n 1000 --dasquery=file dataset=/StreamExpress/Run2018C-SiStripCalMinBiasAAG-Express-v1/ALCARECO I get

stripaag

which looks to me comparable to your test at the file crossing boundaries

)

# The actual sequence
Expand Down
Expand Up @@ -99,7 +99,6 @@
# 2 provide more detailed output
Frequency = cms.untracked.int32(50),
MEPathToSave = cms.untracked.string('AlCaReco/SiStripGains'),
deleteAfterCopy = cms.untracked.bool(True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on a different topic, is this supposed to be useless with the new DQM infrastructure?
Looking back in my inbox this seemed to be necessary not to explode with memory at Tier-0 during operations (but this was back in 2016).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not exactly sure what this workflow does, but in general the memory concerns at Tier0 are pretty much gone since we no longer use per-stream MEs.

If the copy in this module (which hopefully does run in a HARVESTING step, and is not affected at all by the stream story) consumes too much memory, we need to find a different solution (like, book things in the right place, or read them from the right place, or rename things after the fact), but moving around stuff in the DQMStore is not something we can support for the future.

Actually, the comment previous to the one you linked mentioned that this is a end-of-job spike, which from legends I know was one of the reasons to move towards the DQMIO format. I wonder if we should finally stop pretending to this code that it is still run 1 and move it to current infrastructure... Sadly, I have no real clue how these workflows work and it seems nobody else has either, so that will be hard.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@schneiml

the memory concerns at Tier0 are pretty much gone since we no longer use per-stream MEs.

I am not sure how relevant it is, as to the best of my knowledge Tier0 AlCaSkim and AlCaHarvest jobs are single-threaded.
I would feel much better if we could organize (for this PR and the others that will come that will change MEtoEDMConverter) a more meaningful check than 10 events in the PR test. For some workflows a 10 event test will probe nothing as the computation won't even be started.
Can AlCa/DB conveners make sure this happens? @christopheralanwest @tlampen @tocheng

I wonder if we should finally stop pretending to this code that it is still run 1 and move it to current infrastructure...

I would be happy to take a look to achieve this. Can you explain what you mean by "current" infrastructure?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"current": Not MEtoEDM, if possible, or otherwise a good reasoning why it is required, and no reliance on edm::Service<DQMStore> (which implies using DQMEDAnalyzer or DQMEDHarvester base classes). It is well possible that this is not possible for some reason at the moment, but then we need to find out these reasons and decide what to do for the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so, I took the liberty of doing a bit of profiling.
I've re-run a similar step of what I had done here #22515 i.e.:

cmsDriver.py step3 --datatier ALCARECO --conditions auto:run2_data -s ALCA:PromptCalibProdSiStripGains --eventcontent ALCARECO -n 1000 --dasquery='file dataset=/ZeroBias/Run2016C-SiStripCalMinBias-18Apr2017-v1/ALCARECO run=276097'

and profiled the RSS memory:

memory

The spike at the end of the job is increased indeed, but I cannot comment if that is going to be a problem for Tier-0.
I also checked the output of the harvesting job, tweaking the configuration obtained via:

cmsDriver.py stepHarvest --data --conditions auto:run2_data --scenario pp -s ALCAHARVEST:SiStripGains --filein file:PromptCalibProdSiStripGains.root -n 1000 --fileout file:calib.root

in order to be forced to produce a payload via:

process.alcaSiStripGainsHarvester.GoodFracForTagProd =  cms.untracked.double(0.0)   #<- fraction of good fits (default is 98%)
process.alcaSiStripGainsHarvester.NClustersForTagProd = cms.untracked.double(100000)   #<- total number of clusters (default used is 8e8)

and the good news is that the payload hash is the same and I don't see differences in the harvested output (full diff here ):
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @mmusich for the test!

The observation is, I'd say, exactly compatible with what is done here: During processing, the memory use is the same, but the spike while MEtoEDM saving is higher if the unnecessary MEs are not deleted. The absolute change seems not too big to me, not sure if Tier0 sees this the same.

That the produced payload is identical is good to know.

)

# The actual sequence
Expand Down
Expand Up @@ -9,7 +9,6 @@
# 2 provide more detailed output
Frequency = cms.untracked.int32(50),
MEPathToSave = cms.untracked.string('AlCaReco/SiStrip'),
deleteAfterCopy = cms.untracked.bool(False)
Copy link
Contributor

@fabiocos fabiocos Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

running the step2 of wf 1040 with the ALCAPRODUCER:@allForExpress option on 1000 events:
express

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fabiocos, I am sorry to be picky, but I don't think @allForExpress resolves into the PromptCalibProd... ALCAPROMPT producer changed here, so I am afraid your plot proves nothing.
A more interesting test IMHO is to monitor the RSS of step3 of wf 1001.0:

cmsDriver.py step3  --datatier ALCARECO --triggerResultsProcess RECO --conditions
auto:run1_data -s ALCAOUTPUT:SiStripCalZeroBias+TkAlMinBias+DtCalib+Hotline+LumiPix
elsMinBias+AlCaPCCZeroBiasFromRECO+AlCaPCCRandomFromRECO,ALCA:PromptCalibProd+Promp
tCalibProdSiStrip+PromptCalibProdSiStripGains+PromptCalibProdSiStripGainsAAG+Prompt
CalibProdSiPixelAli+PromptCalibProdSiPixel --eventcontent ALCARECO -n 100  --filein
  file:step2.root  --fileout file:step3.root  

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mmusich as far as I can see in also the Tracer dump it do runs both MEtoEDMConvertSiStrip and pathALCARECOSiStripPCLHistos that are also modified in this PR

Copy link
Contributor

@mmusich mmusich Nov 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about PromptCalibProdSiStripGains... and PromptCalibProdSiPixelAli?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or your test was limited to this particular file change?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mmusich I believe that we are clearly missing a test that is realistically mimicking what happens nowadays at the T0 , not bits and pieces. My understanding from a discussion with @tocheng is that @allForExpress should cover all the ALCAPRODUCER available, and I looked at a few ALCAPROMPT touched by this PR separately.
It would be important to get from ALCA a realistic workflow on recent data. Anyway I will make a final check following your suggestion (but is this test on very old data representative of what happens in today's situation?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the additional tests. I agree about the lack of a suitable relval test testing all the AlCa aspects of Express.
For tracker at least 1001.0 is a reasonable proxy. For the changes proposed here I don't think occupancy or n. of channel differences matter to measure a comparison of the memory consumption (it would be nice to have 1001.0 updated with Run2 input data though).

Copy link
Contributor

@fabiocos fabiocos Nov 24, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

running wf 1001 on 5000 events, the comparison of RSS used by step3 is

express2011

At this point I think that there is no evidence in these tests of strong reasons of concern, and a full test in a T0 replay may provide a final cross check

)

seqALCARECOSiStripPCLHistos = cms.Sequence(MEtoEDMConvertSiStrip)
Expand Up @@ -46,7 +46,6 @@
)

process.MEtoEDMConvertSiStrip = cms.EDProducer("MEtoEDMConverter",
deleteAfterCopy = cms.untracked.bool(False),
Verbosity = cms.untracked.int32(0),
Frequency = cms.untracked.int32(50),
Name = cms.untracked.string('MEtoEDMConverter'),
Expand Down
24 changes: 1 addition & 23 deletions DQM/BeamMonitor/plugins/AlcaBeamMonitorClient.cc
Expand Up @@ -63,29 +63,7 @@ AlcaBeamMonitorClient::~AlcaBeamMonitorClient() {}
void AlcaBeamMonitorClient::beginJob() {}

//----------------------------------------------------------------------------------------------------------------------
void AlcaBeamMonitorClient::beginRun(const edm::Run& r, const EventSetup& context) {
for (HistosContainer::iterator itM = histosMap_.begin(); itM != histosMap_.end(); itM++) {
for (map<string, map<string, MonitorElement*> >::iterator itMM = itM->second.begin(); itMM != itM->second.end();
itMM++) {
if (itMM->first != "run") {
for (map<string, MonitorElement*>::iterator itMMM = itMM->second.begin(); itMMM != itMM->second.end();
itMMM++) {
if (itMMM->second != nullptr) {
if (itMM->first == "lumi") {
dbe_->removeElement(monitorName_ + "Debug", itMMM->second->getName());
} else if (itMM->first == "validation") {
dbe_->removeElement(monitorName_ + "Validation", itMMM->second->getName());
} else {
LogInfo("AlcaBeamMonitorClient") << "Unrecognized category " << itMM->first;
// assert(0);
}
itMMM->second = nullptr;
}
}
}
}
}
}
void AlcaBeamMonitorClient::beginRun(const edm::Run& r, const EventSetup& context) {}

//----------------------------------------------------------------------------------------------------------------------

Expand Down
4 changes: 0 additions & 4 deletions DQM/BeamMonitor/plugins/BeamMonitorBx.cc
Expand Up @@ -197,10 +197,6 @@ void BeamMonitorBx::BookTables(int nBx, map<string, string>& vMap, string suffix
tmpName += "_";
tmpName += suffix_;
}
if (dbe_->get(monitorName_ + "FitBx/" + tmpName)) {
edm::LogInfo("BX|BeamMonitorBx") << "Rebinning " << tmpName << endl;
dbe_->removeElement(tmpName);
}

hs[tmpName] = dbe_->book2D(tmpName, varName->second, 3, 0, 3, nBx, 0, nBx);
hs[tmpName]->setBinLabel(1, "bx", 1);
Expand Down
2 changes: 0 additions & 2 deletions DQM/EcalCommon/interface/MESet.h
Expand Up @@ -97,8 +97,6 @@ namespace ecaldqm {
virtual bool isVariableBinning() const { return false; }
virtual MonitorElement const *getME(unsigned _iME) const { return (_iME < mes_.size() ? mes_[_iME] : nullptr); }
virtual MonitorElement *getME(unsigned _iME) { return (_iME < mes_.size() ? mes_[_iME] : nullptr); }
virtual void softReset();
virtual void recoverStats();

std::string formPath(PathReplacements const &) const;

Expand Down
2 changes: 0 additions & 2 deletions DQM/EcalCommon/interface/MESetDet2D.h
Expand Up @@ -62,8 +62,6 @@ namespace ecaldqm {

void reset(double = 0., double = 0., double = 0.) override;

void softReset() override;

protected:
void fill_(unsigned, int, double) override;
void fill_(unsigned, int, double, double) override;
Expand Down
20 changes: 0 additions & 20 deletions DQM/EcalCommon/src/MESet.cc
Expand Up @@ -243,26 +243,6 @@ namespace ecaldqm {
return false;
}

void MESet::softReset() {
if (!active_)
return;

DQMStore &store(*edm::Service<DQMStore>());

for (unsigned iME(0); iME < mes_.size(); ++iME)
store.softReset(mes_[iME]);
}

void MESet::recoverStats() {
if (!active_)
return;

DQMStore &store(*edm::Service<DQMStore>());

for (unsigned iME(0); iME < mes_.size(); ++iME)
store.disableSoftReset(mes_[iME]);
}

void MESet::fill_(unsigned _iME, int _bin, double _w) {
if (kind_ == MonitorElement::Kind::REAL)
return;
Expand Down
6 changes: 0 additions & 6 deletions DQM/EcalCommon/src/MESetDet2D.cc
Expand Up @@ -518,12 +518,6 @@ namespace ecaldqm {
}
}

void MESetDet2D::softReset() {
MESet::softReset();
if (!batchMode_ && kind_ == MonitorElement::Kind::TPROFILE2D)
resetAll(0., 0., -1.);
}

void MESetDet2D::fill_(unsigned _iME, int _bin, double _w) {
if (kind_ == MonitorElement::Kind::TPROFILE2D) {
MonitorElement *me(mes_.at(_iME));
Expand Down
8 changes: 1 addition & 7 deletions DQM/EcalMonitorDbModule/src/MonitorElementsDb.cc
Expand Up @@ -99,13 +99,7 @@ MonitorElementsDb::~MonitorElementsDb() { delete parser_; }

void MonitorElementsDb::beginJob(void) { ievt_ = 0; }

void MonitorElementsDb::endJob(void) {
std::cout << "MonitorElementsDb: analyzed " << ievt_ << " events" << std::endl;
for (unsigned int i = 0; i < MEs_.size(); i++) {
if (MEs_[i] != nullptr)
dqmStore_->removeElement(MEs_[i]->getName());
}
}
void MonitorElementsDb::endJob(void) { std::cout << "MonitorElementsDb: analyzed " << ievt_ << " events" << std::endl; }

void MonitorElementsDb::analyze(const edm::Event &e, const edm::EventSetup &c, coral::ISessionProxy *session) {
ievt_++;
Expand Down
5 changes: 0 additions & 5 deletions DQM/EcalMonitorTasks/interface/DQWorkerTask.h
Expand Up @@ -91,13 +91,8 @@ namespace ecaldqm {
// Returns true if the module runs on the collection
virtual bool analyze(void const*, Collections) { return false; }

void softReset();
void recoverStats();

protected:
void setME(edm::ParameterSet const&) final;

std::set<std::string> resettable_;
};
} // namespace ecaldqm
#endif
8 changes: 1 addition & 7 deletions DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask.cc
Expand Up @@ -114,10 +114,6 @@ void EcalDQMonitorTask::dqmBeginRun(edm::Run const& _run, edm::EventSetup const&
}

void EcalDQMonitorTask::dqmEndRun(edm::Run const& _run, edm::EventSetup const& _es) {
if (lastResetTime_ != 0)
executeOnWorkers_([](ecaldqm::DQWorker* worker) { static_cast<ecaldqm::DQWorkerTask*>(worker)->recoverStats(); },
"recoverStats");

ecaldqmEndRun(_run, _es);

executeOnWorkers_([](ecaldqm::DQWorker* worker) { worker->releaseMEs(); }, "releaseMEs", "releasing histograms");
Expand All @@ -133,9 +129,7 @@ void EcalDQMonitorTask::dqmEndLuminosityBlock(edm::LuminosityBlock const& _lumi,
if (lastResetTime_ != 0 && (time(nullptr) - lastResetTime_) / 3600. > resetInterval_) {
if (verbosity_ > 0)
edm::LogInfo("EcalDQM") << moduleName_ << ": Soft-resetting the histograms";
executeOnWorkers_([](ecaldqm::DQWorker* worker) { static_cast<ecaldqm::DQWorkerTask*>(worker)->softReset(); },
"softReset");

// TODO: soft-reset is no longer supported.
lastResetTime_ = time(nullptr);
}
}
Expand Down
20 changes: 1 addition & 19 deletions DQM/EcalMonitorTasks/src/DQWorkerTask.cc
Expand Up @@ -5,7 +5,7 @@
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"

namespace ecaldqm {
DQWorkerTask::DQWorkerTask() : DQWorker(), resettable_() {}
DQWorkerTask::DQWorkerTask() : DQWorker() {}

/*static*/
void DQWorkerTask::fillDescriptions(edm::ParameterSetDescription& _desc) { DQWorker::fillDescriptions(_desc); }
Expand All @@ -16,27 +16,9 @@ namespace ecaldqm {
for (MESetCollection::iterator mItr(MEs_.begin()); mItr != MEs_.end(); ++mItr) {
if (willConvertToEDM_)
mItr->second->setBatchMode();

// TEMPORARY MEASURE - softReset does not accept variable bin size as of September 2012
// isVariableBinning is true for 1. MESetEcal or MESetNonObject with any custom binning or 2. MESetTrend
// In principle it is sufficient to protect the MESetTrends from being reset
if (mItr->second->getBinType() != ecaldqm::binning::kTrend && !mItr->second->isVariableBinning() &&
mItr->second->getKind() != MonitorElement::Kind::REAL)
resettable_.insert(mItr->first);
}
}

void DQWorkerTask::softReset() {
std::for_each(
resettable_.begin(), resettable_.end(), [this](std::string const& name) { this->MEs_.at(name).softReset(); });
}

void DQWorkerTask::recoverStats() {
std::for_each(resettable_.begin(), resettable_.end(), [this](std::string const& name) {
this->MEs_.at(name).recoverStats();
});
}

void DependencySet::formSequenceFragment_(Dependency const& _d,
std::vector<Collections>& _sequence,
std::vector<Collections>::iterator _maxPos) const {
Expand Down
Expand Up @@ -3,7 +3,6 @@

ecalPreshowerLocalMonitorClient = DQMEDHarvester('EcalPreshowerMonitorClient',
LookupTable = cms.untracked.FileInPath('EventFilter/ESDigiToRaw/data/ES_lookup_table.dat'),
enableCleanup = cms.untracked.bool(False),
enabledClients = cms.untracked.vstring('Pedestal'),
prefixME = cms.untracked.string('EcalPreshower'),
prescaleFactor = cms.untracked.int32(1),
Expand Down
5 changes: 0 additions & 5 deletions DQM/EcalPreshowerMonitorModule/interface/ESDaqInfoTask.h
Expand Up @@ -35,16 +35,11 @@ class ESDaqInfoTask : public edm::EDAnalyzer {
/// Reset
void reset(void);

/// Cleanup
void cleanup(void);

private:
DQMStore* dqmStore_;

std::string prefixME_;

bool enableCleanup_;

bool mergeRuns_;

MonitorElement* meESDaqFraction_;
Expand Down
Expand Up @@ -20,15 +20,12 @@ class ESDataCertificationTask : public edm::EDAnalyzer {
void endJob(void) override;
void beginLuminosityBlock(const edm::LuminosityBlock& lumiBlock, const edm::EventSetup& iSetup) override;
void reset(void);
void cleanup(void);

private:
DQMStore* dqmStore_;

std::string prefixME_;

bool enableCleanup_;

bool mergeRuns_;

MonitorElement* meESDataCertificationSummary_;
Expand Down
5 changes: 0 additions & 5 deletions DQM/EcalPreshowerMonitorModule/interface/ESDcsInfoTask.h
Expand Up @@ -35,16 +35,11 @@ class ESDcsInfoTask : public edm::EDAnalyzer {
/// Reset
void reset(void);

/// Cleanup
void cleanup(void);

private:
DQMStore* dqmStore_;

std::string prefixME_;

bool enableCleanup_;

bool mergeRuns_;

edm::EDGetTokenT<DcsStatusCollection> dcsStatustoken_;
Expand Down
1 change: 0 additions & 1 deletion DQM/EcalPreshowerMonitorModule/python/ESDaqInfoTask_cfi.py
Expand Up @@ -3,7 +3,6 @@
ecalPreshowerDaqInfoTask = cms.EDAnalyzer("ESDaqInfoTask",
esMapping = cms.PSet(LookupTable = cms.FileInPath("EventFilter/ESDigiToRaw/data/ES_lookup_table.dat")),
prefixME = cms.untracked.string('EcalPreshower'),
enableCleanup = cms.untracked.bool(False),
mergeRuns = cms.untracked.bool(False),
ESFedRangeMin = cms.untracked.int32(520),
ESFedRangeMax = cms.untracked.int32(575)
Expand Down
Expand Up @@ -2,6 +2,5 @@

ecalPreshowerDataCertificationTask = cms.EDAnalyzer("ESDataCertificationTask",
prefixME = cms.untracked.string('EcalPreshower'),
enableCleanup = cms.untracked.bool(False),
mergeRuns = cms.untracked.bool(False)
)
1 change: 0 additions & 1 deletion DQM/EcalPreshowerMonitorModule/python/ESDcsInfoTask_cfi.py
Expand Up @@ -2,7 +2,6 @@

ecalPreshowerDcsInfoTask = cms.EDAnalyzer("ESDcsInfoTask",
prefixME = cms.untracked.string('EcalPreshower'),
enableCleanup = cms.untracked.bool(False),
mergeRuns = cms.untracked.bool(False),
DcsStatusLabel = cms.InputTag("scalersRawToDigi")
)