From b9f79f071c13810096b3aa51d07e5ce0624e02fe Mon Sep 17 00:00:00 2001 From: Heidi Schellman <33669005+hschellman@users.noreply.github.com> Date: Wed, 18 Feb 2026 15:53:20 -0800 Subject: [PATCH 1/2] small changes for skip --- _episodes/03-data-management.md | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/_episodes/03-data-management.md b/_episodes/03-data-management.md index 136d452..0d53b5d 100644 --- a/_episodes/03-data-management.md +++ b/_episodes/03-data-management.md @@ -159,7 +159,7 @@ First get metacat if you have not already done so ~~~ -metacat query "files from dune:all where core.file_type=detector and core.run_type=hd-protodune and core.data_tier=raw and core.runs[any]=27331 limit 1" +metacat query "files from dune:all where core.file_type=detector and core.run_type=hd-protodune and core.data_tier=raw and core.runs[any]=27331 ordered limit 1" ~~~ {: .language-bash} @@ -347,6 +347,26 @@ You can also do keyword/value queries like the ones above using the Other tab on ![Full query search](../fig/otherquery.png){: .image-with-shadow } --> +### get a limited number of files in a query + +Batch workflows with more than 10,000 files are strongly discouraged (largely because when they fail, they fail BIG!). You can chop up larger sets by using the `skip` and `limit` clauses in your query. + +To chop up a big query into smaller chunks: + +~~~ +export MYBIGQUERY= +export MYQUERY1="$MYBIGQUERY ordered skip 0 limit 1000" +export MYQUERY2="$MYBIGQUERY ordered skip 1000 limit 1000" +export MYQUERY3="$MYBIGQUERY ordered skip 2000 limit 1000" +..etc. +~~~ + +- the `ordered` ensures that your query is reproducible + +- the `skip` needs to appear before the `limit` + +Always look at the output of your workflow on one of the queries before submitting them all. 
+ ### find out how much data there is in a dataset Do a query of a dataset using the `-s` or `--summary` option @@ -417,7 +437,7 @@ You can use any of those keys to refine dataset searches as we did above. You pr You can either locate and click on a dataset in the [web data catalog](https://dune-tech.rice.edu/dunecatalog/) or use the[metacat web interface](https://metacat.fnal.gov:9443/dune_meta_prod/app/gui) or use the command line: ~~~ -metacat query "files from fardet-vd:fardet-vd__full-reconstructed__v09_81_00d02__reco2_dunevd10kt_anu_1x8x6_3view_30deg_geov3__prodgenie_anu_numu2nue_nue2nutau_dunevd10kt_1x8x6_3view_30deg__out1__v2_official limit 10" +metacat query "files from fardet-vd:fardet-vd__full-reconstructed__v09_81_00d02__reco2_dunevd10kt_anu_1x8x6_3view_30deg_geov3__prodgenie_anu_numu2nue_nue2nutau_dunevd10kt_1x8x6_3view_30deg__out1__v2_official ordered limit 10" ~~~ {: .languate-bash} @@ -575,7 +595,7 @@ What about some files from a reconstructed version? ~~~ metacat query "files from dune:all where core.file_type=detector \ and core.run_type='protodune-sp' and core.data_tier=full-reconstructed \ - and core.data_stream=physics and core.runs[any] in (5141) and dune.campaign=PDSPProd4 limit 10" + and core.data_stream=physics and core.runs[any] in (5141) and dune.campaign=PDSPProd4 ordered limit 10" ~~~ {: .language-bash} From ce2b0d11440ecc19c1495176dd52835a24e3c773 Mon Sep 17 00:00:00 2001 From: Heidi Schellman <33669005+hschellman@users.noreply.github.com> Date: Wed, 29 Apr 2026 13:25:23 -0700 Subject: [PATCH 2/2] checks for home area --- _config.yml | 2 +- setup.md | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/_config.yml b/_config.yml index a1ed92e..7c8afce 100644 --- a/_config.yml +++ b/_config.yml @@ -11,7 +11,7 @@ carpentry: "dune" # Overall title for pages. 
-title: "Computing Basics for DUNE - Revised 2025 edition" +title: "Computing Basics for DUNE - Early 2026 edition" # Life cycle stage of the lesson # See this page for more details: https://cdh.carpentries.org/the-lesson-life-cycle.html diff --git a/setup.md b/setup.md index 97c970c..30ff0dc 100644 --- a/setup.md +++ b/setup.md @@ -233,14 +233,47 @@ Now you can try to log into a machine at Fermilab. There are now 15 different ma **How to connect?** The ssh command does the job. The -Y option turns on the xwindow protocol so that you can have graphical display and keyboard/mouse handling (quite useful). But if you have the line "ForwardX11Trusted yes" in your ssh config file, this will do the -Y option. For connecting to e.g. dunegpvm07, the command is: ~~~ -ssh username@dunegpvmXX.fnal.gov +ssh -Y username@dunegpvmXX.fnal.gov ~~~ {: .language-bash} where XX is a number from 01 to 15. If you experience long delays in loading programs or graphical output, you can try connecting with VNC. More info: [Using VNC Connections on the dunegpvms][dunegpvm-vnc]. Please remember to shut down your VNC connection at least once/week - the machines can get overrun by zombies. -### Get a clean shell +### Check that your Kerberos ticket was forwarded + +Some firewalls block Kerberos ticket forwarding. Access to your home area on the gpvms requires that you have a valid ticket forwarded to that machine. + +Try the following: + +~~~ +touch ~/test.txt +~~~ +{: .language-bash} + +If it works, you are fine. If not: + +You may see errors like + +~~~ +Could not chdir to home directory /nashome/x/xenon: Permission denied +~~~ +{: .output} + +or it may complain about `.Xauthority` or other home area features. + +First check that you in fact did try to forward a ticket. + +~~~ +kinit -f -A username@FNAL.GOV +~~~ +{: .language-bash} + +and that your `~/.ssh/config` on the originating machine actually contains the lines listed above, as an operating system upgrade may have changed them. 
+ +If all of that fails, you can `kinit` on the gpvm but do not do so in an X window as that traffic is not encrypted. + +### Always use a clean shell To run DUNE software, it is necessary to have a 'clean login'. What is meant by clean here? If you work on other experiment(s), you may have some environment variables defined (for NOvA, MINERvA, MicroBooNE). Theses may conflict with the DUNE environment ones. Two ways to clean your shell once on a DUNE machine: