Merge branch 'master' into correctionlib_wrapper_docs

CoffeaTeam · May 15, 2023 · 91f13c4 · 91f13c4
2 parents 3251704 + e4a1aa8
commit 91f13c4
Show file tree

Hide file tree

Showing 10 changed files with 128 additions and 88 deletions.
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -12,7 +12,7 @@ task:
         memory: 7G
         matrix:
           - image: python:3.8
-          - image: python:3.10
+          - image: python:3.11
 
       create_venv_script: |
         python -m venv ../venv

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -4,12 +4,14 @@ on:
   push:
     branches:
       - master
+      - backports-v0.7.x
       - backports-v0.6.x
     tags:
       - v*.*.*
   pull_request:
     branches:
       - master
+      - backports-v0.7.x
       - backports-v0.6.x
   # Run daily at 0:01 UTC
   schedule:
@@ -34,7 +36,7 @@ jobs:
         os: [ubuntu-latest, macOS-latest, windows-latest]
         java-version: [17]
         java-distribution: ["corretto"]
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
     name: test coffea (${{ matrix.os }}) - python ${{ matrix.python-version }}, JDK${{ matrix.java-version }}
 
@@ -79,21 +81,21 @@ jobs:
       run: |
         pytest --cov-report=xml --cov=coffea tests
     - name: Upload codecov
-      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.10
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
       uses: codecov/codecov-action@v3
     - name: Install graphviz
-      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.10
-      uses: kamiazya/setup-graphviz@v1
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
+      uses: ts-graphviz/setup-graphviz@v1
     - name: Install pandoc
-      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.10
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
       uses: r-lib/actions/setup-pandoc@v2
     - name: Build documentation
-      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.10
+      if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
       run: |
         cd docs && make html
         touch build/html/.nojekyll
     - name: Deploy documentation
-      if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.10
+      if: github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11
       uses: crazy-max/ghaction-github-pages@v3
       with:
         target_branch: gh-pages
@@ -106,18 +108,19 @@ jobs:
     needs: pre-commit
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
     name: test coffea-workqueue
 
     steps:
     - uses: actions/checkout@v3
     - name: Set up Conda
-      uses: conda-forge/setup-miniforge@v1
+      uses: conda-incubator/setup-miniconda@v2
       env:
         ACTIONS_ALLOW_UNSECURE_COMMANDS: true
       with:
         auto-update-conda: true
         python-version: ${{ matrix.python-version }}
+        channels: conda-forge
     - name: Test work_queue
       shell: bash -l {0}
       run: |
@@ -155,7 +158,7 @@ jobs:
     needs: [test, testwq]
     strategy:
       matrix:
-        python-version: ["3.10"]
+        python-version: ["3.11"]
     name: deploy release
 
     steps:
@@ -168,15 +171,8 @@ jobs:
       run: |
         python -m pip install --upgrade pip setuptools wheel
         python setup.py sdist bdist_wheel --universal
-    - name: Create Release
-      uses: actions/create-release@v1
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_OAUTH }}
-      with:
-        tag_name: ${{ github.ref }}
-        release_name: Release ${{ github.ref }}
     - name: Publish package to PyPI
-      uses: pypa/gh-action-pypi-publish@v1.8.5
+      uses: pypa/gh-action-pypi-publish@v1.8.6
       with:
         user: __token__
         password: ${{ secrets.PYPI_TOKEN }}

diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -0,0 +1,22 @@
+name: "Lint PR"
+
+on:
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+  workflow_dispatch:
+
+concurrency:
+  group: semantic-pr-title-${{ github.head_ref }}
+  cancel-in-progress: true
+
+jobs:
+  main:
+    name: Validate PR title
+    runs-on: ubuntu-latest
+    steps:
+      - uses: amannn/action-semantic-pull-request@v5.2.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
   - id: trailing-whitespace
 
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.3.2
+  rev: v3.4.0
   hooks:
   - id: pyupgrade
     args: ["--py36-plus"]

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -14,7 +14,7 @@
 
 * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
 
-* Before submitting, please run `flake8 coffea` and `pytest` to ensure you follow our formatting conventions and do not break any existing code. Furthermore, we prefer that any newly contributed code does not reduce the current code coverage of the repository. Please make sure your test your code as thoroughly as is needed.
+* Before submitting, please run `pre-commit run --all-files` and `pytest` to ensure you follow our formatting conventions and do not break any existing code. Furthermore, we prefer that any newly contributed code does not reduce the current code coverage of the repository. Please make sure you test your code as thoroughly as needed.
 
 #### **Did you fix whitespace, format code, or make a purely cosmetic patch?**
 
@@ -38,6 +38,8 @@ Changes that are cosmetic in nature and do not add anything substantial to the s
 
 coffea is a HEP community and volunteer effort. We encourage you to pitch in and [join the team](mailto:cms-coffea@cern.ch)!
 
+* Fixes, changes, and documentation updates will be released in a timely manner. Coffea follows [CalVer](https://calver.org/) practices. Repository maintainers will generate new releases as necessary, and releases are published on the GitHub releases page.
+
 Thanks! :coffee: :coffee: :coffee:
 
 Coffea Team
diff --git a/coffea/lookup_tools/lookup_base.py b/coffea/lookup_tools/lookup_base.py
@@ -8,43 +8,57 @@
 
 
 def getfunction(
-    args, thelookup_dask=None, thelookup_wref=None, __pre_args__=tuple(), **kwargs
+    args,
+    thelookup_dask=None,
+    thelookup_wref=None,
+    __non_array_args__=tuple(),
+    __arg_indices__=tuple(),
+    **kwargs
 ):
     if not isinstance(args, (list, tuple)):
         args = (args,)
+
+    if not len(args) + len(__non_array_args__) == len(__arg_indices__):
+        raise ValueError(
+            "Total length of array and non-array args should match expected placement."
+        )
+
     if all(
         isinstance(x, (awkward.contents.NumpyArray, awkward.contents.EmptyArray))
         or not isinstance(x, (awkward.contents.Content))
         for x in args
     ):
         result = None
         backend = awkward.backend(*args)
-        if backend == "cpu":
-            thelookup = None
-            if thelookup_wref is not None:
-                thelookup = thelookup_wref()
-            else:
-                thelookup = thelookup_dask.compute()
-            result = thelookup._evaluate(
-                *(list(__pre_args__) + [awkward.to_numpy(arg) for arg in args]),
-                **kwargs,
-            )
-        elif backend == "typetracer":
-            zlargs = []
-            for arg in args:
+
+        if backend != "cpu" and backend != "typetracer":
+            raise NotImplementedError("support for cupy/jax/etc. numpy extensions")
+
+        # order args and __non_array_args__ correctly
+        repacked_args = [None] * len(__arg_indices__)
+        for iarg, arg in enumerate(args):
+            if backend == "cpu":
+                repacked_args[__arg_indices__[iarg]] = awkward.to_numpy(arg)
+            elif backend == "typetracer":
                 arg._touch_data(recursive=True)
-                zlargs.append(
-                    awkward.Array(
-                        arg.form.length_zero_array(highlevel=False),
-                    )
+                zlarr = awkward.Array(
+                    arg.form.length_zero_array(highlevel=False),
                 )
-            result = thelookup_wref()._evaluate(
-                *(list(__pre_args__) + [awkward.to_numpy(zlarg) for zlarg in zlargs]),
-                **kwargs,
-            )
+                repacked_args[__arg_indices__[iarg]] = awkward.to_numpy(zlarr)
+
+        for inaarg, naarg in enumerate(__non_array_args__):
+            repacked_args[__arg_indices__[inaarg + len(args)]] = naarg
+
+        thelookup = None
+        if thelookup_wref is not None:
+            thelookup = thelookup_wref()
         else:
-            raise NotImplementedError("support for cupy/jax/etc. numpy extensions")
+            from dask.distributed import worker_client
 
+            with worker_client() as client:
+                thelookup = client.compute(thelookup_dask).result()
+
+        result = thelookup._evaluate(*repacked_args, **kwargs)
         out = awkward.contents.NumpyArray(result)
         if backend == "typetracer":
             return out.to_typetracer(forget_length=True)
@@ -53,12 +67,13 @@ def getfunction(
 
 
 class _LookupXformFn:
-    def __init__(self, *args, thelookup_dask, thelookup_wref, **kwargs):
+    def __init__(self, *args, arg_indices, thelookup_dask, thelookup_wref, **kwargs):
         self.func = partial(
             getfunction,
             thelookup_dask=thelookup_dask,
             thelookup_wref=thelookup_wref,
-            __pre_args__=args,
+            __non_array_args__=args,
+            __arg_indices__=arg_indices,
             **kwargs,
         )
 
@@ -75,22 +90,33 @@ def __init__(self, dask_future):
 
     def __call__(self, *args, **kwargs):
         dask_label = kwargs.pop("dask_label", None)
+
+        actual_args = []
+        actual_arg_indices = []
+        delay_args = []
+        delay_arg_indices = []
+        for iarg, arg in enumerate(args):
+            if isinstance(arg, (awkward.highlevel.Array, dask_awkward.Array)):
+                actual_args.append(arg)
+                actual_arg_indices.append(iarg)
+            else:
+                delay_args.append(arg)
+                delay_arg_indices.append(iarg)
+        arg_indices = tuple(actual_arg_indices + delay_arg_indices)
+        actual_args = tuple(actual_args)
+        delay_args = tuple(delay_args)
+
+        tomap = _LookupXformFn(
+            *delay_args,
+            arg_indices=arg_indices,
+            thelookup_dask=self._dask_future,
+            thelookup_wref=self._weakref,
+            **kwargs,
+        )
+
         # if any of our inputs are dask_awkward arrays, then we should map_partitions
         if any(isinstance(x, (dask_awkward.Array)) for x in args):
-            import dask
-
-            delay_args = tuple(
-                arg for arg in args if not isinstance(arg, dask_awkward.Array)
-            )
-            actual_args = tuple(
-                arg for arg in args if isinstance(arg, dask_awkward.Array)
-            )
-            tomap = _LookupXformFn(
-                *delay_args,
-                thelookup_dask=self._dask_future,
-                thelookup_wref=self._weakref,
-                **kwargs,
-            )
+            from dask.base import tokenize
 
             zlargs = [
                 awkward.Array(
@@ -109,14 +135,14 @@ def __call__(self, *args, **kwargs):
                     tomap,
                     *actual_args,
                     label=dask_label,
-                    token=dask.base.tokenize(self._dask_future.name, *args),
+                    token=tokenize(self._dask_future.name, *args),
                     meta=meta,
                 )
             else:
                 return dask_awkward.map_partitions(
                     tomap,
                     *actual_args,
-                    token=dask.base.tokenize(self._dask_future.name, *args),
+                    token=tokenize(self._dask_future.name, *args),
                     meta=meta,
                 )
 
@@ -131,22 +157,7 @@ def __call__(self, *args, **kwargs):
                 " numpy arrays, strings, or numbers!"
             )
 
-        # behavior = awkward._util.behavior_of(*args)
-        non_array_args = tuple(
-            arg for arg in args if not isinstance(arg, awkward.highlevel.Array)
-        )
-        array_args = tuple(
-            arg for arg in args if isinstance(arg, awkward.highlevel.Array)
-        )
-        func = partial(
-            getfunction,
-            thelookup_dask=self._dask_future,
-            thelookup_wref=self._weakref,
-            __pre_args__=non_array_args,
-            **kwargs,
-        )
-        out = awkward.transform(func, *array_args)
-        return out
+        return tomap(*actual_args)
 
     def _evaluate(self, *args, **kwargs):
         raise NotImplementedError
diff --git a/coffea/processor/helpers.py b/coffea/processor/helpers.py
@@ -115,9 +115,9 @@ def partial_weight(self, include=[], exclude=[]):
 
         Parameters
         ----------
-            include : list
+            include : list | set
                 Weight names to include, defaults to []
-            exclude : list
+            exclude : list | set
                 Weight names to exclude, defaults to []
         Returns
         -------
@@ -133,6 +133,10 @@ def partial_weight(self, include=[], exclude=[]):
             raise ValueError(
                 "Need to specify exactly one of the 'exclude' or 'include' arguments."
             )
+        if include and not isinstance(include, (list, set)):
+            raise ValueError("'include' should be a list or set of weight names")
+        if exclude and not isinstance(exclude, (list, set)):
+            raise ValueError("'exclude' should be a list or set of weight names")
 
         names = set(self._weights.keys())
         if include:

diff --git a/coffea/version.py b/coffea/version.py
@@ -30,7 +30,7 @@
 
 import re
 
-__version__ = "2023.4.0.rc3"
+__version__ = "2023.5.0.rc1"
 version = __version__
 version_info = tuple(re.split(r"[-\.]", __version__))