diff --git a/Jenkinsfile b/Jenkinsfile index 7c8eb7d9..c9fff991 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,7 +10,7 @@ pipeline { // https://jenkins.io/doc/book/pipeline/syntax/ agent { docker { - image "pavics/workflow-tests:211123-update211216" + image "pavics/workflow-tests:220121" label 'linux && docker' } } diff --git a/binder/Dockerfile b/binder/Dockerfile index 133beae5..37b908e7 100644 --- a/binder/Dockerfile +++ b/binder/Dockerfile @@ -1,4 +1,4 @@ -FROM pavics/workflow-tests:211123-update211216 +FROM pavics/workflow-tests:220121 USER root diff --git a/conftest.py b/conftest.py index 51632802..00953437 100644 --- a/conftest.py +++ b/conftest.py @@ -2,4 +2,5 @@ def pytest_collectstart(collector): # Make sure ancestor folder name do not end with `.ipynb`, else we have # AttributeError: 'Session' object has no attribute 'skip_compare'. if collector.fspath and collector.fspath.ext == '.ipynb': - collector.skip_compare += 'text/html', 'application/javascript', + collector.skip_compare += ('text/html', 'application/javascript', + 'application/vnd.holoviews_load.v0+json',) diff --git a/docker/Dockerfile b/docker/Dockerfile index c5aeb4b6..903082e7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,10 @@ FROM continuumio/miniconda3 -RUN conda update conda +# Use mamba for much improved performance over conda. +# The 'channel_priority strict' did help conda but it was not enough. +RUN conda update conda -n base && \ + conda install mamba -n base -c defaults -c conda-forge && \ + conda config --set channel_priority strict # to checkout other notebooks and to run pip install RUN apt-get update && \ @@ -9,18 +13,40 @@ RUN apt-get update && \ firefox-esr x11-utils && \ apt-get clean -COPY environment.yml /environment.yml - -# needed for our specific jenkins +# Create user jenkins for our Jenkins e2e notebooks test suite. +# Change /opt/conda folder permissions for jupyter-conda extension. 
RUN groupadd --gid 1000 jenkins \ - && useradd --uid 1000 --gid jenkins --create-home jenkins + && useradd --uid 1000 --gid jenkins --create-home jenkins && \ + chmod -R a+rwx /opt/conda -# Change these folders' permissions for jupyter-conda extension -RUN chmod -R a+rwx /opt/conda +COPY environment.yml /environment.yml # create env "birdy" # use umask 0000 so that the files for the new environment are usable by user 'jenkins' for the jupyter-conda-extension -RUN umask 0000 && conda env create -f /environment.yml +# +# Perform 2 stages install because one single 'conda env create -f +# /environment.yml' was taking forever to complete, same with mamba. +# Had to do this 2 stages install. 2 stages install was also taking forever +# with conda so had to switch to mamba. +# +# One single 'conda env create -f /environment.yml' takes forever because we +# removed all direct dependencies of xclim and ravenpy in /environment.yml for +# dependencies pinning by xclim and ravenpy to take effect. This results in +# conda having a lot more packages to "solve" and it seems the solver +# performance dropped exponentially with the number of packages to solve. +# +# Conda was stuck at this step: +# DEBUG conda.common._logic:_run_sat(607): Invoking SAT with clause count: 2500273 +# +# Pin python=3.9 because python 3.10 causes this error: +# Encountered problems while solving: +# - package cartopy-0.20.1-py310h902574e_5 requires geos >=3.10.1,<3.10.2.0a0, but none of the providers can be installed +# This means there is no py310 build for geos package but https://anaconda.org/conda-forge/geos/files seems to be python independent! +# Pin python=3.8 because according to DavidH, xESMF has not been tested with 3.9 yet. 
+RUN umask 0000 && \ + mamba create --name birdy --channel conda-forge --channel defaults xclim ravenpy python=3.8 && \ + mamba env update --name birdy --file /environment.yml && \ + conda remove mamba -n base # alternate way to 'source activate birdy' ENV PATH="/opt/conda/envs/birdy/bin:$PATH" @@ -33,35 +59,41 @@ RUN python -m ipykernel install --name birdy # anything accidentally # this is for debug only, all dependencies should be specified in # environment.yml above -# RUN conda install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy nbdime +# RUN mamba install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy nbdime # build jupyterlab extensions installed by conda, see `jupyter labextension list` +# Supposedly not needed with jupyterlab v3 anymore but see +# https://github.com/jupyterlab/jupyterlab/issues/11726#issuecomment-998901247 +# TODO: remove 'jupyter lab build' step once all extensions move to prebuilt extensions, +# see comment https://github.com/jupyterlab/jupyterlab/issues/11726#issuecomment-998917305 +# Currently jupyter-dash is holding back this step, see +# https://github.com/plotly/jupyter-dash/issues/49 RUN jupyter lab build -# for ipywidgets to work with jupyter lab (notebooks works out of the box) -RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager \ - && jupyter serverextension enable voila --sys-prefix \ - && jupyter labextension install @jupyter-widgets/jupyterlab-manager jupyter-leaflet \ - && jupyter labextension install jupyterlab-topbar-text \ - jupyterlab-theme-toggle +RUN jupyter serverextension enable voila --sys-prefix # && jupyter labextension install jupyterlab-clipboard -ADD https://raw.githubusercontent.com/jupyter/docker-stacks/master/base-notebook/start.sh /usr/local/bin/ -ADD https://raw.githubusercontent.com/jupyter/docker-stacks/master/base-notebook/start-singleuser.sh /usr/local/bin/ -ADD 
https://raw.githubusercontent.com/jupyter/docker-stacks/master/base-notebook/start-notebook.sh /usr/local/bin/ -ADD https://raw.githubusercontent.com/jupyter/docker-stacks/master/base-notebook/fix-permissions /usr/local/bin/ -ADD https://raw.githubusercontent.com/jupyter/docker-stacks/master/base-notebook/jupyter_notebook_config.py /etc/jupyter/ -RUN chmod a+rx /usr/local/bin/start.sh /usr/local/bin/start-singleuser.sh /usr/local/bin/start-notebook.sh /usr/local/bin/fix-permissions; \ - chmod a+r /etc/jupyter/jupyter_notebook_config.py +# This should be "master" but commit +# https://github.com/jupyter/docker-stacks/commit/c772e98ac794173d6ed83a08ec249038b27ca3be +# is breaking with us since we do not have user jovyan. +ENV DOCKER_STACKS_COMMIT=709206ac8788475728cc9c992c25fb5f1501bc29 -# For Pavics-landing notebooks to re-create Jupyter env layout: +# /notebook_dir for Pavics-landing notebooks to re-create Jupyter env layout: # /notebook_dir/writable-workspace, /notebook_dir/pavics-homepage. # # Path to the /notebook_dir/pavics-homepage/tutorial_data/*.geojson files are # hardcoded so users can copy the nb to writable-workspace/ dir and still be able # to run them seemlessly from the Jupyter env (without having to also copy # those *.geojson files with the notebooks). 
-RUN mkdir /notebook_dir && chown jenkins /notebook_dir +RUN wget https://raw.githubusercontent.com/jupyter/docker-stacks/$DOCKER_STACKS_COMMIT/base-notebook/start.sh --output-document /usr/local/bin/start.sh && \ + wget https://raw.githubusercontent.com/jupyter/docker-stacks/$DOCKER_STACKS_COMMIT/base-notebook/start-singleuser.sh --output-document /usr/local/bin/start-singleuser.sh && \ + wget https://raw.githubusercontent.com/jupyter/docker-stacks/$DOCKER_STACKS_COMMIT/base-notebook/start-notebook.sh --output-document /usr/local/bin/start-notebook.sh && \ + wget https://raw.githubusercontent.com/jupyter/docker-stacks/$DOCKER_STACKS_COMMIT/base-notebook/fix-permissions --output-document /usr/local/bin/fix-permissions && \ + mkdir /etc/jupyter && \ + wget https://raw.githubusercontent.com/jupyter/docker-stacks/$DOCKER_STACKS_COMMIT/base-notebook/jupyter_notebook_config.py --output-document /etc/jupyter/jupyter_notebook_config.py && \ + chmod a+rx /usr/local/bin/start.sh /usr/local/bin/start-singleuser.sh /usr/local/bin/start-notebook.sh /usr/local/bin/fix-permissions && \ + chmod a+r /etc/jupyter/jupyter_notebook_config.py && \ + mkdir /notebook_dir && chown jenkins /notebook_dir # problem running start-notebook.sh when being root # the jupyter/base-notebook image also do not default to root user so we do the same here diff --git a/docker/Dockerfile.testing b/docker/Dockerfile.testing index 733478b1..dab68d74 100644 --- a/docker/Dockerfile.testing +++ b/docker/Dockerfile.testing @@ -1,15 +1,15 @@ # For testing quickly without having to do a full rebuild. -FROM pavics/workflow-tests:211123 +FROM pavics/workflow-tests:211221 USER root # Use 'update' for existing and 'install' for new package. # Keep same channel ordering to not revert anything. 
RUN umask 0000 \ - && conda install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy "rasterio<=1.2.6" "ravenpy>=0.7.5" + && mamba install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy "shapely<=1.7.1" "bokeh<=2.3.3" # && pip uninstall -y ravenpy \ -# && conda install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy ravenpy aiohttp +# && mamba install -c conda-forge -c cdat -c bokeh -c plotly -c defaults -n birdy ravenpy aiohttp # RUN apt-get update && \ # DEBIAN_FRONTEND=noninteractive apt-get install -y unzip && \ diff --git a/docker/environment.yml b/docker/environment.yml index 39a09be6..9ab608dc 100644 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -6,27 +6,39 @@ channels: - bokeh - plotly # for jupyter-dash - defaults + dependencies: + + # Do not put xclim and ravenpy direct dependencies here to let xclim and ravenpy + # manage their own dependencies pinning. + # + # xclim direct dependencies: https://github.com/conda-forge/xclim-feedstock/blob/master/recipe/meta.yaml + # ravenpy direct dependencies: https://github.com/conda-forge/ravenpy-feedstock/blob/master/recipe/meta.yaml + + # Pin latest xclim and ravenpy to avoid downgrading during the second install + # phase. Mamba is quicker to solve dependencies than conda but it is less + # precise so accidental downgrades happen. + - xclim >= 0.32.1 + - ravenpy >= 0.7.8 + - matplotlib - - xarray - - numpy + # - xarray # from xclim and ravenpy + # - numpy # from xclim and ravenpy - birdy - - owslib>=0.23.0 - - netcdf4 + # - owslib>=0.23.0 # from ravenpy + # - netcdf4 # from ravenpy # https://github.com/ecmwf/cfgrib # Python interface to map GRIB files to the Unidata's Common Data Model v4 # following the CF Conventions. - cfgrib - pydap - - cartopy + - cartopy >= 0.20.1 - descartes - # Pin rasterio for ravenpy, remove on next build. 
- # See https://github.com/CSHS-CWRA/RavenPy/commit/eae66e9afc30e2381e9119644a0695d1d248c739 - - rasterio <= 1.2.6 - - gdal # for osgeo - - geopandas - - pandas - - rioxarray + # - rasterio # from ravenpy + # - gdal # for osgeo, from ravenpy + # - geopandas # from ravenpy + # - pandas # from xclim and ravenpy + # - rioxarray # from ravenpy - scikit-image - ipyleaflet - threddsclient @@ -48,12 +60,12 @@ dependencies: # pinning hvplot did not solve the problem with violin plot. - hvplot - nc-time-axis - - cftime - - statsmodels # for ravenpy + # - cftime # from xclim and ravenpy + # - statsmodels # for ravenpy # for error 'ImportError: HTTPFileSystem requires "requests" and "aiohttp" to # be installed' with call 'fsspec.filesystem('https')' - aiohttp - - pydantic + # - pydantic # from ravenpy # Intake is a lightweight set of tools for loading and sharing data in data science projects - intake # https://intake.readthedocs.io/en/latest/plugin-directory.html @@ -74,19 +86,34 @@ dependencies: - zarr # https://github.com/dask/s3fs/ - s3fs - - xclim - # Pinning shapely for ravenpy. Remove on next rebuild. - # https://github.com/CSHS-CWRA/RavenPy/blob/f63e1e5b967c0d7c17e679c8f9d6d309a94096e6/environment.yml#L35 - - shapely <=1.7.1 - - ravenpy + # - shapely # from ravenpy + # PIN shapely due to notebook failure + # PAVICStutorial_ClimateDataAnalysis-5Visualization.ipynb: + # /opt/conda/envs/birdy/lib/python3.7/site-packages/shapely/geometry/base.py in array_interface_base(self) + # 324 "removed in Shapely 2.0.", + # 325 ShapelyDeprecationWarning, stacklevel=2) + # --> 326 return self._array_interface_base() + # 327 + # 328 @property + # + # TypeError: 'dict' object is not callable + # + # climex.ipynb: + # /opt/conda/envs/birdy/lib/python3.7/site-packages/cartopy/crs.py:825: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. 
Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. + # if len(multi_line_string) > 1: + # /opt/conda/envs/birdy/lib/python3.7/site-packages/cartopy/crs.py:877: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. + # for line in multi_line_string: + # /opt/conda/envs/birdy/lib/python3.7/site-packages/cartopy/crs.py:944: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. + # if len(p_mline) > 0: + # /opt/conda/envs/birdy/lib/python3.7/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/10m_physical/ne_10m_coastline.zip + # warnings.warn(f'Downloading: {url}', DownloadWarning) + - shapely <= 1.7.1 # https://github.com/roocs/clisops - - clisops + - clisops >= 0.8.0 # Universal Regridder for Geospatial Data # https://github.com/pangeo-data/xESMF - # Pin xesmf because latest xesmf-0.6.2 is not compatible with latest clisops-0.7.0. - # xesmf-0.6.1 is buggy. - # Unpin when new compatible clisops is released. - - xesmf <= 0.6.0 + # xesmf-0.6.2 requires clisops>=0.8.0 + - xesmf >= 0.6.2 # https://anaconda.org/anaconda/memory_profiler # Monitor memory consumption of a process as well as line-by-line analysis # of memory consumption for Python programs. @@ -94,7 +121,11 @@ dependencies: # for esgf notebooks - esgf-compute-api - cdms2 - - vcs + # Disable vcs because it was forcing python downgrade to below 3.9. 
+ # See https://github.com/CDAT/vcs/issues/457 + # package vcs-8.1-py_0 requires vtk-cdat >8.1, but none of the providers can be installed + # package vtk-cdat-8.2.0.8.2.1-py38hbc81915_0 requires python >=3.8,<3.9.0a0 *_cpython + #- vcs - mesalib # tests - pytest @@ -105,6 +136,10 @@ dependencies: - notebook - jupyterlab - jupyterhub + # https://ipywidgets.readthedocs.io/en/latest/user_install.html + - ipywidgets + # https://github.com/jupyter-widgets/ipyleaflet + - ipyleaflet # https://github.com/mamba-org/gator (was jupyter_conda) - mamba_gator # to diff .ipynb files @@ -125,6 +160,11 @@ dependencies: # xeus-python: back-end kernel implementing the Jupyter Debug Protocol - xeus-python - jupyter-dash + # Force newer nodejs for 'jupyter lab build' issue + # https://github.com/jupyterlab/jupyterlab/issues/11726#issuecomment-998901247 + # TODO: remove nodejs once all extensions move to prebuilt extensions, see comment + # https://github.com/jupyterlab/jupyterlab/issues/11726#issuecomment-998917305 + - nodejs >= 16.0 # utilities - curl - wget diff --git a/launchcontainer b/launchcontainer index 5cc3bfc1..e4f0a959 100755 --- a/launchcontainer +++ b/launchcontainer @@ -1,7 +1,7 @@ #!/bin/sh -x if [ -z "$DOCKER_IMAGE" ]; then - DOCKER_IMAGE="pavics/workflow-tests:211123-update211216" + DOCKER_IMAGE="pavics/workflow-tests:220121" fi if [ -z "$CONTAINER_NAME" ]; then diff --git a/launchnotebook b/launchnotebook index 898ac9ba..7adddfd1 100755 --- a/launchnotebook +++ b/launchnotebook @@ -7,7 +7,7 @@ if [ -z "$PORT" ]; then fi if [ -z "$DOCKER_IMAGE" ]; then - DOCKER_IMAGE="pavics/workflow-tests:211123-update211216" + DOCKER_IMAGE="pavics/workflow-tests:220121" fi if [ -z "$CONTAINER_NAME" ]; then