diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b489eba036..a8247a247b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -16,7 +16,9 @@ jobs: IRIS_TEST_DATA_PATH: benchmarks/iris-test-data IRIS_TEST_DATA_VERSION: "2.5" # Lets us manually bump the cache to rebuild + ENV_CACHE_BUILD: "0" TEST_DATA_CACHE_BUILD: "2" + PY_VER: 3.8 steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it @@ -32,19 +34,15 @@ jobs: run: | pip install nox - - name: Cache .nox and .asv/env directories + - name: Cache environment directories id: cache-env-dir uses: actions/cache@v2 with: path: | .nox benchmarks/.asv/env - # Make sure GHA never gets an exact cache match by using the unique - # github.sha. This means it will always store this run as a new - # cache (Nox may have made relevant changes during run). Cache - # restoration still succeeds via the partial restore-key match. - key: ${{ runner.os }}-${{ github.sha }} - restore-keys: ${{ runner.os }} + $CONDA/pkgs + key: ${{ runner.os }}-${{ hashFiles('requirements/') }}-${{ env.ENV_CACHE_BUILD }} - name: Cache test data directory id: cache-test-data @@ -62,7 +60,7 @@ jobs: unzip -q iris-test-data.zip mkdir --parents ${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_LOC_PATH} mv iris-test-data-${IRIS_TEST_DATA_VERSION} ${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH} - + - name: Set test data var run: | echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py index 2e741c3da0..4a964a648d 100644 --- a/benchmarks/benchmarks/__init__.py +++ b/benchmarks/benchmarks/__init__.py @@ -5,45 +5,4 @@ # licensing details. """Common code for benchmarks.""" -import os -from pathlib import Path - -# Environment variable names -_ASVDIR_VARNAME = "ASV_DIR" # As set in nightly script "asv_nightly/asv.sh" -_DATADIR_VARNAME = "BENCHMARK_DATA" # For local runs - ARTIFICIAL_DIM_SIZE = int(10e3) # For all artificial cubes, coords etc. - -# Work out where the benchmark data dir is. -asv_dir = os.environ.get("ASV_DIR", None) -if asv_dir: - # For an overnight run, this comes from the 'ASV_DIR' setting. - benchmark_data_dir = Path(asv_dir) / "data" -else: - # For a local run, you set 'BENCHMARK_DATA'. - benchmark_data_dir = os.environ.get(_DATADIR_VARNAME, None) - if benchmark_data_dir is not None: - benchmark_data_dir = Path(benchmark_data_dir) - - -def testdata_path(*path_names): - """ - Return the path of a benchmark test data file. - - These are based from a test-data location dir, which is either - ${}/data (for overnight tests), or ${} for local testing. - - If neither of these were set, an error is raised. - - """.format( - _ASVDIR_VARNAME, _DATADIR_VARNAME - ) - if benchmark_data_dir is None: - msg = ( - "Benchmark data dir is not defined : " - 'Either "${}" or "${}" must be set.' - ) - raise (ValueError(msg.format(_ASVDIR_VARNAME, _DATADIR_VARNAME))) - path = benchmark_data_dir.joinpath(*path_names) - path = str(path) # Because Iris doesn't understand Path objects yet. - return path diff --git a/benchmarks/benchmarks/generate_data/__init__.py b/benchmarks/benchmarks/generate_data/__init__.py new file mode 100644 index 0000000000..a56f2e4623 --- /dev/null +++ b/benchmarks/benchmarks/generate_data/__init__.py @@ -0,0 +1,94 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. 
+# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Scripts for generating supporting data for benchmarking. + +Data generated using Iris should use :func:`run_function_elsewhere`, which +means that data is generated using a fixed version of Iris and a fixed +environment, rather than those that get changed when the benchmarking run +checks out a new commit. + +Downstream use of data generated 'elsewhere' requires saving; usually in a +NetCDF file. Could also use pickling but there is a potential risk if the +benchmark sequence runs over two different Python versions. + +""" +from inspect import getsource +from os import environ +from pathlib import Path +from subprocess import CalledProcessError, check_output, run +from textwrap import dedent + +#: Python executable used by :func:`run_function_elsewhere`, set via env +#: variable of same name. Must be path of Python within an environment that +#: includes Iris (including dependencies and test modules) and Mule. +try: + DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"] + _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"]) +except KeyError: + error = "Env variable DATA_GEN_PYTHON not defined." + raise KeyError(error) +except (CalledProcessError, FileNotFoundError, PermissionError): + error = ( + "Env variable DATA_GEN_PYTHON not a runnable python executable path." + ) + raise ValueError(error) + +# The default location of data files used in benchmarks. Used by CI. +default_data_dir = (Path(__file__).parents[2] / ".data").resolve() +# Optionally override the default data location with environment variable. +BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir)) +if BENCHMARK_DATA == default_data_dir: + BENCHMARK_DATA.mkdir(exist_ok=True) +elif not BENCHMARK_DATA.is_dir(): + message = f"Not a directory: {BENCHMARK_DATA} ." + raise ValueError(message) + +# Manual flag to allow the rebuilding of synthetic data. +# False forces a benchmark run to re-make all the data files. +REUSE_DATA = True + + +def run_function_elsewhere(func_to_run, *args, **kwargs): + """ + Run a given function using the :const:`DATA_GEN_PYTHON` executable. + + This structure allows the function to be written natively. + + Parameters + ---------- + func_to_run : FunctionType + The function object to be run. + NOTE: the function must be completely self-contained, i.e. perform all + its own imports (within the target :const:`DATA_GEN_PYTHON` + environment). + *args : tuple, optional + Function call arguments. Must all be expressible as simple literals, + i.e. the ``repr`` must be a valid literal expression. + **kwargs: dict, optional + Function call keyword arguments. All values must be expressible as + simple literals (see ``*args``). + + Returns + ------- + str + The ``stdout`` from the run. 
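+
+    Examples
+    --------
+    A minimal sketch of intended use; the function and argument here are
+    hypothetical, not part of the benchmark suite::
+
+        def _print_sum(n):
+            # Self-contained: performs its own imports.
+            import numpy as np
+
+            print(np.arange(n).sum())
+
+        output = run_function_elsewhere(_print_sum, 5)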
+ + """ + func_string = dedent(getsource(func_to_run)) + func_string = func_string.replace("@staticmethod\n", "") + func_call_term_strings = [repr(arg) for arg in args] + func_call_term_strings += [ + f"{name}={repr(val)}" for name, val in kwargs.items() + ] + func_call_string = ( + f"{func_to_run.__name__}(" + ",".join(func_call_term_strings) + ")" + ) + python_string = "\n".join([func_string, func_call_string]) + result = run( + [DATA_GEN_PYTHON, "-c", python_string], capture_output=True, check=True + ) + return result.stdout diff --git a/benchmarks/benchmarks/generate_data/um_files.py b/benchmarks/benchmarks/generate_data/um_files.py new file mode 100644 index 0000000000..8792fcc48b --- /dev/null +++ b/benchmarks/benchmarks/generate_data/um_files.py @@ -0,0 +1,215 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Generate FF, PP and NetCDF files based on a minimal synthetic FF file. + +NOTE: uses the Mule package, so depends on an environment with Mule installed. +""" + + +def _create_um_files( + len_x: int, len_y: int, len_z: int, len_t: int, compress, save_paths: dict +) -> None: + """ + Generate an FF object of given shape and compression, save to FF/PP/NetCDF. + + This is run externally + (:func:`benchmarks.generate_data.run_function_elsewhere`), so all imports + are self-contained and input parameters are simple types. + """ + from copy import deepcopy + from datetime import datetime + from tempfile import NamedTemporaryFile + + from mo_pack import compress_wgdos as mo_pack_compress + from mule import ArrayDataProvider, Field3, FieldsFile + from mule.pp import fields_to_pp_file + import numpy as np + + from iris import load_cube + from iris import save as save_cube + + def packing_patch(*compress_args, **compress_kwargs) -> bytes: + """ + Force conversion from returned :class:`memoryview` to :class:`bytes`. + + Downstream uses of :func:`mo_pack.compress_wgdos` were written + for the ``Python2`` behaviour, where the returned buffer had a + different ``__len__`` value to the current :class:`memoryview`. + Unable to fix directly in Mule, so monkey patching for now. + """ + return mo_pack_compress(*compress_args, **compress_kwargs).tobytes() + + import mo_pack + + mo_pack.compress_wgdos = packing_patch + + ######## + + template = { + "fixed_length_header": {"dataset_type": 3, "grid_staggering": 3}, + "integer_constants": { + "num_p_levels": len_z, + "num_cols": len_x, + "num_rows": len_y, + }, + "real_constants": {}, + "level_dependent_constants": {"dims": (len_z + 1, None)}, + } + new_ff = FieldsFile.from_template(deepcopy(template)) + + data_array = np.arange(len_x * len_y).reshape(len_x, len_y) + array_provider = ArrayDataProvider(data_array) + + def add_field(level_: int, time_step_: int) -> None: + """ + Add a minimal field to the new :class:`~mule.FieldsFile`. + + Includes the minimum information to allow Mule saving and Iris + loading, as well as incrementation for vertical levels and time + steps to allow generation of z and t dimensions. + """ + new_field = Field3.empty() + # To correspond to the header-release 3 class used. + new_field.lbrel = 3 + # Mule uses the first element of the lookup to test for + # unpopulated fields (and skips them), so the first element should + # be set to something. The year will do. + new_field.raw[1] = datetime.now().year + + # Horizontal. 
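+            # (For context: lbcode=1 declares a regular lat-lon grid;
+            # bzx/bzy set the grid origin relative to the file's start
+            # lon/lat, offset back by half a grid spacing.)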
+ new_field.lbcode = 1 + new_field.lbnpt = len_x + new_field.lbrow = len_y + new_field.bdx = new_ff.real_constants.col_spacing + new_field.bdy = new_ff.real_constants.row_spacing + new_field.bzx = new_ff.real_constants.start_lon - 0.5 * new_field.bdx + new_field.bzy = new_ff.real_constants.start_lat - 0.5 * new_field.bdy + + # Hemisphere. + new_field.lbhem = 32 + # Processing. + new_field.lbproc = 0 + + # Vertical. + # Hybrid height values by simulating sequences similar to those in a + # theta file. + new_field.lbvc = 65 + if level_ == 0: + new_field.lblev = 9999 + else: + new_field.lblev = level_ + + level_1 = level_ + 1 + six_rec = 20 / 3 + three_rec = six_rec / 2 + + new_field.blev = level_1 ** 2 * six_rec - six_rec + new_field.brsvd1 = ( + level_1 ** 2 * six_rec + (six_rec * level_1) - three_rec + ) + + brsvd2_simulated = np.linspace(0.995, 0, len_z) + shift = min(len_z, 2) + bhrlev_simulated = np.concatenate( + [np.ones(shift), brsvd2_simulated[:-shift]] + ) + new_field.brsvd2 = brsvd2_simulated[level_] + new_field.bhrlev = bhrlev_simulated[level_] + + # Time. + new_field.lbtim = 11 + + new_field.lbyr = time_step_ + for attr_name in ["lbmon", "lbdat", "lbhr", "lbmin", "lbsec"]: + setattr(new_field, attr_name, 0) + + new_field.lbyrd = time_step_ + 1 + for attr_name in ["lbmond", "lbdatd", "lbhrd", "lbmind", "lbsecd"]: + setattr(new_field, attr_name, 0) + + # Data and packing. + new_field.lbuser1 = 1 + new_field.lbpack = int(compress) + new_field.bacc = 0 + new_field.bmdi = -1 + new_field.lbext = 0 + new_field.set_data_provider(array_provider) + + new_ff.fields.append(new_field) + + for time_step in range(len_t): + for level in range(len_z): + add_field(level, time_step + 1) + + ff_path = save_paths.get("FF", None) + pp_path = save_paths.get("PP", None) + nc_path = save_paths.get("NetCDF", None) + + if ff_path: + new_ff.to_file(ff_path) + if pp_path: + fields_to_pp_file(str(pp_path), new_ff.fields) + if nc_path: + temp_ff_path = None + # Need an Iris Cube from the FF content. + if ff_path: + # Use the existing file. + ff_cube = load_cube(ff_path) + else: + # Make a temporary file. + temp_ff_path = NamedTemporaryFile() + new_ff.to_file(temp_ff_path.name) + ff_cube = load_cube(temp_ff_path.name) + + save_cube(ff_cube, nc_path, zlib=compress) + if temp_ff_path: + temp_ff_path.close() + + +FILE_EXTENSIONS = {"FF": "", "PP": ".pp", "NetCDF": ".nc"} + + +def create_um_files( + len_x: int, + len_y: int, + len_z: int, + len_t: int, + compress: bool, + file_types: list, +) -> dict: + """ + Generate FF-based FF / PP / NetCDF files with specified shape and compression. + + All files representing a given shape are saved in a dedicated directory. A + dictionary of the saved paths is returned. + + If the required files exist, they are re-used, unless + :const:`benchmarks.REUSE_DATA` is ``False``. + """ + # Self contained imports to avoid linting confusion with _create_um_files(). + from . 
import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere + + save_name_sections = ["UM", len_x, len_y, len_z, len_t] + save_name = "_".join(str(section) for section in save_name_sections) + save_dir = BENCHMARK_DATA / save_name + if not save_dir.is_dir(): + save_dir.mkdir(parents=True) + + save_paths = {} + files_exist = True + for file_type in file_types: + file_ext = FILE_EXTENSIONS[file_type] + save_path = (save_dir / f"{compress}").with_suffix(file_ext) + files_exist = files_exist and save_path.is_file() + save_paths[file_type] = str(save_path) + + if not REUSE_DATA or not files_exist: + _ = run_function_elsewhere( + _create_um_files, len_x, len_y, len_z, len_t, compress, save_paths + ) + + return save_paths diff --git a/benchmarks/benchmarks/loading.py b/benchmarks/benchmarks/loading.py new file mode 100644 index 0000000000..4558c3b5cb --- /dev/null +++ b/benchmarks/benchmarks/loading.py @@ -0,0 +1,185 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +File loading benchmark tests. + +Where applicable benchmarks should be parameterised for two sizes of input data: + * minimal: enables detection of regressions in parts of the run-time that do + NOT scale with data size. + * large: large enough to exclusively detect regressions in parts of the + run-time that scale with data size. Size should be _just_ large + enough - don't want to bloat benchmark runtime. + +""" + +from iris import AttributeConstraint, Constraint, load, load_cube +from iris.cube import Cube +from iris.fileformats.um import structured_um_loading + +from .generate_data import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere +from .generate_data.um_files import create_um_files + + +class LoadAndRealise: + params = [ + [(2, 2, 2), (1280, 960, 5)], + [False, True], + ["FF", "PP", "NetCDF"], + ] + param_names = ["xyz", "compressed", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[2] + file_path_dict = {} + for xyz in self.params[0]: + file_path_dict[xyz] = {} + x, y, z = xyz + for compress in self.params[1]: + file_path_dict[xyz][compress] = create_um_files( + x, y, z, 1, compress, file_type_args + ) + return file_path_dict + + def setup( + self, + file_path_dict: dict, + xyz: tuple, + compress: bool, + file_format: str, + ) -> None: + self.file_path = file_path_dict[xyz][compress][file_format] + self.cube = self.load() + + def load(self) -> Cube: + return load_cube(self.file_path) + + def time_load(self, _, __, ___, ____) -> None: + _ = self.load() + + def time_realise(self, _, __, ___, ____) -> None: + # Don't touch cube.data - permanent realisation plays badly with ASV's + # re-run strategy. + assert self.cube.has_lazy_data() + self.cube.core_data().compute() + + +class STASHConstraint: + # xyz sizes mimic LoadAndRealise to maximise file re-use. 
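+    # create_um_files will find the files already generated for
+    # LoadAndRealise on disk and re-use them rather than regenerating.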
+ params = [[(2, 2, 2), (1280, 960, 5)], ["FF", "PP"]] + param_names = ["xyz", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[1] + file_path_dict = {} + for xyz in self.params[0]: + x, y, z = xyz + file_path_dict[xyz] = create_um_files( + x, y, z, 1, False, file_type_args + ) + return file_path_dict + + def setup( + self, file_path_dict: dict, xyz: tuple, file_format: str + ) -> None: + self.file_path = file_path_dict[xyz][file_format] + + def time_stash_constraint(self, _, __, ___) -> None: + _ = load_cube(self.file_path, AttributeConstraint(STASH="m??s??i901")) + + +class TimeConstraint: + params = [[3, 20], ["FF", "PP", "NetCDF"]] + param_names = ["time_dim_len", "file_format"] + + def setup_cache(self) -> dict: + file_type_args = self.params[1] + file_path_dict = {} + for time_dim_len in self.params[0]: + file_path_dict[time_dim_len] = create_um_files( + 20, 20, 5, time_dim_len, False, file_type_args + ) + return file_path_dict + + def setup( + self, file_path_dict: dict, time_dim_len: int, file_format: str + ) -> None: + self.file_path = file_path_dict[time_dim_len][file_format] + self.time_constr = Constraint(time=lambda cell: cell.point.year < 3) + + def time_time_constraint(self, _, __, ___) -> None: + _ = load_cube(self.file_path, self.time_constr) + + +class ManyVars: + FILE_PATH = BENCHMARK_DATA / "many_var_file.nc" + + @staticmethod + def _create_file(save_path: str) -> None: + """Is run externally - everything must be self-contained.""" + import numpy as np + + from iris import save + from iris.coords import AuxCoord + from iris.cube import Cube + + data_len = 8 + data = np.arange(data_len) + cube = Cube(data, units="unknown") + extra_vars = 80 + names = ["coord_" + str(i) for i in range(extra_vars)] + for name in names: + coord = AuxCoord(data, long_name=name, units="unknown") + cube.add_aux_coord(coord, 0) + save(cube, save_path) + + def setup_cache(self) -> None: + if not REUSE_DATA or not self.FILE_PATH.is_file(): + # See :mod:`benchmarks.generate_data` docstring for full explanation. + _ = run_function_elsewhere( + self._create_file, + str(self.FILE_PATH), + ) + + def time_many_var_load(self) -> None: + _ = load(str(self.FILE_PATH)) + + +class StructuredFF: + """ + Test structured loading of a large-ish fieldsfile. + + Structured load of the larger size should show benefit over standard load, + avoiding the cost of merging. 
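+
+    A sketch of the toggled path (``ff_path`` is illustrative)::
+
+        with structured_um_loading():
+            cubes = load(ff_path)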
+ """ + + params = [[(2, 2, 2), (1280, 960, 5)], [False, True]] + param_names = ["xyz", "structured_loading"] + + def setup_cache(self) -> dict: + file_path_dict = {} + for xyz in self.params[0]: + x, y, z = xyz + file_path_dict[xyz] = create_um_files(x, y, z, 1, False, ["FF"]) + return file_path_dict + + def setup(self, file_path_dict, xyz, structured_load): + self.file_path = file_path_dict[xyz]["FF"] + self.structured_load = structured_load + + def load(self): + """Load the whole file (in fact there is only 1 cube).""" + + def _load(): + _ = load(self.file_path) + + if self.structured_load: + with structured_um_loading(): + _load() + else: + _load() + + def time_structured_load(self, _, __, ___): + self.load() diff --git a/noxfile.py b/noxfile.py index 6367b74aef..0600540c5b 100755 --- a/noxfile.py +++ b/noxfile.py @@ -289,7 +289,7 @@ def linkcheck(session: nox.sessions.Session): ) -@nox.session(python=PY_VER[-1], venv_backend="conda") +@nox.session(python=PY_VER, venv_backend="conda") @nox.parametrize( ["ci_mode"], [True, False], @@ -297,7 +297,7 @@ def linkcheck(session: nox.sessions.Session): ) def benchmarks(session: nox.sessions.Session, ci_mode: bool): """ - Perform esmf-regrid performance benchmarks (using Airspeed Velocity). + Perform Iris performance benchmarks (using Airspeed Velocity). Parameters ---------- @@ -315,6 +315,47 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool): """ session.install("asv", "nox") + + data_gen_var = "DATA_GEN_PYTHON" + if data_gen_var in os.environ: + print("Using existing data generation environment.") + else: + print("Setting up the data generation environment...") + # Get Nox to build an environment for the `tests` session, but don't + # run the session. Will re-use a cached environment if appropriate. + session.run_always( + "nox", + "--session=tests", + "--install-only", + f"--python={session.python}", + ) + # Find the environment built above, set it to be the data generation + # environment. + data_gen_python = next( + Path(".nox").rglob(f"tests*/bin/python{session.python}") + ).resolve() + session.env[data_gen_var] = data_gen_python + + mule_dir = data_gen_python.parents[1] / "resources" / "mule" + if not mule_dir.is_dir(): + print("Installing Mule into data generation environment...") + session.run_always( + "git", + "clone", + "https://github.com/metomi/mule.git", + str(mule_dir), + external=True, + ) + session.run_always( + str(data_gen_python), + "-m", + "pip", + "install", + str(mule_dir / "mule"), + external=True, + ) + + print("Running ASV...") session.cd("benchmarks") # Skip over setup questions for a new machine. session.run("asv", "machine", "--yes")
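
For orientation, a rough sketch of how the new pieces fit together in a local
run; the interpreter path and shape values below are illustrative, not taken
from the diff:

    import os

    # Any Python with Iris (plus its test dependencies) and Mule installed.
    # The nox `benchmarks` session sets this itself, via the `tests` session,
    # when the variable is unset.
    os.environ["DATA_GEN_PYTHON"] = "/path/to/env/bin/python"  # illustrative

    from benchmarks.generate_data.um_files import create_um_files

    # Writes (or, while REUSE_DATA is True, re-uses) UM_2_2_2_1/False,
    # False.pp and False.nc under BENCHMARK_DATA, returning string paths:
    # {"FF": ..., "PP": ..., "NetCDF": ...}.
    paths = create_um_files(2, 2, 2, 1, False, ["FF", "PP", "NetCDF"])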