# Crunch abrupt-4xCO2 data

In [None]:
%load_ext nb_black

In [None]:
import logging
import os

import netcdf_scm
import netcdf_scm.crunching

import config
import utils

In [None]:
# Run identifier from the project config; used further down to name the
# crunch output directory.
ID = config.ID

In [None]:
# Regular-expression fragment restricting which paths are crunched. It is
# spliced into the file-matching regexp further down and also saved to a text
# file.
SOURCE_FILTER = ".*ACCESS.*"
# Alternative setting: no restriction (matches everything).
# SOURCE_FILTER = ".*"
SOURCE_FILTER

In [None]:
# Save the active filter to disk (presumably so later steps can tell which
# sources were crunched -- confirm against whatever reads this file).
# The encoding is pinned so the file's bytes do not depend on the locale of
# the machine running the notebook.
with open("source_filter_abrupt-4xCO2.txt", "w", encoding="utf-8") as fh:
    fh.write(SOURCE_FILTER)

In [None]:
# Logger named "netcdf_scm"; a handler is attached to it in the next cell.
NETCDF_SCM_LOGGER = logging.getLogger("netcdf_scm")

In [None]:
# Log line layout: timestamp, logger name, thread name, level, then message.
FORMATTER = logging.Formatter(
    fmt="%(asctime)s %(name)s %(threadName)s - %(levelname)s:  %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# StreamHandler() with no argument writes to sys.stderr. The handler only
# emits INFO and above, even though the logger itself lets DEBUG through.
STDERR_INFO_HANDLER = logging.StreamHandler()
STDERR_INFO_HANDLER.setLevel(logging.INFO)
STDERR_INFO_HANDLER.setFormatter(FORMATTER)

NETCDF_SCM_LOGGER.addHandler(STDERR_INFO_HANDLER)
NETCDF_SCM_LOGGER.setLevel(logging.DEBUG)

In [None]:
# Record the active filter in the log output.
NETCDF_SCM_LOGGER.info("SOURCE_FILTER: %s", SOURCE_FILTER)

In [None]:
# Display the netcdf_scm version this notebook is running against.
netcdf_scm.__version__

In [None]:
# Print, sorted, the base name of every directory exactly two levels below
# /data/cmip6/CMIP6/CMIP (presumably the available model/source directories
# that SOURCE_FILTER can be matched against -- confirm against the data layout).
!find /data/cmip6/CMIP6/CMIP -mindepth 2 -maxdepth 2 -type d -exec sh -c 'x={};echo $(basename ${x})' \; | sort

## Setup

In [None]:
# Output directory for this run's crunched data, named after the run ID.
CRUNCH_DIR = "./{}-irf-calibration-crunch".format(ID)
# Create the directory from Python instead of `!mkdir -p {CRUNCH_DIR}`: no shell
# is involved, so an ID containing spaces or shell metacharacters cannot break
# or alter the command. exist_ok=True keeps the "fine if it already exists"
# behaviour of `mkdir -p`; a non-directory at that path now raises an error.
os.makedirs(CRUNCH_DIR, exist_ok=True)
CRUNCH_DIR

## Define custom masks

In [None]:
# `regions` is a single comma-separated string of region names (it is split on
# "," here and again when the custom weights are registered).
regions = utils.get_regions()
# Show how many regions there are, then the raw string itself.
display(len(regions.split(",")))
regions

In [None]:
# Load the population data through the project helper.
# NOTE(review): presumably a gridded population field for 2020 with
# latitude/longitude coordinates (it is plotted and `.sel`-ed on those below)
# -- confirm in utils.
population_2020 = utils.load_pop_2020()

In [None]:
# Quick-look plot of the loaded population data.
population_2020.plot()

In [None]:
# Plot a subset: the points nearest to each integer latitude 0..50 and each
# integer longitude 70..130 (both ranges inclusive, hence the `+ 1`).
population_2020.sel(
    latitude=range(0, 50 + 1), longitude=range(70, 130 + 1), method="nearest"
).plot()

In [None]:
# Converted form of the population data; this is the object handed to the
# region-weight helpers further down.
# NOTE(review): presumably an iris cube, judging by the helper's name --
# confirm in utils.
population_2020_iris = utils.get_pop_2020_iris(population_2020)

In [None]:
# Switches for the two families of custom region weights registered below.
CRUNCH_NEAREST_REGION = True
CRUNCH_POPULATION_WEIGHTED = True

# Each enabled family adds one entry per region to this netcdf_scm mapping,
# keyed by the region name that is later passed to the crunch via `regions=`.
weights_functions = netcdf_scm.weights.WEIGHTS_FUNCTIONS_WITHOUT_AREA_WEIGHTING

crunch_region_names = []
for region in regions.split(","):
    if CRUNCH_NEAREST_REGION:
        region_nearest_resort = "Nearest {}".format(region)
        # NOTE(review): this helper receives the prefixed name ("Nearest ..."),
        # whereas the population-weighted helper below receives the bare
        # region name -- confirm in utils that this asymmetry is intended.
        weights_functions[region_nearest_resort] = (
            utils.get_natural_earth_50m_scale_nearest_last_resort_region_weights(
                region_nearest_resort, population_2020_iris
            )
        )
        crunch_region_names.append(region_nearest_resort)

    if CRUNCH_POPULATION_WEIGHTED:
        region_incl_pop = "Popn weighted {}".format(region)
        weights_functions[region_incl_pop] = (
            utils.get_natural_earth_50m_scale_popn_weighted_region_weights(
                region, population_2020_iris
            )
        )
        crunch_region_names.append(region_incl_pop)

# Comma-separated string of every registered custom region name.
regions_incl_pop = ",".join(crunch_region_names)
regions_incl_pop

## Run

In [None]:
# Path components used to select which files get crunched; each list is turned
# into a regular-expression alternation further down.
mips = ["CMIP"]
scenarios = ["abrupt-4xCO2", "piControl"]
members = [
    "r1i1p1f1",
    "r2i1p1f1",
    "r3i1p1f1",
    "r1i1p1f2",
    "r1i1p2f1",
    "r1i1p1f3",
    "r4i1p1f1",
    "r10i1p1f1",
    "r11i1p1f1",
]
variables = ["tas", "rsdt", "rlut", "rsut"]
tables = ["Amon"]


def get_regexp(inl, trail_slash=True):
    """Return a regex group matching any entry of ``inl`` as a path component.

    Every entry is prefixed with ``os.sep`` and the entries are joined with
    ``|`` inside one pair of parentheses. Entries are inserted verbatim, with
    no regex escaping.

    Parameters
    ----------
    inl
        Iterable of component names to allow.
    trail_slash
        If truthy, each entry is also followed by ``os.sep``; otherwise only
        the leading separator is added.

    Returns
    -------
    str
        The alternation, e.g. ``"(/a/|/b/)"`` where ``os.sep`` is ``"/"``.
    """
    suffix = os.sep if trail_slash else ""
    alternatives = ("{}{}{}".format(os.sep, entry, suffix) for entry in inl)
    return "({})".format("|".join(alternatives))


# One alternation per path component. Only the first (MIP) and last (variable)
# groups require a trailing separator.
# NOTE(review): the groups built with trail_slash=False also match longer
# names that merely start with a listed entry (e.g. "/piControl" matches
# "/piControl-something") -- confirm this over-matching is acceptable.
mip_regexp = get_regexp(mips)
scenarios_regexp = get_regexp(scenarios, trail_slash=False)
members_regexp = get_regexp(members, trail_slash=False)
tables_regexp = get_regexp(tables, trail_slash=False)
variables_regexp = get_regexp(variables)

# The pieces must appear in this order, with anything allowed before, between
# and after them.
regexp_parts = [
    mip_regexp,
    SOURCE_FILTER,
    scenarios_regexp,
    members_regexp,
    tables_regexp,
    variables_regexp,
]
regexp = ".*{}.*".format(".*".join(regexp_parts))
display(regexp)

# Crunch everything under the CMIP data root that matches `regexp`, writing
# the results into CRUNCH_DIR for the custom regions registered earlier.
# NOTE(review): `_crunch_data` is underscore-prefixed, i.e. not part of
# netcdf_scm's public interface, so its signature may differ between
# releases -- check against the version displayed earlier in this notebook.
netcdf_scm.crunching._crunch_data(
    "/data/cmip6/CMIP6/CMIP",  # input data root
    CRUNCH_DIR,  # output directory
    "Zebedee Nicholls <zebedee.nicholls@climate-energy-college.org>",
    drs="CMIP6Output",
    regexp=regexp,
    regions=regions_incl_pop,
    data_sub_dir="netcdf-scm-crunched",
    force=False,
    # NOTE(review): the worker-count/threshold arguments presumably tune how
    # much parallelism is used for inputs of different sizes -- confirm units
    # and meaning in the netcdf_scm documentation.
    small_number_workers=20,
    small_threshold=100,
    medium_number_workers=5,
    medium_threshold=400,
    force_lazy_threshold=600,
    cell_weights=None,
)