# A look at gridclim

In [None]:
# Small helper lib.
import attribution

# Others.
import iris
import iris.coord_categorisation
import iris.quickplot as qplt
import iris.plot as iplt
from matplotlib import pyplot as plt
import numpy as np
import scipy.stats as scstats
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import dask
from dask.distributed import Client
import os
import glob
import scipy
import pandas as pd
import geopandas as gpd

In [None]:
# Start a dask.distributed client backed by two workers.
client = Client(n_workers=2)
# Uncomment to display the client summary in the notebook.
# client

In [None]:
# Get the sweref projection.
# EPSG:3006 is SWEREF99 TM; it is used as the map projection of the plots.
sweref = ccrs.epsg(3006)

In [None]:
# This file contains shapes of most countries in the world.
# https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-boundary-lines/
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
fname = "/home/sm_erhol/data/ne_10_admin_0_countries/ne_10m_admin_0_countries.shp"

In [None]:
# Read the country shapefile with geopandas.
gdf = gpd.read_file(fname)

In [None]:
# Select Sweden.
swe_shapes = gdf[gdf.SOVEREIGNT == "Sweden"].geometry
# Take the first polygon of the first matching geometry.
# NOTE(review): presumably this polygon is the mainland (hence the name) -- confirm.
swe_mainland = swe_shapes.iloc[0].geoms[0]

## Load the data
Let's load the SweGridClim data.

In [None]:
# Directory searched for the netCDF files (the path points at daily "pr" data).
base_path = "/nobackup/smhid17/proj/sik/SMHIGridClim_NORDIC-11/v0.9/netcdf/day/pr/"

In [None]:
# List the netCDF files in the base path matching the wildcard.
# os.path.join keeps the pattern valid whether or not base_path ends with a
# separator, and sorting makes the file order reproducible (glob returns
# matches in arbitrary order).
files = sorted(glob.glob(os.path.join(base_path, "*.nc")))

In [None]:
# Load all files. Despite the singular name, `cube` holds several cubes
# until they are concatenated into one.
cube = iris.load(files)

We want to combine these cubes into a single cube. Since we join them along time, we concatenate rather than merge.
But we have to remove some mismatching attributes first.

In [None]:
# Equalise the attributes across the loaded cubes so they can be joined;
# `removed` keeps whatever equalise_attributes returns for inspection.
removed = iris.util.equalise_attributes(cube)

Now we should hopefully be able to concatenate.

In [None]:
# We concat on time.
# From here on `cube` refers to the single concatenated cube.
cube = cube.concatenate_cube()

In [None]:
# Display the concatenated cube.
cube

Extract data for Sweden

In [None]:
import iris_utils

In [None]:
# Create a mask from a polygon, using func from iris_utils.
# This should work on 2 and 3d cubes.
# NOTE(review): coord_names presumably names the horizontal coordinates used
# to locate grid points relative to the polygon -- confirm in iris_utils.
mask = iris_utils.utils.mask_from_shape(
    cube, swe_mainland, coord_names=("grid_latitude", "grid_longitude")
)

In [None]:
# This will modify the cube in place as well.
# NOTE(review): called as iris_utils.mask_cube here, while mask_from_shape is
# reached via iris_utils.utils -- confirm that both access paths exist.
iris_utils.mask_cube(cube, mask)

## Event definition
- It rained 161 mm in 24 hours in Gävle during the event.
- This corresponds to an intensity of about $1.86 \times 10^{-3}$ mm/s (equivalently kg m$^{-2}$ s$^{-1}$), computed below.

In [None]:
# Event intensity in [mm/s or kg/m2/s]: 161 mm spread over 24 hours.
threshold = 161 / (24 * 3600)

In [None]:
# Display the threshold value.
threshold

which we can define as the event to look for.

We can quickly check for this in the whole of GridClim by looking at the maximum value:

In [None]:
# Largest value in the whole cube, to compare against the threshold.
# NOTE(review): if the cube data is still lazy this may display an
# uncomputed dask result rather than a number -- confirm.
cube.core_data().max()

This however raises the question, is it a fair comparison to take the daily intensity of the gridded product and compare it to station data like this?

## Region selection
We probably don't want to look over all of Sweden.
Which region should we select the data over?
Some box around Gävle, where data should be homogeneous.

Could make an average map and use this to select an area around POI?

In [None]:
# Mean over the time coordinate; used for the overview map.
clim_cube = cube.collapsed("time", iris.analysis.MEAN)

In [None]:
# Corners of the selection box in plain lon/lat, one row per axis.
mask_points = [
    [14.5, 14.5, 19.5, 19.5],  # longitudes
    [57.7, 61.2, 57.7, 61.2],  # latitudes
]

In [None]:
# Location of Gävle as latitude / longitude.
lat, lon = 60.73284099330242, 17.09885344649177

fig, ax = plt.subplots(figsize=(7, 9), subplot_kw=dict(projection=sweref))
# Filled contours (30 levels) of the time-mean field.
iplt.contourf(clim_cube, 30, axes=ax)

# Markers are given in plain lon/lat, hence the PlateCarree transform.
ax.scatter([lon], [lat], s=50, transform=ccrs.PlateCarree(), label="Gävle")
ax.scatter(
    *mask_points,
    s=50,
    c="k",
    transform=ccrs.PlateCarree(),
    label="Box corners",
)

ax.coastlines()
ax.legend()
# The trailing semicolon suppresses the notebook's text output.
ax.set_title("Average precipitation flux");

We then have to convert the coordinates to the CoordSystem of our cube.

In [None]:
# Get the coord system of the cube. Convert it to cartopy.
# The box corners are transformed into this projection before the region is selected.
target_projection = cube.coord_system().as_cartopy_projection()

In [None]:
# Box corners as a (2, n) array: row 0 is longitude, row 1 is latitude.
points = np.asarray(mask_points)
# Project the lon/lat corners into the cube's coordinate system; unpacking
# the array passes its two rows as the x and y arguments.
transformed_points = target_projection.transform_points(ccrs.PlateCarree(), *points)

In [None]:
# Save the transformed coordinates of the bounding box.
# np.save appends ".npy", so this writes ./data/region_points_transformed.npy.
np.save("./data/region_points_transformed", transformed_points)

Create a constraint from the converted corner coordinates.

In [None]:
def _strictly_between(values):
    """Return a predicate that is true for cells strictly inside (min, max) of *values*."""
    # Compute the bounds once instead of on every cell comparison.
    lower, upper = values.min(), values.max()
    return lambda cell: lower < cell < upper


# Create the constraint.
# Column 0 of transformed_points holds x (grid longitude), column 1 holds
# y (grid latitude).
region_constraint = iris.Constraint(
    grid_latitude=_strictly_between(transformed_points[:, 1]),
    grid_longitude=_strictly_between(transformed_points[:, 0]),
)

In [None]:
# And extract the region.
# reg_cube keeps only the grid points that satisfy region_constraint.
reg_cube = cube.extract(region_constraint)

In [None]:
# reg_cube

Look at the selected data.

In [None]:
# Location of Gävle as latitude / longitude.
lat, lon = 60.73284099330242, 17.09885344649177

fig, ax = plt.subplots(figsize=(7, 9), subplot_kw=dict(projection=sweref))
# Filled contours (30 levels) of the first time step of the selected region.
iplt.contourf(reg_cube[0, :, :], 30, axes=ax)

# Markers are given in plain lon/lat, hence the PlateCarree transform.
ax.scatter([lon], [lat], s=50, transform=ccrs.PlateCarree(), label="Gävle")
ax.scatter(
    *mask_points,
    s=20,
    c="k",
    transform=ccrs.PlateCarree(),
    label="Box corners",
)

ax.coastlines()
ax.legend()
# Widen the view beyond the data to put the region into context.
ax.set_extent([10, 20, 50, 75], crs=ccrs.PlateCarree())

## Fitting an extreme value distribution to Rx1
Now we can start looking at the extremes, e.g. annual Rx1.
In this case Rx1 should simply be the annual max?
Since we already have daily values.

In [None]:
# Add a year categorisation
# This modifies reg_cube; the "year" coordinate is what the annual
# aggregation groups by.
iris.coord_categorisation.add_year(reg_cube, "time")

Get the annual maximums

In [None]:
# Maximum within each year, i.e. the annual maximum of the daily values.
rx1_ann = reg_cube.aggregated_by("year", iris.analysis.MAX)

In [None]:
# Note, density is way above one since the bin values are so small.
# e.g. the width of each bin is ~0.0001, so the bar heights must be large
# for the histogram to integrate to 1.
plt.hist(rx1_ann.data.compressed(), density=True);

### Fit a GEV distribution.
We use scipy to fit a GEV distribution to this sample.

In [None]:
# Pool the annual maxima into a flat sample; compressed() drops masked values.
data = rx1_ann.data.compressed()
# scipy's generalised extreme value distribution.
dist = scstats.genextreme
# Fit the distribution parameters to the sample.
fit = dist.fit(data)

In [None]:
# Show the fitted parameters, reusing the stored result instead of running
# the fit a second time.
fit

In [None]:
# We get an nx3 array of the fit params.
# results = attribution.bootstrap_fit(data, dist)
# np.save("./data/fits_ci_gridclim", results)
# If we've already run the bootstrap, load the cached result instead.
# np.save appends ".npy" to the file name, so the extension has to be given
# explicitly when loading.
results = np.load("./data/fits_ci_gridclim.npy")

In [None]:
# 5 %, 50 % and 95 % quantiles of the bootstrap results, taken along axis 0.
fits_ci = np.quantile(results, [0.05, 0.5, 0.95], axis=0)

In [None]:
# Display the quantiles of the fit parameters.
fits_ci

## Regression to GMST
To scale the above distribution with the use of GMST we first need to fit a regression between the Rx1 and GMST.
The slope of the regression can then be used for the scaling.

But first we load the GISTEMP data from NASA.

In [None]:
# This gives us the smoothed gmst data for the timespan
# covered by the cube.
# NOTE(review): the regression needs a 2-D (n_years, 1) array as
# predictor -- confirm that get_gmst returns that shape.
gmst_data = attribution.get_gmst(reg_cube)

In [None]:
# Let's get the data of the rx1 cube.
# Flatten the spatial dimensions, keeping one row per year. The number of
# years is read from the cube instead of being hard-coded, so the notebook
# also works for other periods.
rx1_ann_data = rx1_ann.data.reshape(rx1_ann.shape[0], -1)

In [None]:
# Check that first dimensions match.
# The regression pairs one GMST value with one row of annual maxima.
assert rx1_ann_data.shape[0] == gmst_data.shape[0]

In [None]:
# Uncomment to look at the data.
# fig, ax = plt.subplots(figsize=(7, 7))
# ax.scatter(np.broadcast_to(gmst_data, rx1_ann_data.shape).flatten(),
#                             rx1_ann_data.flatten(), s=5);
# ax.set_xlabel("GMST")
# ax.set_ylabel("Precipitation intensity");
# ax.set_title("Pooled region scatter");

In [None]:
# For the linear regression we use Sklearn.
from sklearn.linear_model import LinearRegression

In [None]:
# This makes use of the multi-output regression feature: every column of
# rx1_ann_data is a separate target, so we get a regression against GMST
# for each point.
reg = LinearRegression().fit(gmst_data, rx1_ann_data)

We scale the distribution by making the location and scale a function of the temperature anomaly, using the slope of the regression.

$\mu = \mu_0 \mathrm{exp}(\alpha T' / \mu_0),\, \sigma = \sigma_0\mathrm{exp}(\alpha T'/ \mu_0)$

This is implemented in the `attribution.scale_dist_params`

In [None]:
# Frozen current-climate distributions, one per row of fits_ci
# (the quantiles of the fit parameters).
dists_ci = [dist(*params) for params in fits_ci]

In [None]:
# Scaled counterparts of the fitted distributions.
# NOTE(review): presumably one scaled distribution per regression slope and
# parameter set -- confirm in attribution.scale_distributions.
all_scaled_dists = attribution.scale_distributions(fits_ci, reg, dist)

In [None]:
# Plot the sample together with the current-climate and scaled distributions.
attribution.plot_distribution(data, dists_ci, all_scaled_dists, title="Rx1 GridClim")

## Probabilities

The probability ratio(s) (PR) for an event of the magnitude of the Gävle event.

In [None]:
# Evaluate the probability ratios at the event threshold defined earlier
# (161 mm in 24 h expressed in mm/s) rather than at a rounded copy of it.
prob_ratios = attribution.get_probability_ratios(dists_ci, all_scaled_dists, threshold)

In [None]:
# Store the probability ratios; np.save appends ".npy", so this writes
# ./data/pr_gridclim.npy.
np.save("./data/pr_gridclim", prob_ratios)

In [None]:
# Display the probability ratios.
prob_ratios

Since the PR CI includes 1, we cannot make an attribution statement for this event.