# Eobs data analysis

In [None]:
# Small helper lib.
import attribution

# Others.
import iris
import iris.coord_categorisation
import iris.quickplot as qplt
import iris.plot as iplt
import iris_utils
from matplotlib import pyplot as plt
import numpy as np
import scipy.stats as scstats
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import dask
from dask.distributed import Client
import os
import glob
import scipy
import pandas as pd
import geopandas as gpd

In [None]:
# Start a dask.distributed client with two workers and keep a handle to it.
client = Client(n_workers=2)
# Uncomment to display the client in the notebook.
# client

In [None]:
# Get the sweref projection (EPSG code 3006).
# It is used as the map projection of the region plot further down.
sweref = ccrs.epsg(3006)

In [None]:
# Natural Earth 10m "admin 0 countries" shapefile: outlines of most countries.
# Downloads: https://www.naturalearthdata.com/downloads/10m-cultural-vectors/
fname = "/home/sm_erhol/data/ne_10_admin_0_countries/ne_10m_admin_0_countries.shp"
gdf = gpd.read_file(fname)

# Keep only the geometries of rows whose sovereign state is Sweden.
is_sweden = gdf["SOVEREIGNT"] == "Sweden"
swe_shapes = gdf[is_sweden].geometry

# The first matching row is a multi-part geometry; pick its first part.
# NOTE(review): this assumes part 0 is the mainland polygon - confirm
# against the shapefile (e.g. by checking that it is the largest part).
swe_parts = swe_shapes.iloc[0].geoms
swe_mainland = swe_parts[0]

## Get the data

In [None]:
# The GridClim cube is read first. It is only needed for the first
# extraction, where its extent limits how much E-OBS data we deal with.
base_path = "/nobackup/smhid17/proj/sik/SMHIGridClim_NORDIC-11/v0.9/netcdf/day/pr/"

# List of the netCDF files in the base path matching the wildcard.
files = glob.glob(os.path.join(base_path, "*.nc"))

# Load all files, equalise their attributes (the list of what was removed is
# kept in `removed`) and concatenate the pieces on time into a single cube.
gridclim_cubes = iris.load(files)
removed = iris.util.equalise_attributes(gridclim_cubes)
cube = gridclim_cubes.concatenate_cube()

Start working on the EOBS data

In [None]:
# Directory holding the E-OBS data; only files whose names start with "pr"
# and end with ".nc" are picked up. Note that `files` is re-used here.
base_path_eobs = "/home/rossby/imports/obs/EOBS/EOBS24-0e/EUR-10/remap/EUR-11/day/"
eobs_pattern = os.path.join(base_path_eobs, "pr*.nc")
files = glob.glob(eobs_pattern)

In [None]:
# Load every matched E-OBS file. At this point `eobs_cube` holds the loaded
# pieces; they are concatenated into one cube a few cells further down.
eobs_cube = iris.load(files)

In [None]:
# We need to equalise the attributes in order to concatenate.
# The return value of the call is kept in `removed` for inspection.
removed = iris.util.equalise_attributes(eobs_cube)

In [None]:
# Concatenate the loaded pieces into one cube and rebind `eobs_cube` to it.
eobs_cube = eobs_cube.concatenate_cube()

In [None]:
# We extract the data over the GridClim region. No need for all of Europe.
ref_lats = grid_latitude = cube.coord("grid_latitude").points
ref_lons = grid_longitude = cube.coord("grid_longitude").points

# Compute the bounds of the reference grid once. The constraint callables
# below are evaluated for every coordinate cell, so calling min()/max() on
# the full coordinate arrays inside them would repeat the same work each time.
lat_min, lat_max = ref_lats.min(), ref_lats.max()
lon_min, lon_max = ref_lons.min(), ref_lons.max()

# Define the constraint: keep cells inside the (inclusive) GridClim extent.
# NOTE(review): the comparison is exact; this presumes both datasets share
# the same rotated grid so that edge points are not lost to rounding.
constraint = iris.Constraint(
    grid_latitude=lambda v: lat_min <= v <= lat_max,
    grid_longitude=lambda v: lon_min <= v <= lon_max,
)

In [None]:
# Extract: keep only the part of the E-OBS cube that satisfies `constraint`
# and rebind `eobs_cube` to the result.
eobs_cube = eobs_cube.extract(constraint)

In [None]:
# Display the extracted cube in the notebook.
eobs_cube

### Mask Sweden

In [None]:
# Create a mask from the Swedish mainland shape on the cube's
# grid_latitude/grid_longitude coordinates.
# NOTE(review): presumably True marks the points to be masked out (outside
# the shape) - confirm in iris_utils.mask_from_shape.
mask = iris_utils.mask_from_shape(
    eobs_cube, swe_mainland, coord_names=("grid_latitude", "grid_longitude")
)

In [None]:
# Apply the mask to the cube.
# Keep the returned cube instead of discarding it: depending on the Iris
# version, mask_cube either masks its argument in place and returns it, or
# (with the newer default in_place=False) leaves the argument untouched and
# returns a masked copy. Rebinding works in both cases, whereas throwing the
# result away would leave eobs_cube unmasked on newer versions.
eobs_cube = iris.util.mask_cube(eobs_cube, mask)

## Event definition

161 mm in 24 hours equals a mean intensity (in mm per second) of

In [None]:
# Event threshold: 161 mm accumulated over one day, expressed per second.
event_precip_mm = 161
seconds_per_day = 24 * 3600
threshold = event_precip_mm / seconds_per_day

In [None]:
# Display the threshold value (mm per second).
threshold

## Region selection

In [None]:
# We load in the transformed points generated in the eobs notebook.
# We can do this since the cubes share coordinate system.
# The array is used below with column 0 as grid_longitude and column 1 as
# grid_latitude.
mask_points = np.load("./data/region_points_transformed.npy")

In [None]:
# Create the constraint: the bounding box of the region points.
# Column 0 of mask_points holds grid_longitude, column 1 grid_latitude.
region_lons = mask_points[:, 0]
region_lats = mask_points[:, 1]

# Compute the box limits once; the constraint callables run for every
# coordinate cell, so slicing and reducing the array inside them would
# repeat the same work each time.
region_lat_min, region_lat_max = region_lats.min(), region_lats.max()
region_lon_min, region_lon_max = region_lons.min(), region_lons.max()

region_constraint = iris.Constraint(
    grid_latitude=lambda v: region_lat_min <= v <= region_lat_max,
    grid_longitude=lambda v: region_lon_min <= v <= region_lon_max,
)

In [None]:
# Extract the region: the part of the masked E-OBS cube that falls inside
# the bounding box defined by `region_constraint`.
reg_cube = eobs_cube.extract(region_constraint)

Make sure the region selection worked.

In [None]:
# Map of the first time step of the regional cube, drawn in the sweref
# projection with 30 filled contour levels.
fig = plt.figure(figsize=(7, 9))
ax = fig.add_subplot(1, 1, 1, projection=sweref)
first_step = reg_cube[0, :, :]
iplt.contourf(first_step, 30, axes=ax)
ax.coastlines()
# Use a wider lon/lat extent than the region itself to put the data into
# context.
ax.set_extent([10, 20, 50, 75], crs=ccrs.PlateCarree())

## Get Rx1
Now we can start looking at the extremes, e.g. annual Rx1.
Since we already have daily values, Rx1 should simply be the annual maximum of the daily precipitation.

In [None]:
# Add a year categorisation derived from the time coordinate.
# Adding a coordinate that already exists raises an error, so skip the call
# when "year" is present; this keeps the cell safe to re-run.
if not reg_cube.coords("year"):
    iris.coord_categorisation.add_year(reg_cube, "time")

Get the annual maximums

In [None]:
# Annual maximum at every grid point: group the time steps by the "year"
# coordinate and take the maximum within each group.
rx1_ann = reg_cube.aggregated_by("year", iris.analysis.MAX)

In [None]:
# Histogram of all unmasked annual maxima, normalised to a density.
# Note: the density values are far above one because the bins are very
# narrow (width of roughly 0.0001); the bar areas still integrate to one.
rx1_values = rx1_ann.data.compressed()
_ = plt.hist(rx1_values, density=True)

## Fit a GEV distribution to Rx1.
We use scipy to fit a GEV distribution to this sample.

In [None]:
# Sample for the fit: every unmasked annual maximum, pooled into one 1-D array.
data = rx1_ann.data.compressed()
# Distribution to fit: the GEV distribution object from scipy.stats.
dist = scstats.genextreme

Run the bootstrap.
Note that if it has already been run before, we can load the saved results instead.

In [None]:
# We get an nx3 array of the fit params from the bootstrap.
# The result is cached on disk: load it when the file is there, otherwise
# run the bootstrap and save it for next time.
# np.save appends ".npy" to a file name that lacks it, while np.load uses the
# name as given, so the full name including the extension is spelled out.
# NOTE: delete the cached file if `data` or `dist` change, otherwise stale
# fits are loaded.
fits_path = "./data/fits_ci_eobs.npy"
if os.path.exists(fits_path):
    results = np.load(fits_path)
else:
    results = attribution.bootstrap_fit(data, dist)
    np.save(fits_path, results)

In [None]:
# 5 %, 50 % and 95 % quantiles of each fit parameter, taken across the
# bootstrap samples (axis 0). Row order follows the quantile levels.
quantile_levels = [0.05, 0.5, 0.95]
fits_ci = np.quantile(results, quantile_levels, axis=0)

In [None]:
# Display the quantiles of the fit parameters (one row per quantile level).
fits_ci

## Regression to GMST
To scale the above distribution with the use of GMST we first need to fit a regression between the Rx1 and GMST.
The slope of the regression can then be used for the scaling.

But first we load the GISTEMP data from NASA.

In [None]:
# This gives us the smoothed gmst data  for the timespan
# covered by the cube.
# NOTE(review): presumably one value per year, shaped so it can be used
# directly as the regressor below - confirm in attribution.get_gmst.
gmst_data = attribution.get_gmst(reg_cube)

In [None]:
# Pull the data out of the rx1 cube as a 2-D array: the first (year)
# dimension is kept and all remaining dimensions are flattened into one.
n_years = rx1_ann.shape[0]
rx1_ann_data = rx1_ann.data.reshape((n_years, -1))

In [None]:
# Check that first dimensions match: the regression needs one GMST value for
# every row of annual maxima. The message reports both lengths so a
# mismatch can be diagnosed without re-inspecting the arrays.
n_rx1_rows, n_gmst_rows = rx1_ann_data.shape[0], gmst_data.shape[0]
assert n_rx1_rows == n_gmst_rows, (
    f"First dimensions differ: Rx1 has {n_rx1_rows} rows, "
    f"GMST has {n_gmst_rows} rows."
)

In [None]:
# For the linear regression we use Sklearn.
from sklearn.linear_model import LinearRegression

In [None]:
# One call fits a separate linear regression for every grid point: each
# column of rx1_ann_data is treated as its own target regressed on GMST.
# NOTE(review): rx1_ann_data comes from a masked cube; scikit-learn
# presumably ignores the mask and fits on the underlying values, so the
# coefficients at masked points may be meaningless - confirm how they are
# used downstream.
ols = LinearRegression()
reg = ols.fit(gmst_data, rx1_ann_data)

## Scale distributions

In [None]:
# Current-climate distributions with CI: one frozen distribution per row of
# fits_ci, each row holding the three fit parameters in positional order.
dists_ci = [
    dist(shape, loc=loc, scale=scale) for shape, loc, scale in fits_ci
]

In [None]:
# Here we get all the scaled distributions, built from the fit parameter
# quantiles, the GMST regression and the distribution object.
# NOTE(review): the scaling itself is implemented in
# attribution.scale_distributions - see that helper for the details.
all_scaled_dists = attribution.scale_distributions(fits_ci, reg, dist)

In [None]:
# Plot the sample together with the current-climate and scaled distributions.
attribution.plot_distribution(data, dists_ci, all_scaled_dists, title="Rx1 EOBS")

## Probabilities

The probability ratio(s) (PR) for an event of the magnitude of the Gävle event.

In [None]:
# Probability ratios at the event threshold, computed from the
# current-climate and the scaled distributions.
prob_ratios = attribution.get_probability_ratios(dists_ci, all_scaled_dists, threshold)

In [None]:
# Save the probability ratios. np.save appends ".npy" to the name, so the
# file on disk is "./data/pr_eobs.npy".
np.save("./data/pr_eobs", prob_ratios)

In [None]:
# Display the probability ratios.
prob_ratios