In this 7th tutorial we will study climate model output for different cities around the world, and look at how extreme temperature and precipitation change over time and across emission scenarios.

In [None]:
# #install dependencies - taken from <Yosmely Bermúdez> comments for Tutorial 6
# # We need this to install eigen which is needed for SDFC to install correctly
# !pip install -q condacolab
# import condacolab
# condacolab.install()
# !conda install eigen
# !pip install -v https://github.com/yrobink/SDFC/archive/master.zip#subdirectory=python
# !pip install https://github.com/njleach/mystatsfunctions/archive/master.zip
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas
# !pip install cartopy
# !pip install scipy
# !pip install texttable

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import cartopy.crs as ccrs
from scipy import stats

In [None]:
import extremes_functions as ef
from mystatsfunctions import OLSE,LMoments
import SDFC as sd

In [None]:
# Shorthand for SciPy's generalized extreme value (GEV) distribution object,
# used further down to evaluate fitted probability densities.
# NOTE(review): the plotting cells in this notebook negate the fitted shape
# before passing it to this distribution, presumably because SciPy uses the
# opposite sign convention for the shape parameter — confirm.
gev = stats.genextreme

In [None]:
def estimate_return_level(quantile,model):
    """Return the GEV return level belonging to a given quantile.

    Parameters
    ----------
    quantile : float or array-like
        Probability at which the fitted distribution is inverted
        (the value passed as ``q`` to ``genextreme.ppf`` in the
        equivalent SciPy call noted below).
    model : object
        Fitted model whose ``coef_`` attribute unpacks to
        ``(loc, scale, shape)``.

    Returns
    -------
    float or numpy.ndarray
        ``loc - scale / shape * (1 - (-log(quantile)) ** (-shape))``, or the
        Gumbel limit ``loc - scale * log(-log(quantile))`` when the shape is
        exactly zero.

    Notes
    -----
    Equivalent to
    ``stats.genextreme.ppf(quantile, -shape, loc=loc, scale=scale)``.
    """
    loc, scale, shape = model.coef_

    # A zero shape is the Gumbel special case; the general formula would
    # divide by zero there, so use its analytic limit instead.
    if np.all(np.equal(shape, 0)):
        return loc - scale * np.log(-np.log(quantile))

    level = loc - scale / shape * (1 - (-np.log(quantile))**(-shape))
    return level

## Load CMIP6 data

## Shortcut

In [None]:
import os
import pooch

# Prefer a copy of the pre-computed dataset in the working directory;
# otherwise let pooch download it and hand back the path of the fetched file.
fname = 'cmip6_data_city_daily_scenarios_tasmax_pr_models.nc'
if not os.path.exists(fname):
    url = "https://osf.io/ngafk/download"
    # known_hash=None: the download is not verified against a checksum
    fname = pooch.retrieve(url, known_hash=None)

# Open whichever path was resolved above
# (earlier variant: xr.open_dataset('cmip6_data_city_daily_scenarios.nc'))
data = xr.open_dataset(fname)

## Through pangeo

The code below recreates the data loaded in the previous line.

In this way you can access large amounts of climate model output that has been stored in the cloud. This is very useful to get easy access to such information. Feel free to modify the code to access different data and address your own questions.

In [None]:
import intake

from xmip.preprocessing import combined_preprocessing
from xarrayutils.plotting import shaded_line_plot

from xmip.utils import google_cmip_col
# Open the CMIP6 catalogue through xmip's helper; `col` is the object that is
# searched in the next cell.
# NOTE(review): presumably this is the Google Cloud (Pangeo) CMIP6 catalogue — confirm.
# we could do all of this with pure pandas on the underlying csv file
col = google_cmip_col()

In [None]:
# Search the catalogue for daily output of one model, one ensemble member and
# the historical run plus three future scenarios.
cat = col.search(
    # source_id=['IPSL-CM6A-LR', 'GFDL-ESM4', 'ACCESS-CM2', 'MPI-ESM1-2-LR', 'TaiESM1'],
    # source_id=['CESM2','MPI-ESM1-2-HR'],
    source_id='MPI-ESM1-2-HR',
    # 'tasmax' (daily maximum temperature), not 'tas': the shortcut file this
    # cell recreates and all later cells work with the `tasmax` variable.
    variable_id=['pr','tasmax'],
    member_id='r1i1p1f1', #
    table_id='day',
    grid_label='gn',
    experiment_id = ['historical','ssp126', 'ssp245', 'ssp585'],
    # experiment_id = ['historical', 'ssp126', 'ssp585'],
    # require_all_on = ['source_id']
)
# NOTE(review): `kwargs` is defined but not passed to `to_dataset_dict` below.
# Passing `preprocess=combined_preprocessing` may rename/reshape the lon/lat
# coordinates that the later `.sel(lon=..., lat=..., method='nearest')` calls
# rely on — verify before wiring it in.
kwargs = dict(preprocess=combined_preprocessing, xarray_open_kwargs=dict(use_cftime=True))
# One dataset per catalogue key, opened from consolidated zarr stores.
ds_dict = cat.to_dataset_dict(zarr_kwargs={'consolidated': True})

Define one or more locations

In [None]:
# Coordinates (degrees) of the cities to extract from the model grid.
# Longitudes are written in the 0-360 convention, so a western longitude
# appears as 360 - degrees_west (see Madrid and Phoenix).
sel_Hamburg = dict(lon=10,lat=53.5)
# Madrid lies at about 3.7°W; the previous value 360-42 pointed to the
# middle of the Atlantic.
sel_Madrid = dict(lon=360-3.7,lat=40.5)
sel_Delhi = dict(lon=77,lat=28.5)
sel_Kinshasa = dict(lon=15,lat=-4)
sel_Phoenix = dict(lon=360-112,lat=33.5)
sel_Sydney = dict(lon=151,lat=-33.85)

# Lookup from city name to its coordinate selector; the keys become the
# labels of the `city` dimension when the data is assembled.
sels = dict(
    Hamburg = sel_Hamburg,
    Madrid = sel_Madrid,
    Delhi = sel_Delhi,
    Kinshasa = sel_Kinshasa,
    Phoenix = sel_Phoenix,
    Sydney = sel_Sydney
)

Assemble your data

In [None]:
# Show the keys of the loaded datasets; the assembly cell below indexes
# `ds_dict` with keys of the form 'CMIP.MPI-M.<model>.historical.day.gn' and
# 'ScenarioMIP.DKRZ.<model>.<scenario>.day.gn'.
ds_dict.keys()

In [None]:
model = 'MPI-ESM1-2-HR'
scenarios = ['ssp126','ssp245','ssp585']

# The historical run is shared by every scenario, so look it up once.
historical = ds_dict['CMIP.MPI-M.%s.%s.day.gn' % (model, 'historical')]

outs = []
for city in sels:
    location = sels[city]
    # Historical part at the grid cell nearest to the city, up to and including 2014
    historical_at_city = historical.sel(location, method='nearest').sel(time=slice('2014'))

    outis = []
    for scenario in scenarios:
        future = ds_dict['ScenarioMIP.DKRZ.%s.%s.day.gn' % (model, scenario)]
        future_at_city = future.sel(location, method='nearest')

        # Glue historical and scenario output together along time, then
        # label the result with its city and scenario.
        timeseries = xr.concat([historical_at_city, future_at_city], 'time')
        timeseries = timeseries.assign_coords(city=city, scenario=scenario).squeeze()
        outis.append(timeseries)

    # Stack the scenarios of this city along a new `scenario` dimension
    outs.append(xr.concat(outis, 'scenario'))

# Stack all cities along a new `city` dimension
data = xr.concat(outs, 'city')

In [None]:
# Report the size of the assembled dataset in megabytes (bytes / 1e6),
# as a quick check before loading it into memory.
print(f'The data has {data.nbytes / 1e6:.3f} MB')

In [None]:
# data.load()

## Processing

Look at the data for one selected city, for one climate model

In [None]:
# City to analyse in the cells below.
# NOTE(review): presumably this must match one of the names defined in `sels` — confirm.
city = 'Madrid'

In [None]:
# data_city = data.sel(city=city,model='MPI-ESM1-2-HR')

In [None]:
# data_city

The data has daily resolution and covers three climate scenarios. The scenarios branch off from the historical run after 2014, so they are identical up to 2014 and diverge afterwards.

In [None]:
# fig, ax = plt.subplots(2,sharex=True,figsize=(10,3),constrained_layout=True)
# data_city['tasmax'].plot(hue='scenario',ax=ax[0])
# data_city['pr'].plot(hue='scenario',ax=ax[1])

# ax[0].set_title('Daily maximum surface air temperature')
# ax[1].set_title('Precipitation')

Do the same with a 30-year rolling mean

In [None]:
# fig, ax = plt.subplots(2,sharex=True,figsize=(10,3),constrained_layout=True)
# data_city['tasmax'].rolling(time=30*365).mean().plot(hue='scenario',ax=ax[0])
# data_city['pr'].rolling(time=30*365).mean().plot(hue='scenario',ax=ax[1],add_legend=False)

# ax[0].set_title('Daily maximum surface air temperature')
# ax[1].set_title('Precipitation')

In the previous tutorials we have been operating on annual maxima data. That is, we want to take the day in each year with the highest temperature or the largest amount of rainfall.

Let's focus on precipitation for now, and compute the maximum for each year

In [None]:
import xarray as xr

# Small illustration of resampling: twelve monthly values starting
# mid-December 1999 ...
monthly_index = pd.date_range(
    "1999-12-15",
    periods=12,
    freq=pd.DateOffset(months=1),
)
monthly_values = xr.DataArray(
    np.linspace(0, 11, num=12),
    coords=[monthly_index],
    dims="time",
)
# ... averaged over quarters that start in December
monthly_values.resample(time="QS-DEC").mean()

In [None]:
# pr_city = data_city['pr']
# pr_city_max= pr_city.resample(time='1Y').max()

In [None]:
# pr_city.groupby('time.year').max()

In [None]:
# fig, ax = plt.subplots()
# pr_city_max.plot(hue='scenario',ax=ax)

# ax.set_title('Annual maximum precipitation')

Repeat the previous analysis: take the historical run (1850-2014), look at three 30-year periods, and compute extreme-value statistics. The periods differ from those used before because the historical run ends in 2014. All scenarios are identical during the historical period, so it is enough to select just one of them.

In [None]:
# data_period1 = pr_city_max.sel(scenario='ssp245',time=slice('2014')).sel(time=slice('1925','1954')).to_pandas()
# data_period2 = pr_city_max.sel(scenario='ssp245',time=slice('2014')).sel(time=slice('1955','1984')).to_pandas()
# data_period3 = pr_city_max.sel(scenario='ssp245',time=slice('2014')).sel(time=slice('1985','2015')).to_pandas()

In [None]:
# sns.histplot(data_period1,bins=np.arange(20,90,5),color='C0',element='step',alpha=0.5,kde=True,label='1931-1960')
# sns.histplot(data_period2,bins=np.arange(20,90,5),color='C1',element='step',alpha=0.5,kde=True,label='1961-1990')
# sns.histplot(data_period3,bins=np.arange(20,90,5),color='C2',element='step',alpha=0.5,kde=True,label='1991-2020')
# plt.legend()

In [None]:
# periods_stats = pd.DataFrame(index=['Mean','Standard Deviation','Skew'])
# periods_stats['1931-1960'] = [data_period1.mean(), data_period1.std(), data_period1.skew()]
# periods_stats['1961-1990'] = [data_period2.mean(), data_period2.std(), data_period2.skew()]
# periods_stats['1991-2020'] = [data_period3.mean(), data_period3.std(), data_period3.skew()]

# periods_stats = periods_stats.T
# periods_stats

In [None]:
# fit_period1, model_period1 = ef.fit_return_levels_sdfc(data_period1.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)
# fit_period2, model_period2 = ef.fit_return_levels_sdfc(data_period2.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)
# fit_period3, model_period3 = ef.fit_return_levels_sdfc(data_period3.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)

In [None]:
# fig, ax = plt.subplots()
# x = np.linspace(20,90,1000)
# ax.plot(x,gev.pdf(
#     x,
#     -model_period1.shape.mean(),
#     loc=model_period1.loc.mean(),
#     scale=model_period1.scale.mean()
#     ),
#     c='C0',lw=3,label='1931-1960')
# ax.plot(x,gev.pdf(
#     x,
#     -model_period2.shape.mean(),
#     loc=model_period2.loc.mean(),
#     scale=model_period2.scale.mean()
#     ),
#     c='C1',lw=3,label='1961-1990')
# ax.plot(x,gev.pdf(
#     x,
#     -model_period3.shape.mean(),
#     loc=model_period3.loc.mean(),
#     scale=model_period3.scale.mean()
#     ),
#     c='C2',lw=3,label='1991-2020')
# ax.legend()
# ax.set_xlabel('annual maximum precipitation (mm/day)')
# ax.set_ylabel('Density')

In [None]:
# parameters = pd.DataFrame(index=['Location', 'Scale', 'Shape'])
# parameters['1931-1960'] = [model_period1.loc.mean(), model_period1.scale.mean(), model_period1.shape.mean()]
# parameters['1961-1990'] = [model_period2.loc.mean(), model_period2.scale.mean(), model_period2.shape.mean()]
# parameters['1991-2020'] = [model_period3.loc.mean(), model_period3.scale.mean(), model_period3.shape.mean()]

# parameters = parameters.T
# parameters.round(4)#.astype('%.2f')

# Scenarios

Now let's look at hot days in possible climate futures: the 30 years from 2050 to 2079.

In [None]:
# data

In [None]:
# data_city = data.sel(city=city,model='MPI-ESM1-2-HR')

In [None]:
# tasmax_city_fut = data_city['tasmax'].sel(time=slice('2050','2079')).resample(time='1Y').max()

In [None]:
# sns.histplot(data=tasmax_city_fut.to_dataframe()['tasmax'].reset_index(),x='tasmax',hue='scenario',kde=True,bins=np.arange(294,302,0.5))#.plot.hist(hue='scenario')

In [None]:
# data_period1 = tasmax_city_fut.sel(scenario='ssp126').to_pandas()
# data_period2 = tasmax_city_fut.sel(scenario='ssp245').to_pandas()
# data_period3 = tasmax_city_fut.sel(scenario='ssp585').to_pandas()

In [None]:
# sns.histplot(data_period1,bins=np.arange(294,302,0.5),color='C0',element='step',alpha=0.5,kde=True,label='1931-1960')
# sns.histplot(data_period2,bins=np.arange(294,302,0.5),color='C1',element='step',alpha=0.5,kde=True,label='1961-1990')
# sns.histplot(data_period3,bins=np.arange(294,302,0.5),color='C2',element='step',alpha=0.5,kde=True,label='1991-2020')
# plt.legend()

In [None]:
# periods_stats = pd.DataFrame(index=['Mean','Standard Deviation','Skew'])
# periods_stats['1931-1960'] = [data_period1.mean(), data_period1.std(), data_period1.skew()]
# periods_stats['1961-1990'] = [data_period2.mean(), data_period2.std(), data_period2.skew()]
# periods_stats['1991-2020'] = [data_period3.mean(), data_period3.std(), data_period3.skew()]

# periods_stats = periods_stats.T
# periods_stats

In [None]:
# fit_period1, model_period1 = ef.fit_return_levels_sdfc(data_period1.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)
# fit_period2, model_period2 = ef.fit_return_levels_sdfc(data_period2.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)
# fit_period3, model_period3 = ef.fit_return_levels_sdfc(data_period3.values,times=np.arange(1.1,1000),periods_per_year=1,kind='GEV',N_boot=1000,full=True,model=True)

In [None]:
# fig, ax = plt.subplots()
# x = np.linspace(293,302,1000)
# ax.plot(x,gev.pdf(
#     x,
#     -model_period1.shape.mean(),
#     loc=model_period1.loc.mean(),
#     scale=model_period1.scale.mean()
#     ),
#     c='C0',lw=3,label='1931-1960')
# ax.plot(x,gev.pdf(
#     x,
#     -model_period2.shape.mean(),
#     loc=model_period2.loc.mean(),
#     scale=model_period2.scale.mean()
#     ),
#     c='C1',lw=3,label='1961-1990')
# ax.plot(x,gev.pdf(
#     x,
#     -model_period3.shape.mean(),
#     loc=model_period3.loc.mean(),
#     scale=model_period3.scale.mean()
#     ),
#     c='C2',lw=3,label='1991-2020')
# ax.legend()
# ax.set_xlabel('annual maximum daily maximum temperature (K)')
# ax.set_ylabel('Density')

In [None]:
# parameters = pd.DataFrame(index=['Location', 'Scale', 'Shape'])
# parameters['1931-1960'] = [model_period1.loc.mean(), model_period1.scale.mean(), model_period1.shape.mean()]
# parameters['1961-1990'] = [model_period2.loc.mean(), model_period2.scale.mean(), model_period2.shape.mean()]
# parameters['1991-2020'] = [model_period3.loc.mean(), model_period3.scale.mean(), model_period3.shape.mean()]

# parameters = parameters.T
# parameters.round(4)#.astype('%.2f')

In [None]:
# ef.plot_levels_from_obj(fit_period1,alpha=0.95)
# ef.plot_levels_from_obj(fit_period2,alpha=0.95,ax=plt.gca(),c='C1')
# ef.plot_levels_from_obj(fit_period3,alpha=0.95,ax=plt.gca(),c='C2')
# # plt.xlim(1.5,1000)
# # plt.ylim(0,None)
# # plt.semilogx()

# Potential add-on: spatial analysis

TBD