In [1]:
import os
import xarray as xr
import rioxarray as rio
import numpy as np
from glob import glob
import pandas as pd
from tqdm import tqdm
from urllib.request import urlopen
from rasterio.enums import Resampling

## Code for analysis of the soil carbon sink in TRENDYv12 S2 simulations

### Download the data

#### Find the relevant files from Mike O'Sullivan's gihub

In [2]:
data_dir = '../data/'
url = 'https://raw.githubusercontent.com/mdosullivan/GCB/main/fileIndex.json'
file_index = urlopen(url)
TRENDY_FILES = pd.read_json(file_index.read().decode('utf-8'))

# Take only the TRENDY v12 files from the S2 simulation, and only the carbon pools. Exclude CARDAMOM.
TRENDY_v12 = TRENDY_FILES[TRENDY_FILES[0].str.contains('trendyv12')]
TRENDY_v12_S2 = TRENDY_v12[TRENDY_v12[0].str.contains('/S2/')]
TRENDY_v12_S2_cPools = TRENDY_v12_S2[TRENDY_v12_S2[0].str.contains('_cSoil\.|cVeg\.|cLitter\.|cCwd\.|cProduct\.')]
TRENDY_v12_S2_cPools = TRENDY_v12_S2_cPools[~TRENDY_v12_S2_cPools[0].str.contains('CARDAMOM')]

  TRENDY_FILES = pd.read_json(file_index.read().decode('utf-8'))


In [27]:
TRENDY_v12_S2_nbp = TRENDY_v12_S2[TRENDY_v12_S2[0].str.contains('nbp\.|nbpAnnual')]
TRENDY_v12_S2_nbp = TRENDY_v12_S2_nbp[~TRENDY_v12_S2_nbp[0].str.contains('CARDAMOM')]

Download the files

In [28]:
aws_url = 'https://gcbo-opendata.s3.eu-west-2.amazonaws.com/'

for i, row in tqdm(TRENDY_v12_S2_cPools.iterrows()):
    
    # get the url for download
    download_url = aws_url + row[0]

    # define the destination directory and file
    dst_dir = data_dir + '/'.join(row[0].split('/')[1:-1])
    dst_file = row[0].split('/')[-1]

    # if destination directory does not exist, create it
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    # if file does not exist, download it
    if not os.path.exists(dst_dir + '/' + dst_file):
        print(f'Downloading {dst_file} to {dst_dir}')
        os.system(f'wget {download_url} -P {dst_dir}')

17it [00:00, 161.46it/s]

Downloading CABLE-POP_S2_cCwd.nc.gz to ../data/CABLEPOP/S2
Downloading CABLE-POP_S2_cLitter.nc.gz to ../data/CABLEPOP/S2
Downloading CABLE-POP_S2_cSoil.nc.gz to ../data/CABLEPOP/S2
Downloading CABLE-POP_S2_cVeg.nc.gz to ../data/CABLEPOP/S2
Downloading CLASSIC_S2_cLitter.nc to ../data/CLASSIC/S2
Downloading CLASSIC_S2_cSoil.nc to ../data/CLASSIC/S2
Downloading CLASSIC_S2_cVeg.nc to ../data/CLASSIC/S2
Downloading CLM5.0_S2_cCwd.nc to ../data/CLM5.0/S2
Downloading CLM5.0_S2_cLitter.nc to ../data/CLM5.0/S2
Downloading CLM5.0_S2_cProduct.nc to ../data/CLM5.0/S2
Downloading CLM5.0_S2_cSoil.nc to ../data/CLM5.0/S2
Downloading CLM5.0_S2_cVeg.nc to ../data/CLM5.0/S2
Downloading DLEM_S2_cCwd.nc to ../data/DLEM/S2
Downloading DLEM_S2_cLitter.nc to ../data/DLEM/S2
Downloading DLEM_S2_cProduct.nc to ../data/DLEM/S2
Downloading DLEM_S2_cSoil.nc to ../data/DLEM/S2
Downloading DLEM_S2_cVeg.nc to ../data/DLEM/S2
Downloading EDv3_S2_cProduct.nc to ../data/ED/S2
Downloading EDv3_S2_cSoil.nc to ../data/ED

52it [00:00, 151.52it/s]

Downloading ISBA-CTRIP_S2_cVeg.nc to ../data/ISBACTRIP/S2
Downloading JSBACH_S2_cLitter.nc to ../data/JSBACH/S2
Downloading JSBACH_S2_cProduct.nc to ../data/JSBACH/S2
Downloading JSBACH_S2_cSoil.nc to ../data/JSBACH/S2
Downloading JSBACH_S2_cVeg.nc to ../data/JSBACH/S2
Downloading JULES_S2_cSoil.nc to ../data/JULES/S2
Downloading JULES_S2_cVeg.nc to ../data/JULES/S2
Downloading LPJ-GUESS_S2_cCwd.nc to ../data/LPJ-GUESS/S2
Downloading LPJ-GUESS_S2_cLitter.nc to ../data/LPJ-GUESS/S2
Downloading LPJ-GUESS_S2_cProduct.nc to ../data/LPJ-GUESS/S2
Downloading LPJ-GUESS_S2_cSoil.nc to ../data/LPJ-GUESS/S2
Downloading LPJ-GUESS_S2_cVeg.nc to ../data/LPJ-GUESS/S2
Downloading LPJmL_S2_cCwd.nc to ../data/LPJml/S2
Downloading LPJmL_S2_cLitter.nc to ../data/LPJml/S2
Downloading LPJmL_S2_cProduct.nc to ../data/LPJml/S2
Downloading LPJmL_S2_cSoil.nc to ../data/LPJml/S2
Downloading LPJmL_S2_cVeg.nc to ../data/LPJml/S2
Downloading LPJwsl_S2_cLitter.nc.gz to ../data/LPJwsl/S2
Downloading LPJwsl_S2_cSoil.

75it [00:00, 153.92it/s]

Downloading SDGVM_S2_cVeg.nc to ../data/SDGVM/S2
Downloading VISIT_S2_cLitter.nc.gz to ../data/VISIT/S2
Downloading VISIT_S2_cProduct.nc.gz to ../data/VISIT/S2
Downloading VISIT_S2_cSoil.nc.gz to ../data/VISIT/S2
Downloading VISIT_S2_cVeg.nc.gz to ../data/VISIT/S2
Downloading YIBs_S2_Annual_cSoil.nc.tar.gz to ../data/YIBS/S2
Downloading YIBs_S2_Annual_cVeg.nc.tar.gz to ../data/YIBS/S2
Downloading LPX-Bern_S2_cLitter.nc to ../data/lpxqs/S2
Downloading LPX-Bern_S2_cProduct.nc to ../data/lpxqs/S2
Downloading LPX-Bern_S2_cSoil.nc to ../data/lpxqs/S2
Downloading LPX-Bern_S2_cVeg.nc to ../data/lpxqs/S2





In [11]:
# uncomress all compressed files in the directories under ../data/
! find $data_dir -name "*.gz" -exec gunzip {} \;

# uncomress all compressed tar files in the directories under ../data/ into the same directory
! find $data_dir -name "*.tar" -exec tar -xvf {} -C ../data/ \;

! mv $data_dir/YIBs_S2_Annual_cSoil.nc $data_dir/YIBS/S2/
! mv $data_dir/YIBs_S2_Annual_cVeg.nc $data_dir/YIBS/S2/

find: illegal option -- n
usage: find [-H | -L | -P] [-EXdsx] [-f path] path ... [expression]
       find [-H | -L | -P] [-EXdsx] -f path [path ...] [expression]
find: illegal option -- n
usage: find [-H | -L | -P] [-EXdsx] [-f path] path ... [expression]
       find [-H | -L | -P] [-EXdsx] -f path [path ...] [expression]
mv: rename ../data//YIBs_S2_Annual_cSoil.nc to ../data//YIBS/S2/YIBs_S2_Annual_cSoil.nc: No such file or directory
mv: rename ../data//YIBs_S2_Annual_cVeg.nc to ../data//YIBS/S2/YIBs_S2_Annual_cVeg.nc: No such file or directory


In [12]:
aws_url = 'https://gcbo-opendata.s3.eu-west-2.amazonaws.com/'
for i, row in tqdm(TRENDY_v12_S2_nbp.iterrows()):
    
    # get the url for download
    download_url = aws_url + row[0]

    # define the destination directory and file
    dst_dir = data_dir + '/'.join(row[0].split('/')[1:-1])
    dst_file = row[0].split('/')[-1]

    # if destination directory does not exist, create it
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    # if file does not exist, download it
    if not os.path.exists(dst_dir + '/' + dst_file):
        print(f'Downloading {dst_file} to {dst_dir}')
        os.system(f'wget {download_url} -P {dst_dir}')

21it [00:00, 206.22it/s]

Downloading CABLE-POP_S2_nbp.nc.gz to ../data/CABLEPOP/S2
Downloading CLASSIC_S2_nbp.nc to ../data/CLASSIC/S2
Downloading CLM5.0_S2_nbp.nc to ../data/CLM5.0/S2
Downloading DLEM_S2_nbp.nc to ../data/DLEM/S2
Downloading EDv3_S2_nbp.nc to ../data/ED/S2
Downloading E3SM_S2_nbp.nc to ../data/ELM/S2
Downloading IBIS_S2_nbp.nc to ../data/IBIS/S2
Downloading ISAM_S2_nbp.nc to ../data/ISAM/S2
Downloading ISBA-CTRIP_S2_nbp.nc to ../data/ISBACTRIP/S2
Downloading JSBACH_S2_nbp.nc to ../data/JSBACH/S2
Downloading JULES_S2_nbp.nc to ../data/JULES/S2
Downloading LPJ-GUESS_S2_nbp.nc to ../data/LPJ-GUESS/S2
Downloading LPJmL_S2_nbp.nc to ../data/LPJml/S2
Downloading LPJwsl_S2_nbp.nc.gz to ../data/LPJwsl/S2
Downloading OCN_S2_nbp.nc to ../data/OCN/S2
Downloading ORCHIDEE_S2_nbp.nc to ../data/ORCHIDEE/S2
Downloading SDGVM_S2_nbp.csv to ../data/SDGVM/S2
Downloading SDGVM_S2_nbpAnnual.nc to ../data/SDGVM/S2
Downloading VISIT_S2_nbp.nc.gz to ../data/VISIT/S2
Downloading YIBs_S2_Monthly_nbp.nc.tar.gz to ../d




#### Unzip the files

In [13]:
# uncomress all compressed files in the directories under ../data/
! find ../data/ -name "*.gz" -exec gunzip {} \;

# uncomress all compressed tar files in the directories under ../data/ into the same directory
! find ../data/ -name "*.tar" -exec tar -xvf {} -C ../data/ \;

! mv ../data/YIBs_S2_Annual_cSoil.nc $data_dir/YIBS/S2/
! mv ../data/YIBs_S2_Annual_cVeg.nc $data_dir/YIBS/S2/
! mv  ../data/YIBs_S2_Monthly_nbp.nc $data_dir/YIBS/S2/

mv: rename ../data/YIBs_S2_Annual_cSoil.nc to ../data//YIBS/S2/YIBs_S2_Annual_cSoil.nc: No such file or directory
mv: rename ../data/YIBs_S2_Annual_cVeg.nc to ../data//YIBS/S2/YIBs_S2_Annual_cVeg.nc: No such file or directory
mv: rename ../data/YIBs_S2_Monthly_nbp.nc to ../data//YIBS/S2/YIBs_S2_Monthly_nbp.nc: No such file or directory


#### Download cell area files

The `areacella_fx_E3SM-2-0_piControl_r1i1p1f1_gr.nc`, `sftlf_fx_E3SM-2-0_piControl_r1i1p1f1_gr.nc` and `JULES-ES.1p0.vn5.4.50.CRUJRA2.TRENDYv8.365.landAreaFrac.nc` files are from the CMIP outputs or from TRENDYv8

In [14]:
cell_area_files = TRENDY_v12_S2[TRENDY_v12_S2[0].str.contains('_area|_oceanCoverFrac\.|_land_fraction\.|_landCoverFrac\.|sftlf\.|landAreaFrac')]

DLEM_area = 'trendyv11-gcb2022/DLEM/DLEM_land_area.nc'
ISBACTRIP_area = 'trendyv12-gcb2023/ISBACTRIP/ISBA-CTRIP_area.nc'

models = ['CLM5.0','IBIS','OCN','ORCHIDEE','LPJmL','CLASSIC','EDv3','ISBA-CTRIP']

cell_area_files = cell_area_files[cell_area_files[0].str.contains('|'.join(models))]

for file in list(cell_area_files[0].values) + [DLEM_area, ISBACTRIP_area]:
    download_url = aws_url + file
    dst_dir = data_dir + '/cell_area/'
    dst_file = file.split('/')[-1]
    if not os.path.exists(dst_dir + '/' + dst_file):
        print(f'Downloading {dst_file} to {dst_dir}')
        os.system(f'wget {download_url} -P {dst_dir}')

Downloading CLASSIC_S2_landCoverFrac.nc to ../data//cell_area/
Downloading CLASSIC_S2_land_fraction.nc to ../data//cell_area/
Downloading CLM5.0_S2_area.nc to ../data//cell_area/
Downloading EDv3_landCoverFrac.nc to ../data//cell_area/
Downloading IBIS_S2_landCoverFrac.nc to ../data//cell_area/
Downloading IBIS_S2_oceanCoverFrac.nc to ../data//cell_area/
Downloading ISBA-CTRIP_S2_sftlf.nc to ../data//cell_area/
Downloading LPJmL_S2_landCoverFrac.nc to ../data//cell_area/
Downloading LPJmL_S2_oceanCoverFrac.nc to ../data//cell_area/
Downloading OCN_S2_area.nc to ../data//cell_area/
Downloading OCN_S2_landCoverFrac.nc to ../data//cell_area/
Downloading OCN_S2_oceanCoverFrac.nc to ../data//cell_area/
Downloading ORCHIDEE_S2_landCoverFrac.nc to ../data//cell_area/
Downloading ORCHIDEE_S2_oceanCoverFrac.nc to ../data//cell_area/
Downloading DLEM_land_area.nc to ../data//cell_area/
Downloading ISBA-CTRIP_area.nc to ../data//cell_area/


### Define functions

In [15]:
# define function to calculate surface area of each pixel
def calc_pixel_area(raster:xr.DataArray) -> xr.DataArray:
    '''
    Calculate the area of each pixel in a raster

    Parameters:
    raster (xarray.DataArray): raster to calculate pixel area for

    Returns:
    xarray.DataArray: raster with pixel area as values
    '''

    # get the resolution of the raster
    res = raster.rio.resolution()

    l1 = np.radians(raster['y']- np.abs(res[1])/2)
    l2 = np.radians(raster['y']+ np.abs(res[1])/2)
    dx = np.radians(np.abs(res[0]))    
    _R = 6371e3  # Radius of earth in m. Use 3956e3 for miles

    # calculate the area of each pixel
    area = _R**2 * dx * (np.sin(l2) - np.sin(l1))

    # create a new xarray with the pixel area as values
    result = ((raster-raster+1)*area)

    # set the nodata value    
    if raster.rio.nodata is None:
        result.rio.set_nodata(np.nan,inplace=True)
    else:
        result.rio.set_nodata(raster.rio.nodata,inplace=True)
    
    return result


In [16]:
def get_area(model:str) -> xr.DataArray:
    """
    Get the area of each pixel for a given model

    Parameters:
    model: str
        the name of the model

    Returns:
    xr.DataArray
        the area of each pixel
    """

    if model == 'CLM5.0':
        
        # if the model is DLEM use the land area file and convert km2 to m2
        area_ds = xr.open_dataset(f'{data_dir}/cell_area/CLM5.0_S2_area.nc')
        area = area_ds['area']*area_ds['landfrac']
        
        # rename coordinates to x,y
        area = area.rename({'lat': 'y', 'lon': 'x'})
    elif model == 'DLEM':
        
        # if the model is DLEM use the land area file and convert km2 to m2
        area = xr.open_dataset(f'{data_dir}/cell_area/DLEM_land_area.nc')['LAND_AREA']*1e6
        area = area.rename({'lat': 'y', 'lon': 'x'})
    elif model in ['IBIS','OCN','ORCHIDEE','LPJml']:
        if model == "LPJml":
            model = "LPJmL"
        # load the ocean cover fraction data
        ocean = xr.open_dataset(f'{data_dir}/cell_area/{model}_S2_oceanCoverFrac.nc',decode_times=False)['oceanCoverFrac']
        
        # rename coordinates to x,y
        ocean = ocean.rename({'latitude': 'y', 'longitude': 'x'})
        
        # the land data is the cell area times the fraction of the cell that is not ocean
        area = calc_pixel_area(ocean)*(1-ocean)
    elif model == 'CLASSIC':
        # load land fraction data
        land_fraction = xr.open_dataset(f'{data_dir}/cell_area/CLASSIC_S2_land_fraction.nc')['sftlf'].rename({'latitude': 'y', 'longitude': 'x'})
        # the land area is the cell area times the land fraction
        area = calc_pixel_area(land_fraction)*land_fraction
    elif model == 'ED':
        # load the land area fraction data and rename coordinates
        area = xr.open_dataset(f'{data_dir}/cell_area/EDv3_landCoverFrac.nc')['landArea'].rename({'latitude': 'y', 'longitude': 'x'})
        # order the coordinates
        area = area.transpose('y','x')
    elif model == 'ELM':

        # load the a file with the base resolution to reproject the area onto
        ds = xr.open_dataset(glob(f'../data/{"ELM"}/S2/*{"nbp"}*.nc')[0],decode_times=False)

        # replace the x and y coordinates with the new ones
        ds.coords['x'] = ds['longitude']
        ds.coords['y'] = ds['latitude']

        # drop the nbnd coordinate
        ds = ds.drop_dims('nbnd')
        ds = ds.swap_dims({'lon':'x','lat':'y'})
        # drop the old longitude and latitude coordinates
        ds = ds.drop_vars(['longitude','latitude'])

        ds.rio.write_crs('EPSG:4326',inplace=True)
        ds = ds['nbp'][0,:,:]

        # load the land area fraction data and rename coordinates
        cell_area = xr.open_dataset(f'{data_dir}/cell_area/areacella_fx_E3SM-2-0_piControl_r1i1p1f1_gr.nc')['areacella'].rename({'lat': 'y', 'lon': 'x'})
        land_fraction = xr.open_dataset(f'{data_dir}/cell_area/sftlf_fx_E3SM-2-0_piControl_r1i1p1f1_gr.nc')['sftlf'].rename({'lat': 'y', 'lon': 'x'})/100

        # the land area is the cell area times the land fraction
        area = cell_area*land_fraction

        # change the coordinates to start from -180 to 180
        area.coords['x'] = xr.where(area.coords['x']>=180, area.coords['x']-360, area.coords['x'])
        area = area.sortby(['x','y'])
        
        # order the coordinates
        area = area.transpose('y','x')

        # reproject the area into the base resolution
        area = area.rio.write_crs('EPSG:4326',inplace=True).rio.reproject_match(ds,resampling=Resampling.sum)
        area = area.where(area<1e30)

    elif model == 'ISBACTRIP':
        # load the grid cell area file and rename coordinates
        cell_area = xr.open_dataset(f'{data_dir}/cell_area/ISBA-CTRIP_area.nc')['AREA'].rename({'LAT_FULL':'y','LON_FULL':'x'})
        
        # load the land area fraction data and rename coordinates
        land_fraction = xr.open_dataset(f'{data_dir}/cell_area/ISBA-CTRIP_S2_sftlf.nc',decode_times=False)['sftlf'].mean(dim='time_counter').rename({'lat_FULL':'y','lon_FULL':'x'})

        # the land area is the cell area times the land fraction
        area = cell_area*land_fraction
    elif model == 'JULES':
        
        # load the lancdAreaFrac from trendy-v10
        land_fraction = xr.open_dataset(f'{data_dir}/cell_area/JULES-ES.1p0.vn5.4.50.CRUJRA2.TRENDYv8.365.landAreaFrac.nc')['landFrac']
        
        # renanme the coordinates
        land_fraction = land_fraction.rename({'latitude': 'y', 'longitude': 'x'})

        # the land area is the grid cell area times the land fraction
        area = calc_pixel_area(land_fraction)*land_fraction

    return area



In [17]:
def parse_model(model:str,var:str) -> xr.Dataset:
    """
    Parse the data for a given model and variable

    Parameters:
    model: str
        the name of the model
    var: str
        the name of the variable

    Returns:
    xr.Dataset
        the parsed data
    """

    # open the dataset
    ds = xr.open_dataset(glob(f'../data/{model}/S2/*{var}*.nc')[0],decode_times=False)

    # convert coordinates to standard time,y,x
    if 'time' not in ds.sizes.keys():
       ds = ds.rename({'time_counter':'time'}) 
    if 'lon' in ds.dims:
        ds = ds.rename({'lon':'x','lat':'y'})
    elif 'longitude' in ds.dims:
        ds = ds.rename({'longitude':'x','latitude':'y'})
    else:
        ds = ds.rename({'lon_FULL':'x','lat_FULL':'y'})
    
    # set the time coordinate to datetime based on the size of the file
    if ds.sizes['time'] == 1956:
        ds['time'] = pd.date_range(start='01-01-1860', periods=len(ds.time), freq='MS')
    elif ds.sizes['time'] > 1956:
        ds['time'] = pd.date_range(start='01-01-1700', periods=len(ds.time), freq='MS')
    elif ds.sizes['time'] >300:
        ds['time'] = pd.date_range(start='01-01-1700', periods=len(ds.time), freq='YS')
    else:
        ds['time'] = pd.date_range(start='01-01-2002', periods=len(ds.time), freq='MS')
    
    if 'nbnd' in ds.dims:
        # replace the x and y coordinates with the new ones
        ds.coords['x'] = ds['longitude']
        ds.coords['y'] = ds['latitude']
        
        # drop the nbnd coordinate
        ds = ds.drop_dims('nbnd')

        # drop the old longitude and latitude coordinates
        ds = ds.drop_vars(['longitude','latitude'])

    if 'bnds' in ds.dims:
        # if the dataset had a bnds coordinate drop it
        ds = ds.drop_dims('bnds')

    # order the coordinates
    ds = ds.transpose('time','y','x')
    
    # sort the data based on y and x
    ds = ds.sortby(['y','x'])

    # if the data is in the 0-360 range, convert it to -180-180
    if ds['x'].min()>=0:
        ds.coords['x'] = xr.where(ds.coords['x']>=180, ds.coords['x']-360, ds.coords['x'])
    
    # sort the data based on y and x
    ds = ds.sortby(['y','x'])
    
    # if the variable is not in the data_vars, rename it
    ds_var = list(ds.data_vars.keys())[0]
    if var not in ds.data_vars:
        ds = ds.rename({ds_var:var})

    # # get the land area of each pixel

    # # define the models that need special attention
    models_to_fix = ['CLM5.0','DLEM','IBIS','OCN','ORCHIDEE','ISBACTRIP','JULES','CLASSIC','ED','ELM','LPJml']

    # if the model needs special attention, use the get_area function to calculate the land area
    if model in models_to_fix:
        area = get_area(model)
        if 'time' in area.dims:
            area = area.sel(time=area['time'][0]).drop_vars('time')
        if area['x'].min()>=0:
            area.coords['x'] = xr.where(area.coords['x']>=180, area.coords['x']-360, area.coords['x'])
    else:

        # otherwise use the calc_pixel_area function to calculate the land area
        area = calc_pixel_area(ds[var][1,:,:])

    # name the land_area DataArray
    area.name = 'land_area'

    # take the annual average of the data
    ds = ds.resample(time='YS').mean()
    
    if model in ['CLASSIC','CLM5.0']:
        # if the model is CLASSIC or CLM5.0, shift years by one
        ds['time'] = (ds['time'].to_series() + pd.DateOffset(years=1)).values

    # return a merged dataset of the data and the land area
    # result = xr.merge([ds,area])

    # return result
    return ds,area

### Validate nbp against GCB

In [18]:
# find all the directories with S2 subdirectories
dirs = ! find ../data/ -name "S2";

# extract the model names
models = [x.split('/')[-2] for x in dirs]

# initialize an empty list to store the parsed datasets
parsed_ds = []
parsed_global_nbp = []

# loop through the directories
for dir in tqdm(dirs):

    # extract the model name
    model = dir.split('/')[-2]
    
    # initialize an empty list to store the parsed datasets
    model_dss = []

    # find all of the netcdf files in the S2 directory
    file_names = glob(dir + "/*.nc")
    
    # loop through the files
    for file in file_names:

        # extract the variable name
        var = file.split('_')[-1].split('.')[0]

        # if the variable is a nbp variable, parse the model
        if var in ['nbp','nbpAnnual']:

            # get the parsed model and area
            dss,ar = parse_model(model,var)

            # add the product of the nbp and surface area to get units of KgC s-1 per gridcell
            model_dss.append(dss[var]*ar)
    
    # merge the datasets along the pool dimension
    model_merged_ds = xr.concat(model_dss,dim='pool')

    # extract the model name from the file name and not the directory
    model2 = file_names[0].split('/')[-1].split('_')[0]
    model_merged_ds.name = model2

    # calculate the global nbp - convert from kgC s-1 to PgC yr-1
    global_nbp = model_merged_ds.sum(dim=['x','y','pool'])*1e3/1e15 * 365*24*3600
    global_nbp.name = model2
    
    # append the global nbp to the list
    parsed_global_nbp.append(global_nbp)

# merge the global nbp datasets along the model dimension
models_global_nbp = xr.concat(parsed_global_nbp,dim='model')

# set the values of the model dimension to the model names
models_global_nbp['model'] = [x.name for x in parsed_global_nbp]

# convert the xarray to a dataframe
models_global_nbp.name = 'nbp'
models_global_nbp_df = models_global_nbp.to_dataframe()['nbp'].unstack()
models_global_nbp_df.columns = models_global_nbp_df.columns.year

  0%|          | 0/20 [00:00<?, ?it/s]


ValueError: must supply at least one object to concatenate

In [1]:
# load the GCB2023 data
GCB = pd.read_excel('https://globalcarbonbudgetdata.org/downloads/archive/Global_Carbon_Budget_2023v1.1.xlsx',sheet_name='Terrestrial Sink',skiprows=27)
GCB.set_index('Year',inplace=True)
GCB = GCB.iloc[:,2:-3]

# change the model names to match our analysis
models_GCB = list(GCB.columns)
models_GCB[4] = 'EDv3'
models_GCB[5] = 'E3SM'
models_GCB[10] = 'JULES'
models_GCB[11] = 'LPJ-GUESS'
models_GCB[13] = 'LPJmL'
models_GCB[15] = 'OCN'
models_GCB[16] = 'ORCHIDEE'
GCB.columns = models_GCB

# assert that the RMSE for all models is less than 7%
assert all((((models_global_nbp_df.T.loc[1959:2022] - GCB.loc[1959:2022])**2).mean()**0.5/GCB.loc[1959:2022].mean()*100).dropna().round(2).values < 7)

NameError: name 'pd' is not defined

### Do analysis for soil

In [20]:
# find all the directories with S2 subdirectories
dirs = ! find ../data/ -name "S2";

# extract the model names
models = [x.split('/')[-2] for x in dirs]

# initialize an empty list to store the parsed datasets
parsed_ds = []
parsed_global_cSoil = []

# loop through the directories
for dir in tqdm(dirs):

    # extract the model name
    model = dir.split('/')[-2]

    # initialize an empty list to store the parsed datasets
    model_dss = []

    # find all of the netcdf files in the S2 directory
    file_names = glob(dir + "/*.nc")

    # loop through the files
    for file in file_names:

        # extract the variable name
        var = file.split('_')[-1].split('.')[0]
        
        if var in ['cCwd','cLitter','cSoil']:
            
            # based on O'Sullivan 2022 et al. for CLM5 cLitter is included in cSoil and for CABLEPOP cCwd is included in cLitter
            if (model == 'CLM5.0' and var == 'cLitter') or (model == 'CABLEPOP' and var == 'cCwd'):
                continue

            # get the parsed model and area
            dss,ar = parse_model(model,var)

            # add the product of the nbp and surface area to get units of KgC per gridcell
            model_dss.append(dss[var]*ar)

    # merge the datasets along the pool dimension
    model_merged_ds = xr.concat(model_dss,dim='pool')

    # extract the model name from the file name and not the directory
    model2 = file_names[0].split('_')[0]
    model_merged_ds.name = model2
    
    # calculate the global stocks - convert from kgC to PgC
    global_cSoil = model_merged_ds.sum(dim=['x','y','pool'])*1e3/1e15
    global_cSoil.name = model2
    
    # append the global stocks to the list
    parsed_global_cSoil.append(global_cSoil)

# merge the global stocks datasets along the model dimension
models_global_cSoil = xr.concat(parsed_global_cSoil,dim='model')

# set the values of the model dimension to the model names
models_global_cSoil['model'] = [x.name for x in parsed_global_cSoil]
models_global_cSoil.name = 'cSoil'

# convert the xarray to a dataframe
models_global_cSoil_df = models_global_cSoil.to_dataframe()['cSoil'].unstack()
models_global_cSoil_df.columns = models_global_cSoil_df.columns.year

# save the dataframes to csv
models_global_cSoil_df.to_csv('../results/TRENDY_v12_global_cSoil_S2.csv')

  0%|          | 0/20 [00:00<?, ?it/s]


ValueError: must supply at least one object to concatenate

In [320]:
SOC_stock_change = models_global_cSoil_df.diff(axis=1).loc[:,1992:2022].mean(axis=1).mean()

print(f'The average SOC stock change from 1992 to 2022 for the TRENDY v12 models is {SOC_stock_change:.2f} PgC yr-1')

The average SOC stock change from 1992 to 2022 for the TRENDY v12 models is 1.07 PgC yr-1
