## Useful Docs

https://docs.google.com/document/d/1yUx6jr9EdedCOLd--CPdTfGDwEwzPpCF6p1jRmqx-0Q/edit#

https://agupubs.onlinelibrary.wiley.com/doi/epdf/10.3894/JAMES.2009.1.4

https://www.carbonbrief.org/cmip6-the-next-generation-of-climate-models-explained

In [1]:
import xarray as xr
import pooch
import pandas as pd
import fsspec
from pathlib import Path
import time
import numpy as np
import json
import cftime
import matplotlib.pyplot as plt
import netCDF4 as nc
from cftime import date2num


# Handy metpy tutorial working with xarray:
# https://unidata.github.io/MetPy/latest/tutorials/xarray_tutorial.html#sphx-glr-tutorials-xarray-tutorial-py
import metpy.calc as mpcalc
from metpy.cbook import get_test_data
from metpy.units import units
from metpy.plots import SkewT

In [75]:
# Fetch the Pangeo CMIP6 catalogue CSV from Google Cloud Storage through pooch
# (files are kept under ./.cache) and load it into a DataFrame.
# The registry value is None, i.e. no known hash is supplied for the file.
odie = pooch.create(
    path="./.cache",
    base_url="https://storage.googleapis.com/cmip6/",
    registry={"pangeo-cmip6.csv": None},
)

file_path = odie.fetch("pangeo-cmip6.csv")
df_in = pd.read_csv(file_path)

In [97]:
# Keep only the rows whose experiment id is "piControl" or "historical".
df_expt = df_in[df_in.experiment_id.isin(["piControl", "historical"])]

In [103]:
# Restrict the catalogue to the 3-hourly tables ("3hr" and "CF3hr"),
# then display the result.
df_3hr = df_expt[df_expt.table_id.isin(["3hr", "CF3hr"])]
df_3hr

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
1687,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,3hr,tas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
1688,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,3hr,mrsos,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
1689,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,3hr,mrro,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
1720,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,3hr,tslsi,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
1748,CMIP,NOAA-GFDL,GFDL-ESM4,piControl,r1i1p1f1,3hr,huss,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/piCo...,,20180701
...,...,...,...,...,...,...,...,...,...,...,...
492709,CMIP,EC-Earth-Consortium,EC-Earth3-AerChem,historical,r4i1p1f1,3hr,tas,gr,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-E...,,20201214
503295,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,3hr,vas,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
503296,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,3hr,uas,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114
503297,CMIP,CMCC,CMCC-ESM2,historical,r1i1p1f1,3hr,tas,gn,gs://cmip6/CMIP6/CMIP/CMCC/CMCC-ESM2/historica...,,20210114


In [144]:
# Group the 3-hourly catalogue rows by model name (the `source_id` column).
models = df_3hr.groupby(by="source_id")

In [163]:
# Catalogue `variable_id` values that figure 10 depends on.
fig10_vars = ["tas", "mrsos", "huss"]

# Will hold the names of the models that provide every entry of `fig10_vars`.
fig10_models = list()

In [164]:
# Record every model whose 3-hourly catalogue rows include all of the
# variables listed in `fig10_vars`.
#
# The list is emptied first so that re-running this cell does not append the
# same model names a second time.
fig10_models.clear()

# Iterating over the groupby yields (group key, sub-frame) pairs directly,
# so there is no need to call `get_group` for each key.
for model, the_model in models:
    model_vars = set(the_model.variable_id)
    # A model qualifies only if none of the required variables is missing.
    if model_vars.issuperset(fig10_vars):
        fig10_models.append(model)

In [165]:
# Show the models that provide every variable required for figure 10.
fig10_models

['GFDL-CM4', 'GFDL-ESM4']

In [57]:
# Group the rows of `correct_table` by model name (the `source_id` column).
# NOTE(review): `correct_table` is not defined in any cell visible above, and
# this cell's execution count (57) is lower than those of the preceding cells.
# Presumably it was created by a cell that has since been removed or edited;
# confirm how it should be built so the notebook runs from a fresh kernel.
grouped = correct_table.groupby("source_id")

In [136]:
# Pull out the catalogue rows of the "GFDL-ESM4" group and display them.
my_model = grouped.get_group(name="GFDL-ESM4")
my_model

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
240972,CMIP,NOAA-GFDL,GFDL-ESM4,historical,r1i1p1f1,3hr,vas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/hist...,,20190726
240973,CMIP,NOAA-GFDL,GFDL-ESM4,historical,r1i1p1f1,3hr,uas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/hist...,,20190726
240974,CMIP,NOAA-GFDL,GFDL-ESM4,historical,r1i1p1f1,3hr,tas,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/hist...,,20190726
240975,CMIP,NOAA-GFDL,GFDL-ESM4,historical,r1i1p1f1,3hr,huss,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/hist...,,20190726
241030,CMIP,NOAA-GFDL,GFDL-ESM4,historical,r1i1p1f1,3hr,pr,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-ESM4/hist...,,20190726


In [143]:
# Print whether the selected model lists both "vas" and "huss" among its
# variable ids.
a = list(my_model.variable_id)
print({"vas", "huss"}.issubset(a))

True
