In [1]:
# Imports
import os
import sys
import glob
import pickle
import re
import subprocess
import intake
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from cdo import Cdo

# Initialize CDO for climate data operations.
# A single Cdo instance (from the `cdo` package imported above) is created
# here; no use of `cdo` is visible in the cells shown in this part of the notebook.
cdo = Cdo()

# Confirm setup: printed only if every import and the Cdo() call above succeeded.
print("Setup complete")

Setup complete


In [2]:
# Load the catalog
# Absolute path to the catalog description (JSON) handed to intake.
# NOTE(review): presumably only resolvable on the system hosting /work/ik1017 — confirm.
col_url = "/work/ik1017/Catalogs/dkrz_cmip6_disk.json"
# `col` is the datastore handle that the later cells query through `col.search(...)`.
col = intake.open_esm_datastore(col_url)

  df = pd.read_csv(


In [13]:
# Define variables and scenarios
# Alternative, longer variable selection (kept commented out for reference):
#variables = ["fgco2", "nbp", "fLuc", "npp", "gpp", "rh", "ra", "cVeg", "cSoil", "cLitter"]
# Active selection: these names are passed as `variable_id` to the catalog search.
variables = ["fgco2", "nbp", "npp", "gpp"]

# Another commented-out variable selection:
#variables = ["fgco2", "intpp", "intpoc", "intdic", "intdoc", "cSoil", "cVeg", "fLuc", "gpp", "nbp", "nep", "npp", "ra", "rh"]
# Experiments to check; "historical" is searched under a different activity_id than the SSPs.
scenarios = ["historical", "ssp126", "ssp245", "ssp370", "ssp585"]
limit = 5 # threshold on missing variables: lists with fewer than `limit` entries are shown verbatim, `limit` or more are labelled "missing"

In [14]:
# Build, for every scenario, a one-column frame (index: source_id) whose cells
# hold the list of requested variables a model does NOT provide, then merge
# those frames into a single model-by-scenario overview (`merged_df`).
DF = []

# Loop through each scenario and query the catalog
for scenario in scenarios:
    # The catalog is searched with activity_id 'CMIP' for the historical
    # experiment and 'ScenarioMIP' for every other experiment.
    activity = 'CMIP' if scenario == "historical" else 'ScenarioMIP'

    # Search the catalog for the requested variables of this experiment
    query = dict(activity_id=activity, variable_id=variables, experiment_id=scenario)
    col_subset = col.search(**query)

    # Count distinct members per (model, variable); NaN marks "no data".
    df_init = col_subset.df
    df_overview = (
        df_init.groupby(['source_id', 'variable_id']).member_id.nunique()
        .unstack()
        # unstack() only creates columns for variables that at least one model
        # provides in this scenario. Reindexing to the full request makes a
        # variable that nobody provides show up as missing for every model
        # instead of being silently dropped. Sorted to keep the alphabetical
        # column order that unstack() produces.
        .reindex(columns=sorted(set(variables)))
    )

    # List variables without data for each model. The Series is built
    # explicitly so the result is always a Series of lists, including when a
    # scenario returns no rows at all.
    is_missing = df_overview.isna()
    missing_lists = pd.Series(
        [row[row].index.tolist() for _, row in is_missing.iterrows()],
        index=is_missing.index,
        name=scenario,
        dtype=object,
    )

    # Append the single-column DataFrame to the list
    DF.append(missing_lists.to_frame())

# Merge all DataFrames; the outer join keeps models that appear in only some scenarios
merged_df = DF[0]
for scenario_df in DF[1:]:
    merged_df = merged_df.join(scenario_df, how='outer')

# Models absent from a scenario are NaN after the outer join. Replace NaN with
# an integer sentinel (the number of requested variables): it has no len(),
# which is what the labelling step relies on to mark the cell as "none".
merged_df = merged_df.fillna(len(variables))

In [15]:
def map(val):
    """Turn one overview cell into its display label.

    NOTE: this function shadows the builtin ``map`` for the rest of the
    notebook; the name is kept because the apply cell calls it as ``map``.

    Parameters
    ----------
    val : list, str, or any object
        Normally the list of missing variable names for one model/scenario,
        or a value without a length (the fill sentinel) when the model has no
        entry for the scenario.

    Returns
    -------
    str
        ``"all"`` when nothing is missing (empty list), ``str(val)`` when
        fewer than the module-level ``limit`` variables are missing,
        ``"missing"`` when ``limit`` or more are missing, and ``"none"`` when
        ``val`` has no length. A string is returned unchanged.
    """
    # Already-labelled cells pass through untouched. Without this, re-running
    # the apply cell would measure the length of the label text and turn e.g.
    # "['gpp', 'nbp']" into "missing".
    if isinstance(val, str):
        return val

    try:
        n_missing = len(val)
    except TypeError:
        # Only "has no length" means "no data for this scenario"; any other
        # error is a real problem and must not be hidden.
        return "none"

    if n_missing == 0:
        return "all"
    if n_missing < limit:
        return str(val)
    return "missing"

In [16]:
# Swap every cell of the overview for its text label, one scenario column at
# a time. This overwrites `merged_df` in place.
for scenario_name in list(merged_df.columns):
    labelled_column = merged_df[scenario_name].apply(map)
    merged_df[scenario_name] = labelled_column

In [17]:
# Styling the DataFrame for visualization
# CSS rules for the rendered table, in the selector/props form that is later
# passed to `set_table_styles`.
table_styles = [
    # Every header cell gets a fixed width.
    {"selector": "th", "props": [("width", "60px")]},
    # Column headers are rotated by -90 degrees and given an explicit height.
    {
        "selector": "th.col_heading",
        "props": [
            ("transform", "rotateZ(-90deg)"),
            ("max-height", "180px"),
            ("height", "160px"),
            ("max-width", "5px"),
            # NOTE(review): `left` is not a standard `vertical-align` value —
            # confirm the intended property (possibly `text-align`).
            ("vertical-align", "left"),
        ],
    },
]

# Function to apply color highlighting based on value
def highlight_color(val):
    """Return the CSS background declaration for one overview cell.

    The labels "all", "none" and "missing" each have a fixed colour; any
    other value (e.g. the text of a short list of missing variables) gets
    the fallback colour.
    """
    label_colors = (
        ("all", 'background-color: rgba(0,255,50)'),
        ("none", 'background-color: rgba(0,0,0)'),
        ("missing", 'background-color: rgba(255,0,0)'),
    )
    # Compared in the same order as listed; the first matching label wins.
    for label, css in label_colors:
        if val == label:
            return css
    return 'background-color: rgba(255,165,0)'

# Apply styles and render the DataFrame
# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map.
# Use whichever this pandas version provides, so the cell runs without a
# FutureWarning on new versions and still works on older ones.
_styler = merged_df.style
_color_cells = _styler.map if hasattr(_styler, "map") else _styler.applymap

# Colour each cell by its label, then attach the header CSS and cell borders.
styled_df = (
    _color_cells(highlight_color)
    .set_table_styles(table_styles)
    .set_properties(**{'border-color': 'grey', 'border-style': 'dotted', 'border-width': 'thin'})
)

# Display the styled DataFrame
styled_df

Unnamed: 0_level_0,historical,ssp126,ssp245,ssp370,ssp585
source_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACCESS-ESM1-5,all,"['gpp', 'nbp', 'npp']",all,all,"['gpp', 'nbp', 'npp']"
AWI-ESM-1-1-LR,"['fgco2', 'nbp']",none,none,none,none
AWI-ESM-1-REcoM,"['fgco2', 'nbp']","['fgco2', 'nbp']","['fgco2', 'nbp']",none,"['fgco2', 'nbp']"
BCC-CSM2-MR,['nbp'],['nbp'],['nbp'],['nbp'],['nbp']
BCC-ESM1,['nbp'],none,none,none,none
CESM2,all,"['gpp', 'nbp', 'npp']","['gpp', 'npp']","['gpp', 'nbp', 'npp']","['gpp', 'nbp', 'npp']"
CESM2-FV2,all,none,none,none,none
CESM2-WACCM,all,"['gpp', 'npp']",all,all,all
CESM2-WACCM-FV2,all,none,none,none,none
CMCC-CM2-SR5,"['fgco2', 'npp']","['fgco2', 'npp']","['fgco2', 'npp']","['fgco2', 'npp']","['fgco2', 'npp']"


In [27]:
# Styling the DataFrame for visualization
# NOTE(review): this cell repeats the earlier styling cell verbatim; consider
# defining the styling once and reusing it.
# CSS rules for the rendered table, in the selector/props form that is later
# passed to `set_table_styles`.
table_styles = [
    # Every header cell gets a fixed width.
    {"selector": "th", "props": [("width", "60px")]},
    # Column headers are rotated by -90 degrees and given an explicit height.
    {
        "selector": "th.col_heading",
        "props": [
            ("transform", "rotateZ(-90deg)"),
            ("max-height", "180px"),
            ("height", "160px"),
            ("max-width", "5px"),
            # NOTE(review): `left` is not a standard `vertical-align` value —
            # confirm the intended property (possibly `text-align`).
            ("vertical-align", "left"),
        ],
    },
]

# Function to apply color highlighting based on value
def highlight_color(val):
    """Return the CSS background declaration for one overview cell.

    The labels "all", "none" and "missing" each have a fixed colour; any
    other value gets the fallback colour.

    NOTE(review): this re-defines, with the same body, the function of the
    same name from the earlier styling cell.
    """
    label_colors = (
        ("all", 'background-color: rgba(0,255,50)'),
        ("none", 'background-color: rgba(0,0,0)'),
        ("missing", 'background-color: rgba(255,0,0)'),
    )
    # Compared in the same order as listed; the first matching label wins.
    for label, css in label_colors:
        if val == label:
            return css
    return 'background-color: rgba(255,165,0)'

# Apply styles and render the DataFrame
# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map.
# Use whichever this pandas version provides, so the cell runs without a
# FutureWarning on new versions and still works on older ones.
_styler = merged_df.style
_color_cells = _styler.map if hasattr(_styler, "map") else _styler.applymap

# Colour each cell by its label, then attach the header CSS and cell borders.
styled_df = (
    _color_cells(highlight_color)
    .set_table_styles(table_styles)
    .set_properties(**{'border-color': 'grey', 'border-style': 'dotted', 'border-width': 'thin'})
)

# Display the styled DataFrame
styled_df

Unnamed: 0_level_0,historical,ssp126,ssp245,ssp370,ssp585
source_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACCESS-ESM1-5,['fLuc'],missing,['fLuc'],['fLuc'],missing
AWI-ESM-1-1-LR,"['fgco2', 'nbp', 'rh']",none,none,none,none
AWI-ESM-1-REcoM,"['fgco2', 'nbp', 'rh']","['fgco2', 'nbp', 'rh']","['fgco2', 'nbp', 'rh']",none,"['fgco2', 'nbp', 'rh']"
BCC-CSM2-MR,"['fLuc', 'nbp']","['fLuc', 'nbp']","['fLuc', 'nbp']","['fLuc', 'nbp']","['fLuc', 'nbp']"
BCC-ESM1,"['fLuc', 'nbp']",none,none,none,none
CESM2,all,missing,missing,missing,missing
CESM2-FV2,all,none,none,none,none
CESM2-WACCM,all,missing,all,all,all
CESM2-WACCM-FV2,all,none,none,none,none
CMCC-CM2-SR5,missing,missing,"['cLitter', 'fLuc', 'fgco2', 'npp']",missing,missing
