<a href="https://colab.research.google.com/github/SMN-SENAMHI/ADAPTACION/blob/main/CMIP6-VERIFICACION_v1.1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install esgf-pyclient -qq > /dev/null
!pip install -q intake intake-esm xmip -qq > /dev/null

In [None]:
#cat√°logo CMIP6 (Pangeo)
import intake
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
cat = intake.open_esm_datastore(cat_url)
df = cat.df

#Modelos disponibles ---
modelos = sorted(df["source_id"].dropna().unique())
print(f"\n Modelos disponibles ({len(modelos)} en total):")
print("--------------------------------------------------")
for m in modelos:
    print("-", m)

#Experimentos disponibles ---
#experimentos = sorted(df["experiment_id"].dropna().unique())
#print(f"\n Experimentos disponibles ({len(experimentos)} en total):")
#print("--------------------------------------------------")
#for e in experimentos:
#    print("-", e)

#Miembros disponibles ---
#miembros = sorted(df["member_id"].dropna().unique())
#print(f"\n Miembros disponibles ({len(miembros)} en total):")
#print("--------------------------------------------------")
#for m in miembros[:5]:
#    print("-", m)
#print("...")


 Modelos disponibles (88 en total):
--------------------------------------------------
- ACCESS-CM2
- ACCESS-ESM1-5
- AWI-CM-1-1-MR
- AWI-ESM-1-1-LR
- BCC-CSM2-HR
- BCC-CSM2-MR
- BCC-ESM1
- CAMS-CSM1-0
- CAS-ESM2-0
- CESM1-1-CAM5-CMIP5
- CESM1-WACCM-SC
- CESM2
- CESM2-FV2
- CESM2-WACCM
- CESM2-WACCM-FV2
- CIESM
- CMCC-CM2-HR4
- CMCC-CM2-SR5
- CMCC-CM2-VHR4
- CMCC-ESM2
- CNRM-CM6-1
- CNRM-CM6-1-HR
- CNRM-ESM2-1
- CanESM5
- CanESM5-CanOE
- E3SM-1-0
- E3SM-1-1
- E3SM-1-1-ECA
- EC-Earth3
- EC-Earth3-AerChem
- EC-Earth3-CC
- EC-Earth3-LR
- EC-Earth3-Veg
- EC-Earth3-Veg-LR
- EC-Earth3P
- EC-Earth3P-HR
- EC-Earth3P-VHR
- ECMWF-IFS-HR
- ECMWF-IFS-LR
- FGOALS-f3-H
- FGOALS-f3-L
- FGOALS-g3
- FIO-ESM-2-0
- GFDL-AM4
- GFDL-CM4
- GFDL-CM4C192
- GFDL-ESM2M
- GFDL-ESM4
- GFDL-OM4p5B
- GISS-E2-1-G
- GISS-E2-1-G-CC
- GISS-E2-1-H
- GISS-E2-2-G
- GISS-E2-2-H
- HadGEM3-GC31-HM
- HadGEM3-GC31-LL
- HadGEM3-GC31-LM
- HadGEM3-GC31-MM
- ICON-ESM-LR
- IITM-ESM
- INM-CM4-8
- INM-CM5-0
- INM-CM5-H
- IPSL-CM5A2-

In [None]:
#!/usr/bin/env python3

import requests
import pandas as pd
from IPython.display import display, HTML


# CONSULTA
project = "CMIP6"
modelo = "NorESM2-MM"
experimento_sel = "ssp245"
miembro_sel = "r1i1p1f1"
limite = 10000

nodos = {
    "LLNL (EEUU)": "https://esgf-node.llnl.gov/esg-search/search",
    "CEDA (Reino Unido)": "https://esgf-data3.ceda.ac.uk/esg-search/search",
    "DKRZ (Alemania)": "https://esgf-data.dkrz.de/esg-search/search",
    "IPSL (Francia)": "https://esgf-node.ipsl.upmc.fr/esg-search/search",
    "NCI (Australia)": "https://esgf.nci.org.au/esg-search/search"
}

# BLOQUE 2 ‚Äì VARIABLES WRF
uso_WRF = {
    #Campos 3D atmosf√©ricos
    "ua": "Campo 3D ‚Äì Viento U",
    "va": "Campo 3D ‚Äì Viento V",
    "ta": "Campo 3D ‚Äì Temperatura",
    "hus": "Campo 3D ‚Äì Humedad espec√≠fica",

    #Superficie atmosf√©rica
    "ps": "Presi√≥n de superficie",
    "psl": "Presi√≥n al nivel del mar (diagn√≥stico)",
    "tas": "Temperatura 2 m",
    "huss": "Humedad 2 m",
    "uas": "Viento 10 m U",
    "vas": "Viento 10 m V",

    #Suelo
    "tsl": "Temperatura de suelo",
    "mrsol": "Humedad total del suelo",
    "sftlf": "M√°scara tierra-oc√©ano",

    #Oce√°nico
    "tos": "Temperatura superficial del mar (SST)"
}

#FRECUENCIA
horas_tables = {"3hr", "E3hr", "E3hrPt", "6hrLev", "6hrPlev", "6hrPlevPt"}
subdiaria_tables = {"day"}
mensual_tables = {"Amon", "Lmon", "Omon", "Emon", "fx"}

def filtrar_listas(tablas, grupo):
    tablas_set = set(tablas.split(", "))
    seleccion = sorted(tablas_set & grupo)
    return ", ".join(seleccion) if seleccion else "‚Äî"

def prioridad_wrf(row):
    """Determina la prioridad de la variable seg√∫n la frecuencia disponible."""
    if row["horas"] != "‚Äî":
        return "üü¢ Alta (horaria)"
    elif row["subdiaria"] != "‚Äî":
        return "üü° Media (diaria)"
    elif row["mensual"] != "‚Äî":
        return "üî¥ Baja (mensual)"
    else:
        return "‚Äî No disponible"

def estilo_tabla(df_estilo):
    return (df_estilo.style
            .set_table_styles([
                {"selector": "th",
                 "props": [("font-weight", "bold"),
                           ("background-color", "#B0C4DE"),
                           ("color", "black"),
                           ("text-align", "center"),
                           ("border", "1px solid gray")]},
                {"selector": "td",
                 "props": [("text-align", "center"),
                           ("border", "1px solid #D3D3D3"),
                           ("padding", "5px")]}
            ])
            .set_properties(**{"text-align": "center"})
            .set_table_attributes('style="border-collapse:collapse; margin:auto; width:95%"')
            .hide(axis="index"))

#Consulta del Nodo y/o Repositorio
for nombre, nodo in nodos.items():
    print(f"\n Explorando nodo: {nombre}")
    params = {
        "project": project,
        "type": "Dataset",
        "source_id": modelo,
        "experiment_id": experimento_sel,
        "variant_label": miembro_sel,
        "format": "application/solr+json",
        "limit": limite,
        "replica": "false"
    }

    try:
        r = requests.get(nodo, params=params, timeout=25)
        r.raise_for_status()
        data = r.json()
        docs = data["response"]["docs"]
        print(f"Registros obtenidos en {nombre}: {len(docs)}")

        if len(docs) == 0:
            print("Sin resultados en este nodo.")
            display(HTML(f"""
            <hr style='border:1px solid #999; margin:25px 0;'>
            <div style='text-align:center; color:#555; font-style:italic; margin-bottom:15px;'>
            </div>
            """))
            continue

        #DataFrame
        records = []
        for d in docs:
            var = d.get("variable_id", ["?"])[0] if "variable_id" in d else "?"
            tabla = d.get("table_id", ["?"])[0] if "table_id" in d else "?"
            grid = d.get("grid_label", ["?"])[0] if "grid_label" in d else "?"
            records.append((var, tabla, grid))
        df = pd.DataFrame(records, columns=["variable_id", "table_id", "grid_label"]).drop_duplicates()

        #variables WRF seleccionadas
        df = df[df["variable_id"].isin(uso_WRF.keys())].copy()
        if df.empty:
            print("Ninguna variable WRF encontrada en este nodo.")
            continue

        df["uso_WRF"] = df["variable_id"].map(uso_WRF)

        tablas_join = (
            df.groupby("variable_id")["table_id"]
            .agg(lambda x: ", ".join(sorted(set(x))))
            .reset_index()
            .rename(columns={"table_id": "tablas_encontradas"})
        )
        df = pd.merge(df, tablas_join, on="variable_id")

        #frecuencia
        df["horas"] = df["tablas_encontradas"].apply(lambda x: filtrar_listas(x, horas_tables))
        df["subdiaria"] = df["tablas_encontradas"].apply(lambda x: filtrar_listas(x, subdiaria_tables))
        df["mensual"] = df["tablas_encontradas"].apply(lambda x: filtrar_listas(x, mensual_tables))

        df["WRF_Listo"] = df.apply(prioridad_wrf, axis=1)

        orden_var = [
            #Campos en niveles
            "ua", "va", "ta", "hus",
            #Superficie atmosf√©rica
            "ps", "psl", "tas", "huss", "uas", "vas",
            #Superficie terrestre / suelo
            "tsl", "mrsol", "sftlf",
            #Oce√°nico
            "tos"
        ]
        orden_filtrado = [v for v in orden_var if v in df["variable_id"].values]
        df["orden"] = df["variable_id"].apply(lambda v: orden_filtrado.index(v) if v in orden_filtrado else 999)
        df = df.sort_values("orden").drop(columns="orden")

        df_final = (df[["variable_id", "uso_WRF", "tablas_encontradas",
                        "horas", "subdiaria", "mensual", "grid_label", "WRF_Listo"]]
                    .drop_duplicates().reset_index(drop=True))

        titulo_html = f"""
        <div style='text-align:left; margin-top:15px; margin-bottom:10px;'>
            <h3 style='color:#003366; font-family:Arial; font-weight:bold;'>
                Nodo: {nombre}<br>
                <span style="font-size:15px; color:#444;">
        Variables {project} ‚Äì {modelo} / {experimento_sel} / {miembro_sel}
                </span>
            </h3>
        </div>
        """

        display(HTML(titulo_html))
        display(estilo_tabla(df_final))

        #Resumen de solictudd
        resumen = df_final["WRF_Listo"].value_counts()
        print("\n Resumen de prioridad:")
        for clave in ["üü¢ Alta (horaria)", "üü° Media (diaria)", "üî¥ Baja (mensual)", "‚Äî No disponible"]:
            if clave in resumen:
                print(f"{clave}: {resumen[clave]} variables")

    except Exception as e:
        print(f"‚ùå Error en {nombre}: {e}")

    display(HTML(f"""
        <hr style='border:1px solid #999; margin:25px 0;'>
        <div style='text-align:center; color:#555; font-style:italic; margin-bottom:15px;'>
        </div>
    """))



 Explorando nodo: LLNL (EEUU)
Registros obtenidos en LLNL (EEUU): 1206


variable_id,uso_WRF,tablas_encontradas,horas,subdiaria,mensual,grid_label,WRF_Listo
ua,Campo 3D ‚Äì Viento U,"6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day",6hrPlevPt,day,"Amon, Emon",gn,üü¢ Alta (horaria)
va,Campo 3D ‚Äì Viento V,"6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day",6hrPlevPt,day,"Amon, Emon",gn,üü¢ Alta (horaria)
ta,Campo 3D ‚Äì Temperatura,"6hrPlevPt, Amon, CFday, CFmon, Eday, Emon, day",6hrPlevPt,day,"Amon, Emon",gn,üü¢ Alta (horaria)
hus,Campo 3D ‚Äì Humedad espec√≠fica,"Amon, CFday, CFmon, Eday, Emon, day",‚Äî,day,"Amon, Emon",gn,üü° Media (diaria)
ps,Presi√≥n de superficie,"AERmon, Amon, CFday, Emon",‚Äî,‚Äî,"Amon, Emon",gn,üî¥ Baja (mensual)
psl,Presi√≥n al nivel del mar (diagn√≥stico),"Amon, day",‚Äî,day,Amon,gn,üü° Media (diaria)
tas,Temperatura 2 m,"6hrPlev, 6hrPlevPt, Amon, day","6hrPlev, 6hrPlevPt",day,Amon,gn,üü¢ Alta (horaria)
huss,Humedad 2 m,"Amon, day",‚Äî,day,Amon,gn,üü° Media (diaria)
tsl,Temperatura de suelo,"Eday, Lmon",‚Äî,‚Äî,Lmon,gn,üî¥ Baja (mensual)
mrsol,Humedad total del suelo,"Eday, Emon",‚Äî,‚Äî,Emon,gn,üî¥ Baja (mensual)



 Resumen de prioridad:
üü¢ Alta (horaria): 4 variables
üü° Media (diaria): 3 variables
üî¥ Baja (mensual): 5 variables



 Explorando nodo: CEDA (Reino Unido)
‚ùå Error en CEDA (Reino Unido): HTTPSConnectionPool(host='esgf-data3.ceda.ac.uk', port=443): Max retries exceeded with url: /esg-search/search?project=CMIP6&type=Dataset&source_id=NorESM2-MM&experiment_id=ssp245&variant_label=r1i1p1f1&format=application%2Fsolr%2Bjson&limit=10000&replica=false (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7cb920567590>: Failed to resolve 'esgf-data3.ceda.ac.uk' ([Errno -2] Name or service not known)"))



 Explorando nodo: DKRZ (Alemania)
Registros obtenidos en DKRZ (Alemania): 0
Sin resultados en este nodo.



 Explorando nodo: IPSL (Francia)
Registros obtenidos en IPSL (Francia): 605


variable_id,uso_WRF,tablas_encontradas,horas,subdiaria,mensual,grid_label,WRF_Listo
ua,Campo 3D ‚Äì Viento U,"6hrLev, 6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
va,Campo 3D ‚Äì Viento V,"6hrLev, 6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
ta,Campo 3D ‚Äì Temperatura,"6hrLev, 6hrPlevPt, Amon, CFday, CFmon, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
hus,Campo 3D ‚Äì Humedad espec√≠fica,"6hrLev, 6hrPlev, 6hrPlevPt, Amon, CFday, CFmon, Eday, Emon, day","6hrLev, 6hrPlev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
ps,Presi√≥n de superficie,"6hrLev, AERmon, Amon, CFday, Emon",6hrLev,‚Äî,"Amon, Emon",gn,üü¢ Alta (horaria)
psl,Presi√≥n al nivel del mar (diagn√≥stico),"6hrPlev, 6hrPlevPt, Amon, day","6hrPlev, 6hrPlevPt",day,Amon,gn,üü¢ Alta (horaria)
tas,Temperatura 2 m,"6hrPlev, 6hrPlevPt, Amon, day","6hrPlev, 6hrPlevPt",day,Amon,gn,üü¢ Alta (horaria)
huss,Humedad 2 m,"6hrPlevPt, Amon, day",6hrPlevPt,day,Amon,gn,üü¢ Alta (horaria)
tsl,Temperatura de suelo,"Eday, Lmon",‚Äî,‚Äî,Lmon,gn,üî¥ Baja (mensual)
mrsol,Humedad total del suelo,"Eday, Emon",‚Äî,‚Äî,Emon,gn,üî¥ Baja (mensual)



 Resumen de prioridad:
üü¢ Alta (horaria): 8 variables
üî¥ Baja (mensual): 4 variables



 Explorando nodo: NCI (Australia)
Registros obtenidos en NCI (Australia): 605


variable_id,uso_WRF,tablas_encontradas,horas,subdiaria,mensual,grid_label,WRF_Listo
ua,Campo 3D ‚Äì Viento U,"6hrLev, 6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
va,Campo 3D ‚Äì Viento V,"6hrLev, 6hrPlevPt, AERmon, Amon, CFday, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
ta,Campo 3D ‚Äì Temperatura,"6hrLev, 6hrPlevPt, Amon, CFday, CFmon, Eday, Emon, day","6hrLev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
hus,Campo 3D ‚Äì Humedad espec√≠fica,"6hrLev, 6hrPlev, 6hrPlevPt, Amon, CFday, CFmon, Eday, Emon, day","6hrLev, 6hrPlev, 6hrPlevPt",day,"Amon, Emon",gn,üü¢ Alta (horaria)
ps,Presi√≥n de superficie,"6hrLev, AERmon, Amon, CFday, Emon",6hrLev,‚Äî,"Amon, Emon",gn,üü¢ Alta (horaria)
psl,Presi√≥n al nivel del mar (diagn√≥stico),"6hrPlev, 6hrPlevPt, Amon, day","6hrPlev, 6hrPlevPt",day,Amon,gn,üü¢ Alta (horaria)
tas,Temperatura 2 m,"6hrPlev, 6hrPlevPt, Amon, day","6hrPlev, 6hrPlevPt",day,Amon,gn,üü¢ Alta (horaria)
huss,Humedad 2 m,"6hrPlevPt, Amon, day",6hrPlevPt,day,Amon,gn,üü¢ Alta (horaria)
tsl,Temperatura de suelo,"Eday, Lmon",‚Äî,‚Äî,Lmon,gn,üî¥ Baja (mensual)
mrsol,Humedad total del suelo,"Eday, Emon",‚Äî,‚Äî,Emon,gn,üî¥ Baja (mensual)



 Resumen de prioridad:
üü¢ Alta (horaria): 8 variables
üî¥ Baja (mensual): 4 variables
