In [14]:
pip install -U kaleido

[33mDEPRECATION: pytorch-lightning 1.6.1 has a non-standard dependency specifier torch>=1.8.*. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [9]:
pip install selenium pillow geckodriver-autoinstaller

Collecting selenium
  Obtaining dependency information for selenium from https://files.pythonhosted.org/packages/7e/c0/092fde36918574e144613de73ba43c36ab8d31e7d36bb44c35261909452d/selenium-4.33.0-py3-none-any.whl.metadata
  Downloading selenium-4.33.0-py3-none-any.whl.metadata (7.5 kB)
Collecting geckodriver-autoinstaller
  Obtaining dependency information for geckodriver-autoinstaller from https://files.pythonhosted.org/packages/99/a0/527dd9b38cbb198e12ac1878a51cd86136222ffc1d131039e6286a6b57dc/geckodriver_autoinstaller-0.1.0-py3-none-any.whl.metadata
  Downloading geckodriver_autoinstaller-0.1.0-py3-none-any.whl.metadata (1.8 kB)
Collecting urllib3[socks]~=2.4.0 (from selenium)
  Obtaining dependency information for urllib3[socks]~=2.4.0 from https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl.metadata
  Downloading urllib3-2.4.0-py3-none-any.whl.metadata (6.5 kB)
Collecting trio~=0.30.0 (from selen

## Depuración de timestamps por año y tesela

In [7]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import ipywidgets as widgets
from IPython.display import display, clear_output
import plotly.graph_objects as go
import pandas as pd

# ── GLOBAL CONFIG ────────────────────────────────────────────────
# Years whose per-tile NetCDF patch is inspected.
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
# Root folder holding <year>/<tile>/<year>_<stem>.nc files.
BASE_NAS_PATH    = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"

# One (tile code, file stem) pair per tile.
PATCH_DEFINITIONS = [
    ("29TPE", "29TPE_patch_29_09"),
    ("29TPF", "29TPF_patch_20_15"),
    ("29TPG", "29TPG_patch_17_10"),
    ("29TPH", "29TPH_patch_13_29"),
    ("29TQE", "29TQE_patch_00_00"),
    ("29TQF", "29TQF_patch_00_16"),
    ("29TQG", "29TQG_patch_02_22"),
    ("29TQH", "29TQH_patch_00_16"),
    ("30TUK", "30TUK_patch_00_00"),
    ("30TUL", "30TUL_patch_00_16"),
    ("30TUM", "30TUM_patch_00_16"),
    ("30TUN", "30TUN_patch_00_16"),
    ("30TVL", "30TVL_patch_00_13"),
    ("30TVM", "30TVM_patch_00_16"),
    ("30TVN", "30TVN_patch_00_16"),
    ("30TWL", "30TWL_patch_00_00"),
    ("30TWM", "30TWM_patch_00_16"),
    ("30TWN", "30TWN_patch_00_16"),
]

# Output folders: interactive HTML goes to OUTPUT_DIR, PNG diagnostics to DIAG_DIR.
OUTPUT_DIR = "/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/Datos brutos 2023"
DIAG_DIR   = os.path.join(OUTPUT_DIR, "diagnosticos")
for _folder in (OUTPUT_DIR, DIAG_DIR):
    os.makedirs(_folder, exist_ok=True)

# Five aspect-ratio presets as (tag, width, height); the last one is very wide.
ASPECT_RATIOS = [
    ("1x1", 1, 1), ("4x3", 4, 3), ("16x9", 16, 9), ("3x2", 3, 2), ("21x9", 21, 9),
]

# ── AUX LOAD TIMESTAMPS ──────────────────────────────────────────
def get_timestamps_from_file(filepath):
    """Return the values of the ``time`` variable stored in a NetCDF file.

    Lookup order: group ``"B01"``, then every other top-level group, then the
    root of the file. The first ``time`` variable found wins.

    Parameters
    ----------
    filepath : str
        Path to the ``.nc`` file.

    Returns
    -------
    numpy.ndarray or None
        ``ds["time"].values`` of the first match, or ``None`` when the file
        does not exist, cannot be read, or holds no ``time`` variable.
    """
    if not os.path.exists(filepath):
        return None

    def _time_from(group=None):
        # Best effort: a missing group or an unreadable file means "not found here".
        try:
            with xr.open_dataset(filepath, group=group, mask_and_scale=False) as ds:
                return ds["time"].values if "time" in ds else None
        except Exception:
            return None

    # 1) the B01 group
    ts = _time_from("B01")
    if ts is not None:
        return ts

    # 2) any other group. An xarray Dataset does not expose the file's group
    #    names, so they are listed with netCDF4 (imported locally because this
    #    cell does not import it at the top).
    try:
        import netCDF4
        with netCDF4.Dataset(filepath, "r") as nc:
            group_names = [name for name in nc.groups if name != "B01"]
    except Exception:
        group_names = []
    for name in group_names:
        ts = _time_from(name)
        if ts is not None:
            return ts

    # 3) the root of the file
    return _time_from(None)

# ── HELPER RUTAS ─────────────────────────────────────────────────
def filepaths_for_year(year):
    """Return one ``(path, tile)`` pair per entry of PATCH_DEFINITIONS for ``year``.

    Paths follow ``BASE_NAS_PATH/<year>/<tile>/<year>_<stem>.nc``; whether the
    file exists is not checked here.
    """
    year_folder = str(year)
    return [
        (os.path.join(BASE_NAS_PATH, year_folder, tile_code, f"{year}_{file_stem}.nc"), tile_code)
        for tile_code, file_stem in PATCH_DEFINITIONS
    ]

# ── MATPLOTLIB POR AÑO ──────────────────────────────────────────
def plot_year(year, out_widget):
    """Plot the timestamps of every tile for one year and export PNG variants.

    Draws one row of markers per tile (in PATCH_DEFINITIONS order) inside
    ``out_widget``; tiles without timestamps are labelled ``"(No data)"``.
    One PNG per entry of ASPECT_RATIOS is written to DIAG_DIR.

    Parameters
    ----------
    year : int
        Year to plot.
    out_widget : ipywidgets.Output
        Output area that receives the printed messages and the figure.
    """
    with out_widget:
        clear_output(wait=True)
        print(f"--- Processing year {year} ---")

        ts_dict = {}
        for fp, tile in filepaths_for_year(year):
            ts = get_timestamps_from_file(fp)
            ts_dict[tile] = ts if ts is not None else []

        if not any(len(v) > 0 for v in ts_dict.values()):
            print("No timestamps found.")
            return

        order = [t for t, _ in PATCH_DEFINITIONS]
        display_size = (12, max(6, len(order) * 0.45))
        fig, ax = plt.subplots(figsize=display_size)

        y_tick_labels = []
        for i, tile in enumerate(order):
            ts = ts_dict.get(tile, [])
            has_data = len(ts) > 0
            if has_data:
                ax.plot(ts, [i] * len(ts), 'o', ms=4)
            y_tick_labels.append(tile if has_data else f"{tile} (No data)")

        ax.set_yticks(list(range(len(order))))
        ax.set_yticklabels(y_tick_labels)

        ax.set_xlabel("Timestamp")
        ax.set_ylabel("Tiles")
        ax.set_title(f"Timestamp Distribution per Tile – Year {year}")
        ax.grid(True, ls='--', alpha=.7)
        plt.tight_layout()

        # Save one PNG per aspect ratio, keeping the height fixed.
        base_h = 6
        for tag, w, h in ASPECT_RATIOS:
            fig.set_size_inches(base_h * w / h, base_h)
            fig.savefig(os.path.join(DIAG_DIR, f"timestamps_{year}_{tag}.png"),
                        dpi=300, bbox_inches="tight")
        print("PNG variants saved in diagnósticos/")

        # The export loop leaves the figure at the last preset size; restore the
        # intended on-screen size before showing it.
        fig.set_size_inches(*display_size)
        plt.show()
        # Release the figure so repeated button clicks do not pile up open figures.
        plt.close(fig)

# ── PLOTLY TODOS LOS AÑOS ───────────────────────────────────────
def plot_all_years(out_widget):
    """Plot the timestamps of every tile for all years in one Plotly figure.

    Writes an interactive HTML file to OUTPUT_DIR and one PNG per entry of
    ASPECT_RATIOS to DIAG_DIR, then shows the figure inside ``out_widget``.
    PNG export is best effort: if it fails, the failure is reported and the
    figure is still shown.

    Parameters
    ----------
    out_widget : ipywidgets.Output
        Output area that receives the printed messages and the figure.
    """
    with out_widget:
        clear_output(wait=True)
        print("--- Processing ALL years (Plotly) ---")

        frames = []
        for yr in YEARS_TO_PROCESS:
            for fp, tile in filepaths_for_year(yr):
                ts = get_timestamps_from_file(fp)
                if ts is None or len(ts) == 0:
                    continue
                # Convert the whole array at once instead of one element at a time.
                frames.append(pd.DataFrame(
                    {"Timestamp": pd.to_datetime(ts), "Tile": tile, "Year": yr}
                ))

        if not frames:
            print("No data found.")
            return

        df = pd.concat(frames, ignore_index=True)
        fig = go.Figure()
        for yr, g in df.groupby("Year"):
            fig.add_trace(go.Scatter(
                x=g["Timestamp"], y=g["Tile"], mode="markers",
                name=str(yr), marker=dict(size=7, opacity=.7)
            ))

        fig.update_layout(
            title="Timestamp Distribution per Tile (All Years)",
            xaxis_title="Timestamp", yaxis_title="Tile",
            height=max(500, df["Tile"].nunique() * 40),
            yaxis=dict(categoryorder="array", categoryarray=[t for t, _ in PATCH_DEFINITIONS])
        )
        html_path = os.path.join(OUTPUT_DIR, "timestamps_ALL_YEARS_interactive.html")
        fig.write_html(html_path)

        # Static export depends on an external image engine; do not let its
        # failure hide the HTML output or the interactive figure.
        try:
            for tag, w, h in ASPECT_RATIOS:
                fig.write_image(os.path.join(DIAG_DIR, f"timestamps_ALL_{tag}.png"),
                                width=int(600 * w / h), height=600, scale=2)
        except Exception as exc:
            print(f"HTML saved; PNG export failed: {exc}")
        else:
            print("HTML + PNG variants saved.")
        fig.show(renderer="iframe")

# ── WIDGET UI ────────────────────────────────────────────────────
# Shared output area: every button renders into it, replacing the previous content.
out_box = widgets.Output()


def _make_year_button(year):
    """Build the small button that plots a single year into ``out_box``."""
    year_button = widgets.Button(
        description=str(year),
        button_style="info",
        layout=widgets.Layout(width="60px"),
    )
    # The closure binds this call's ``year``, so each button keeps its own value.
    year_button.on_click(lambda _clicked: plot_year(year, out_box))
    return year_button


btns = []
for yr in YEARS_TO_PROCESS:
    btns.append(_make_year_button(yr))

all_btn = widgets.Button(description="ALL years", button_style="success")


def _on_all_years(_clicked):
    """Click handler of the "ALL years" button."""
    plot_all_years(out_box)


all_btn.on_click(_on_all_years)

display(widgets.HBox([*btns, all_btn]))
display(out_box)


HBox(children=(Button(button_style='info', description='2020', layout=Layout(width='60px'), style=ButtonStyle(…

Output()

In [33]:
# ╔═══════════  TIMESTAMP-MAP v2 · English UI + counts + PNG variants ═════════╗
#  • Selector de año → mapa Folium con cada tesela coloreada ∝ nº timestamps
#  • El número exacto aparece encima del cuadrado (DivIcon).
#  • Título / leyenda en inglés.
#  • Para cada vista se guarda:
#       - HTML interactivo  (OUTPUT_DIR)
#       - 5 PNG en distintos aspect-ratios (DIAG_DIR) → 1×1, 4×3, 16×9, 3×2, 21×9
#    (necesita  pip install selenium pillow geckodriver-autoinstaller)
# ─────────────────────────────────────────────────────────────────────────────
import xarray as xr, numpy as np, os, pandas as pd, folium, json
from folium.plugins import HeatMap
import ipywidgets as wd
from IPython.display import display, clear_output
import geckodriver_autoinstaller, selenium.webdriver as wb, tempfile
from PIL import Image

# 1 · CONSTANTS AND PATHS ---------------------------------------------------
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
BASE_NAS_PATH = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"
OUTPUT_DIR = "/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/Datos brutos 2023"
DIAG_DIR   = f"{OUTPUT_DIR}/diagnosticos"
os.makedirs(OUTPUT_DIR, exist_ok=True); os.makedirs(DIAG_DIR, exist_ok=True)

# Aspect-ratio presets as (tag, width, height).
ASPECT_RATIOS = [("1x1",1,1), ("4x3",4,3), ("16x9",16,9), ("3x2",3,2), ("21x9",21,9)]

# (tile code, file stem) pairs, spelled out in full. An empty list here would
# leave the per-tile counts empty and break the colour scaling of the map.
PATCH_DEFINITIONS = [
    ("29TPE", "29TPE_patch_29_09"), ("29TPF", "29TPF_patch_20_15"),
    ("29TPG", "29TPG_patch_17_10"), ("29TPH", "29TPH_patch_13_29"),
    ("29TQE", "29TQE_patch_00_00"), ("29TQF", "29TQF_patch_00_16"),
    ("29TQG", "29TQG_patch_02_22"), ("29TQH", "29TQH_patch_00_16"),
    ("30TUK", "30TUK_patch_00_00"), ("30TUL", "30TUL_patch_00_16"),
    ("30TUM", "30TUM_patch_00_16"), ("30TUN", "30TUN_patch_00_16"),
    ("30TVL", "30TVL_patch_00_13"), ("30TVM", "30TVM_patch_00_16"),
    ("30TVN", "30TVN_patch_00_16"), ("30TWL", "30TWL_patch_00_00"),
    ("30TWM", "30TWM_patch_00_16"), ("30TWN", "30TWN_patch_00_16"),
]

# Tile code → (lat, lon) centroid used to place the map markers.
# TODO: fill in your coordinates. The literal must be closed on a code line:
# a closing brace written after a `#` is part of the comment and leaves the
# dict unterminated, which makes the whole cell a SyntaxError.
TESELA_CENTROIDS_EXAMPLE = {
}

CASTILLA_Y_LEON_GEOJSON_URL = "https://gist.githubusercontent.com/jsga/bde68149f50fb9a9cd399f3da7494260/raw/castilla_y_leon.geojson"

# 2 · EXTRACCIÓN DE TIMESTAMPS -------------------------------------------
def get_ts(filepath):
    """Return the values of the ``time`` variable stored in a NetCDF file.

    Lookup order: group ``"B01"``, then the root of the file, then every other
    top-level group. The first ``time`` variable found wins.

    Returns ``None`` when the file does not exist, cannot be read, or holds no
    ``time`` variable.
    """
    if not os.path.exists(filepath):
        return None

    def _read(group=None):
        # Best effort: a missing group or unreadable file means "not found here".
        # `except Exception` (not a bare except) lets KeyboardInterrupt through.
        try:
            with xr.open_dataset(filepath, group=group, mask_and_scale=False) as ds:
                return ds["time"].values if "time" in ds else None
        except Exception:
            return None

    ts = _read("B01")
    if ts is None:
        ts = _read()
    if ts is not None:
        return ts

    # An xarray Dataset does not expose the file's group names, so they are
    # listed with netCDF4 (imported locally: this cell does not import it).
    try:
        import netCDF4
        with netCDF4.Dataset(filepath, "r") as nc:
            group_names = [name for name in nc.groups if name != "B01"]
    except Exception:
        group_names = []
    for name in group_names:
        ts = _read(name)
        if ts is not None:
            return ts
    return None

# 3 · CONTAR TIMESTAMPS ---------------------------------------------------
def count_timestamps(year_sel):
    """Count timestamps per tile for one year or for all years.

    ``year_sel`` is either the string ``"All years"`` or a value accepted by
    ``int()``. Returns a dict mapping every tile code of PATCH_DEFINITIONS to
    its total; files for which ``get_ts`` returns ``None`` contribute nothing.
    """
    totals = dict.fromkeys((code for code, _ in PATCH_DEFINITIONS), 0)
    if year_sel == "All years":
        selected_years = YEARS_TO_PROCESS
    else:
        selected_years = [int(year_sel)]

    for year in selected_years:
        for code, file_stem in PATCH_DEFINITIONS:
            nc_file = os.path.join(BASE_NAS_PATH, str(year), code, f"{year}_{file_stem}.nc")
            stamps = get_ts(nc_file)
            if stamps is None:
                continue
            totals[code] += len(stamps)
    return totals

# 4 · CREAR MAPA ----------------------------------------------------------
def build_map(counts, title):
    """Build a Folium map with one coloured square and one count label per tile.

    Parameters
    ----------
    counts : dict
        Tile code → number of timestamps.
    title : str
        Text shown in a fixed banner at the top of the map.

    Returns
    -------
    folium.Map

    Raises
    ------
    ValueError
        If TESELA_CENTROIDS_EXAMPLE is empty, since the map centre is the mean
        of the centroids.

    Notes
    -----
    Tiles without an entry in TESELA_CENTROIDS_EXAMPLE are drawn at the mean
    centroid and reported with a printed warning, because several such tiles
    overlap each other on the map.
    """
    if not TESELA_CENTROIDS_EXAMPLE:
        raise ValueError("TESELA_CENTROIDS_EXAMPLE is empty: cannot compute the map centre.")

    mean_lat = np.mean([v[0] for v in TESELA_CENTROIDS_EXAMPLE.values()])
    mean_lon = np.mean([v[1] for v in TESELA_CENTROIDS_EXAMPLE.values()])
    m = folium.Map(location=[mean_lat, mean_lon], zoom_start=7, tiles="CartoDB positron")

    # Region outline only (no fill).
    folium.GeoJson(CASTILLA_Y_LEON_GEOJSON_URL,
        style_function=lambda _: dict(color="#007bff", weight=2, fillOpacity=0)
    ).add_to(m)

    def _tile_colour(cnt, frac):
        # Three green shades by share of the maximum; gray for zero counts.
        if frac > 0.66:
            return "#2c6b3e"
        if frac > 0.33:
            return "#7bbf7e"
        return "#d9f0a6" if cnt else "gray"

    # `default=0` keeps an empty `counts` from raising; `or 1` avoids dividing by zero.
    vmax = max(counts.values(), default=0) or 1
    missing = [tile for tile in counts if tile not in TESELA_CENTROIDS_EXAMPLE]
    if missing:
        print(f"Warning: no centroid for {', '.join(missing)}; drawn at the map centre.")

    for tile, cnt in counts.items():
        lat, lon = TESELA_CENTROIDS_EXAMPLE.get(tile, (mean_lat, mean_lon))
        col = _tile_colour(cnt, cnt / vmax)
        folium.RegularPolygonMarker(
            location=(lat, lon), number_of_sides=4, rotation=45, radius=30,
            color=col, fill_color=col, fill_opacity=.8,
            popup=f"<b>Tile:</b> {tile}<br><b>Timestamps:</b> {cnt}",
            tooltip=f"{tile}: {cnt}"
        ).add_to(m)
        # Exact count drawn on top of the square.
        folium.map.Marker(
            [lat, lon],
            icon=folium.DivIcon(html=f"""<div style="font-size:10px;font-weight:bold;
                                         color:black;text-align:center;
                                         transform:translate(-50%,-10px);">{cnt}</div>""")
        ).add_to(m)

    # Fixed title banner injected into the page HTML.
    m.get_root().html.add_child(folium.Element(
        f'<h4 style="position:fixed; top:10px; left:50%; transform:translateX(-50%);'
        'z-index:9999; background:rgba(255,255,255,0.85);'
        'padding:4px 10px; border-radius:6px; font-weight:bold;">'
        f'{title}</h4>'))
    return m

# 5 · GUARDAR HTML + PNGs -------------------------------------------------
def save_map_outputs(map_obj, tag):
    """Save a Folium map as HTML plus one PNG screenshot per aspect ratio.

    The HTML goes to ``OUTPUT_DIR/ts_map_<tag>.html``. The PNGs are taken with
    headless Firefox and written to ``DIAG_DIR/ts_map_<tag>_<ratio>.png`` for
    every entry of ASPECT_RATIOS (window height fixed at 1200 px).

    Parameters
    ----------
    map_obj : folium.Map
        Map to export.
    tag : str
        Suffix used in the output file names.
    """
    from io import BytesIO  # local import: the cell's import lines do not provide it

    html_path = os.path.join(OUTPUT_DIR, f"ts_map_{tag}.html")
    map_obj.save(html_path)

    # ---- PNGs via selenium (headless Firefox) ----
    geckodriver_autoinstaller.install()
    # add_argument() returns None, so the options object must be configured
    # first and then passed; chaining the call would pass options=None.
    options = wb.FirefoxOptions()
    options.add_argument("--headless")

    with tempfile.TemporaryDirectory() as tmp:
        tmp_html = os.path.join(tmp, "tmp.html")
        map_obj.save(tmp_html)
        driver = wb.Firefox(options=options)
        try:
            driver.get("file://" + tmp_html)
            for ratio, w, h in ASPECT_RATIOS:
                # Window dimensions must be whole pixels.
                driver.set_window_size(int(1200 * w / h), 1200)
                png = driver.get_screenshot_as_png()
                img = Image.open(BytesIO(png))
                img.save(os.path.join(DIAG_DIR, f"ts_map_{tag}_{ratio}.png"))
        finally:
            # Always shut the browser down, even if a screenshot fails.
            driver.quit()
    print("HTML + PNG variants saved.")

# 6 · WIDGET UI ----------------------------------------------------------
# Output area for the map, and the year selector that drives it.
out_box = wd.Output()
_year_choices = ["All years"] + [str(y) for y in YEARS_TO_PROCESS]
yr_dd = wd.Dropdown(
    options=_year_choices,
    value="All years",
    description="Select year:",
)


def refresh(_=None):
    """Rebuild, display and export the map for the currently selected year."""
    with out_box:
        clear_output(wait=True)
        selection = yr_dd.value
        tile_counts = count_timestamps(selection)
        if selection == "All years":
            title_suffix = "All Years"
        else:
            title_suffix = selection
        tile_map = build_map(tile_counts, "Timestamp count per Tile – " + title_suffix)
        display(tile_map)
        # Spaces are replaced so the selection can be used in file names.
        save_map_outputs(tile_map, selection.replace(" ", "_"))


yr_dd.observe(refresh, names="value")
display(yr_dd)
display(out_box)
refresh()  # initial render


Dropdown(description='Select year:', options=('All years', '2020', '2021', '2022', '2023', '2024'), value='All…

Output()

In [8]:
# ────────────────────────────────────────────────────────────────
# Script 3 · Comparación visual de "labels" entre años por tesela
# ────────────────────────────────────────────────────────────────
#
# Requisitos extra (ya los usas en los otros notebooks):
#   pip install xarray matplotlib ipywidgets
#
# Idea:
#   • Selecciona 4-5 teselas (al azar o fijas).
#   • Para cada tesela carga la variable `labels` de todos los años.
#   • Muestra un grid Year × Tesela con los rásters (colormap discreto).
#   • Señala con texto en cada panel si la matriz difiere con respecto a 1)
#     la del año anterior y 2) el resto de años (hash MD5).
#   • Opción interactiva: elegir manualmente la tesela y los años.
# ────────────────────────────────────────────────────────────────
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import hashlib, os, random
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Shared global configuration ---
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
BASE_NAS_PATH    = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"
# (tile code, file stem) pairs.
PATCH_DEFS = [
    ("29TPE", "29TPE_patch_29_09"), ("29TPF", "29TPF_patch_20_15"),
    ("29TPG", "29TPG_patch_17_10"), ("29TPH", "29TPH_patch_13_29"),
    ("29TQE", "29TQE_patch_00_00"), ("29TQF", "29TQF_patch_00_16"),
    ("29TQG", "29TQG_patch_02_22"), ("29TQH", "29TQH_patch_00_16"),
    ("30TUK", "30TUK_patch_00_00"), ("30TUL", "30TUL_patch_00_16"),
    ("30TUM", "30TUM_patch_00_16"), ("30TUN", "30TUN_patch_00_16"),
    ("30TVL", "30TVL_patch_00_13"), ("30TVM", "30TVM_patch_00_16"),
    ("30TVN", "30TVN_patch_00_16"), ("30TWL", "30TWL_patch_00_00"),
    ("30TWM", "30TWM_patch_00_16"), ("30TWN", "30TWN_patch_00_16"),
]

# NetCDF group and variable that hold the label raster; change them if your
# variable lives elsewhere (e.g. "crop_id").
LABEL_GROUP_NAME  = "labels"
LABEL_VAR_NAME    = "labels"

# --- Utilidades -------------------------------------------------
def build_path(year, tesela_code, stem):
    """Return ``BASE_NAS_PATH/<year>/<tesela_code>/<year>_<stem>.nc``."""
    file_name = f"{year}_{stem}.nc"
    return os.path.join(BASE_NAS_PATH, str(year), tesela_code, file_name)

def load_label_raster(path):
    """Load the label raster of a NetCDF patch and fingerprint it.

    Reads variable LABEL_VAR_NAME from group LABEL_GROUP_NAME.

    Returns
    -------
    tuple
        ``(array, shape, md5_hex)`` where ``array`` is the raster cast to
        int32 and ``md5_hex`` is the MD5 of its raw bytes, or
        ``(None, None, None)`` when the file does not exist or cannot be read
        (the read error is printed).
    """
    if not os.path.exists(path):
        return None, None, None
    try:
        # The context manager closes the file even when reading the variable fails.
        with xr.open_dataset(path, group=LABEL_GROUP_NAME, mask_and_scale=False) as ds:
            arr = ds[LABEL_VAR_NAME].values
        # Cast to int32 so the hash is stable regardless of the stored dtype.
        arr_int = arr.astype("int32", copy=False)
        md5 = hashlib.md5(arr_int.tobytes()).hexdigest()
        return arr_int, arr_int.shape, md5
    except Exception as e:
        print(f"⚠️  No se pudo leer {path}: {e}")
        return None, None, None

# --- Selección de teselas --------------------------------------
# Tiles pre-selected in the UI (edit freely).
FIXED_SAMPLE = ["29TPE", "29TPF", "30TUK", "30TWL", "30TVM"]
# Tile code → file stem lookup.
TES_ELA_TO_STEM = dict(PATCH_DEFS)

def choose_sample(sample_size=5):
    """Return ``sample_size`` distinct tile codes drawn at random from PATCH_DEFS."""
    all_codes = [code for code, _stem in PATCH_DEFS]
    picked = random.sample(all_codes, sample_size)
    return picked

# --- Ploteo comparativo ----------------------------------------
def plot_label_grids(selected_teselas, selected_years):
    """Show a tile × year grid of label rasters and flag year-to-year changes.

    Each row is a tile and each column a year. The panel title carries the
    raster shape plus two optional markers:

    * ``⟂`` – the raster differs from the one in the previous column
      (only when that previous file could be read);
    * ``*`` – the raster differs from the first readable year of the row.

    Differences are detected by comparing MD5 hashes. Panels whose file is
    missing or unreadable are blanked and titled ``(SIN ARCHIVO)``.

    Parameters
    ----------
    selected_teselas : sequence of str
        Tile codes; each must be a key of TES_ELA_TO_STEM.
    selected_years : sequence of int
        Years to show, in column order.
    """
    n_rows = len(selected_teselas)
    n_cols = len(selected_years)

    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(3*n_cols, 3*n_rows),
                             sharex=True, sharey=True,
                             squeeze=False)
    cmap = "tab20"  # discrete palette; adjust if there are more than 20 classes

    for r, tesela in enumerate(selected_teselas):
        stem = TES_ELA_TO_STEM[tesela]
        ref_hash = None   # hash of the first readable year in this row
        prev_md5 = None   # hash of the previous column; None if it had no file
        for c, yr in enumerate(selected_years):
            ax = axes[r, c]
            arr, shape, md5 = load_label_raster(build_path(yr, tesela, stem))

            title = f"{tesela}\n{yr}"
            if arr is None:
                ax.set_title(title + "\n(SIN ARCHIVO)", fontsize=9, color="red")
                ax.axis("off")
                prev_md5 = None
                continue

            ax.imshow(arr, cmap=cmap, interpolation="nearest")
            ax.set_xticks([]); ax.set_yticks([])

            diff_prev = "⟂" if (prev_md5 and prev_md5 != md5) else ""
            diff_ref = ""
            if ref_hash is None:
                ref_hash = md5
            elif ref_hash != md5:
                diff_ref = "*"

            ax.set_title(f"{title}\n{shape} {diff_prev}{diff_ref}",
                         fontsize=8)
            prev_md5 = md5

        # Row label
        axes[r, 0].set_ylabel(tesela, fontsize=10, rotation=0,
                              labelpad=35, va="center")

    plt.tight_layout()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

# --- Widgets interactivos --------------------------------------
# Multi-select lists for tiles and years, pre-filled with the default sample
# and with every year respectively.
tesela_selector = widgets.SelectMultiple(
    options=[code for code, _stem in PATCH_DEFS],
    value=tuple(FIXED_SAMPLE),
    description="Teselas:",
    rows=10,
    style={"description_width": "initial"},
)
year_selector = widgets.SelectMultiple(
    options=YEARS_TO_PROCESS,
    value=tuple(YEARS_TO_PROCESS),
    description="Años:",
    rows=5,
    style={"description_width": "initial"},
)
run_btn = widgets.Button(description="Comparar", button_style="primary")

out = widgets.Output()


def on_run_clicked(b):
    """Click handler: plot the grid for the current selection inside ``out``."""
    with out:
        clear_output(wait=True)
        chosen_tiles = list(tesela_selector.value)
        chosen_years = list(year_selector.value)
        if not (chosen_tiles and chosen_years):
            print("Selecciona al menos una tesela y un año.")
            return
        print(f"Mostrando {len(chosen_tiles)} tesela(s) × {len(chosen_years)} año(s)…")
        plot_label_grids(chosen_tiles, chosen_years)


run_btn.on_click(on_run_clicked)

_selectors_row = widgets.HBox([tesela_selector, year_selector])
display(widgets.VBox([_selectors_row, run_btn, out]))

# Para lanzar algo por defecto (solo si estás en un script, no en notebook):
# on_run_clicked(None)


VBox(children=(HBox(children=(SelectMultiple(description='Teselas:', index=(0, 1, 8, 15, 13), options=('29TPE'…

In [9]:
# ────────────────────────────────────────────────────────────────
# Script 4 · Recuento de parches NetCDF por Tesela y Año
# ────────────────────────────────────────────────────────────────
#
# Dependencias adicionales (todas habituales):
#   pip install pandas ipywidgets matplotlib seaborn
#
# Objetivo:
#   • Contar .nc por carpeta  AÑO/TESELA/
#   • Mostrar tabla Tesela × Año con el número de archivos.
#   • Calcular media y Δ% por tesela; marcar desviaciones (>±10 %) en rojo.
#   • Heatmap opcional para ver de un vistazo las diferencias.
# ────────────────────────────────────────────────────────────────
import os, glob, pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns                    # solo para el heatmap
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Configuración común ---------------------------------------
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
BASE_NAS_PATH    = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"
# In this cell PATCH_DEFS keeps only the tile codes; the stems are dropped.
PATCH_DEFS       = [
    tile_code
    for tile_code, _stem in (
        ("29TPE", "29TPE_patch_29_09"), ("29TPF", "29TPF_patch_20_15"),
        ("29TPG", "29TPG_patch_17_10"), ("29TPH", "29TPH_patch_13_29"),
        ("29TQE", "29TQE_patch_00_00"), ("29TQF", "29TQF_patch_00_16"),
        ("29TQG", "29TQG_patch_02_22"), ("29TQH", "29TQH_patch_00_16"),
        ("30TUK", "30TUK_patch_00_00"), ("30TUL", "30TUL_patch_00_16"),
        ("30TUM", "30TUM_patch_00_16"), ("30TUN", "30TUN_patch_00_16"),
        ("30TVL", "30TVL_patch_00_13"), ("30TVM", "30TVM_patch_00_16"),
        ("30TVN", "30TVN_patch_00_16"), ("30TWL", "30TWL_patch_00_00"),
        ("30TWM", "30TWM_patch_00_16"), ("30TWN", "30TWN_patch_00_16"),
    )
]


# Folder where the heat-map image is saved.
HEATMAP_DIR = "/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/Datos brutos 2023/diagnosticos"
os.makedirs(HEATMAP_DIR, exist_ok=True)



# --- Función principal -----------------------------------------
def build_counts_df():
    """Return a tile × year DataFrame with the number of ``.nc`` files found.

    Rows are the tile codes of PATCH_DEFS (index ``"Tesela"``, sorted) and
    columns the years of YEARS_TO_PROCESS. A cell is NaN when the folder
    ``BASE_NAS_PATH/<year>/<tile>`` does not exist.
    """
    def _nc_count(year, tile):
        # Example folder: 2023/29TPE, counted with the pattern *.nc
        folder = os.path.join(BASE_NAS_PATH, str(year), tile)
        if os.path.exists(folder):
            return len(glob.glob(os.path.join(folder, "*.nc")))
        return np.nan

    records = [
        {"Tesela": tile, **{year: _nc_count(year, tile) for year in YEARS_TO_PROCESS}}
        for tile in PATCH_DEFS
    ]
    table = pd.DataFrame(records).set_index("Tesela")
    return table.sort_index()

def mark_anomalies(df, thresh=0.10):
    """Return a copy of ``df`` as text, with outlying counts wrapped in ``**``.

    For every row, each non-missing value is compared with the mean of the
    row's non-missing values. A value whose relative deviation from that mean
    exceeds ``thresh`` is rendered as ``"**<int>**"``, any other value as
    ``"<int>"``. Missing cells are left as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table (rows: tiles, columns: years).
    thresh : float, default 0.10
        Relative deviation above which a value is marked.

    Returns
    -------
    pandas.DataFrame
        Object-dtype frame with the same index and columns as ``df``.
    """
    styled = df.astype("object").copy()
    for tesela in styled.index:
        # Statistics come from the numeric original, not from the text copy.
        vals = df.loc[tesela].dropna().astype(float)
        if vals.empty:
            continue
        mean = vals.mean()
        for yr, val in vals.items():
            if mean == 0:
                # Avoid 0/0: with a zero mean, any non-zero value is a deviation.
                is_outlier = val != 0
            else:
                is_outlier = abs(val - mean) / mean > thresh
            text = f"{int(val)}"
            styled.at[tesela, yr] = f"**{text}**" if is_outlier else text
    return styled


# --- Widgets ----------------------------------------------------
# Controls: a refresh button and a toggle for the heat-map.
rebuild_btn = widgets.Button(description="Actualizar recuentos", button_style="info")
heatmap_chk = widgets.Checkbox(value=True, description="Mostrar heatmap")

out = widgets.Output()


def run_counts(_=None):
    """Recount the files, show the marked table and optionally save/show a heat-map."""
    with out:
        clear_output(wait=True)
        df_counts = build_counts_df()
        print("Tabla de recuento de archivos .nc por tesela y año:")
        display(mark_anomalies(df_counts))

        if not heatmap_chk.value:
            return

        # Figure size scales with the number of years (width) and tiles (height).
        fig_width = len(YEARS_TO_PROCESS) * 1.2
        fig_height = len(PATCH_DEFS) * 0.4 + 2
        plt.figure(figsize=(fig_width, fig_height))
        sns.heatmap(
            df_counts,
            annot=True,
            fmt=".0f",
            cmap="YlGnBu",
            cbar_kws={"label": "# files"},
        )
        plt.title("Heatmap · Number of patches per tile and year")
        plt.ylabel("Tile")
        plt.xlabel("Year")
        plt.tight_layout()
        img_path = os.path.join(HEATMAP_DIR, "heatmap_num_patches_tesela_año.png")
        plt.savefig(img_path, dpi=300, bbox_inches="tight")
        print(f"📁  Heat-map guardado en: {img_path}")
        plt.show()


rebuild_btn.on_click(run_counts)

_controls_row = widgets.HBox([rebuild_btn, heatmap_chk])
display(_controls_row)
display(out)

# Run once when the cell is executed.
run_counts()


HBox(children=(Button(button_style='info', description='Actualizar recuentos', style=ButtonStyle()), Checkbox(…

Output()

In [10]:
# ────────────────────────────────────────────────────────────────
# SCRIPT 5 · Recuento de Timestamps por Tesela y Año  (versión corregida)
# ────────────────────────────────────────────────────────────────
#
# Lee **todos** los ficheros .nc de tu dataset, busca la variable `time`
# allí donde esté (raíz o cualquier sub-grupo) y cuenta cuántos timestamps
# hay por combinación Tesela-Año.  Muestra:
#   • Una tabla Tesela × Año con los recuentos, resaltando desviaciones
#     > ±10 % respecto a la media multianual de la misma tesela.
#   • Un heat-map opcional para ver huecos/“outliers” de un vistazo.
#
#  Requisitos : netCDF4 · xarray · pandas · numpy · matplotlib · seaborn · ipywidgets
# ────────────────────────────────────────────────────────────────
import os, glob, netCDF4, pandas as pd, numpy as np
import matplotlib.pyplot as plt, seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output

# 1 · CONFIGURACIÓN
# ----------------------------------------------------------------
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
BASE_NAS_PATH    = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"

# ⚠️ Make sure every stem matches your real file names EXACTLY.
PATCH_DEFS = [
    ("29TPE", "29TPE_patch_29_09"), ("29TPF", "29TPF_patch_20_15"),
    ("29TPG", "29TPG_patch_17_10"), ("29TPH", "29TPH_patch_13_29"),
    ("29TQE", "29TQE_patch_00_00"), ("29TQF", "29TQF_patch_00_16"),
    ("29TQG", "29TQG_patch_02_22"), ("29TQH", "29TQH_patch_00_16"),
    ("30TUK", "30TUK_patch_00_00"), ("30TUL", "30TUL_patch_00_16"),
    ("30TUM", "30TUM_patch_00_16"), ("30TUN", "30TUN_patch_00_16"),
    ("30TVL", "30TVL_patch_00_13"), ("30TVM", "30TVM_patch_00_16"),
    ("30TVN", "30TVN_patch_00_16"), ("30TWL", "30TWL_patch_00_00"),
    ("30TWM", "30TWM_patch_00_16"), ("30TWN", "30TWN_patch_00_16"),
]
# Tile code → file stem, for quick lookup.
TESELA_TO_STEM = {tile_code: file_stem for tile_code, file_stem in PATCH_DEFS}


# Folder where the heat-map image is saved.
HEATMAP_DIR = "/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/Datos brutos 2023/diagnosticos"
os.makedirs(HEATMAP_DIR, exist_ok=True)


# 2 · FUNCIÓN ROBUSTA DE LECTURA
# ----------------------------------------------------------------
def robust_count_time(nc_path: str) -> int:
    """Return the length of the first ``time`` variable found in a NetCDF file.

    The root is checked first, then its groups, then their sub-groups, and so
    on level by level, so a shallower ``time`` always wins over a deeper one.

    Parameters
    ----------
    nc_path : str
        Path to the ``.nc`` file.

    Returns
    -------
    int
        Number of entries of the ``time`` variable, or 0 when the file does
        not exist, cannot be read (the error is printed), or has no ``time``
        variable at any level.
    """
    if not os.path.exists(nc_path):
        return 0
    try:
        # The context manager closes the file on every exit path, errors included.
        with netCDF4.Dataset(nc_path, "r") as root:
            pending = [root]
            while pending:
                node = pending.pop(0)
                if "time" in node.variables:
                    return len(node.variables["time"])
                # Queue the children so nested groups are searched too.
                pending.extend(node.groups.values())
    except Exception as e:
        print(f"⛔  Error leyendo {nc_path}: {e}")
    return 0


# 3 · CONSTRUIR DATAFRAME TES × AÑO
# ----------------------------------------------------------------
def build_timestamp_df() -> pd.DataFrame:
    """Return a tile × year DataFrame with the timestamp count of each patch file.

    Rows are the tile codes of PATCH_DEFS (index ``"Tesela"``, sorted) and
    columns the years of YEARS_TO_PROCESS; each cell is the value returned by
    ``robust_count_time`` for ``BASE_NAS_PATH/<year>/<tile>/<year>_<stem>.nc``.
    """
    def _count_for(year, tile_code, file_stem):
        nc_file = os.path.join(BASE_NAS_PATH, str(year), tile_code, f"{year}_{file_stem}.nc")
        return robust_count_time(nc_file)

    records = [
        {"Tesela": tile_code,
         **{year: _count_for(year, tile_code, file_stem) for year in YEARS_TO_PROCESS}}
        for tile_code, file_stem in PATCH_DEFS
    ]
    table = pd.DataFrame(records).set_index("Tesela")
    return table.sort_index()


# 4 · ESTILO CON DESVIACIÓN ±10 %
# ----------------------------------------------------------------
def style_anomalies(df: pd.DataFrame, thresh: float = 0.10): #-> pd.io.formats.style.Styler:
    """Return a Styler that highlights cells deviating from their row mean.

    Per row: missing cells get a grey background; cells whose relative
    deviation from the row mean exceeds ``thresh`` get a soft red background;
    rows whose mean is 0 get no deviation highlighting. Values are formatted
    without decimals.
    """
    grey_css = "background-color:#d0d0d0"
    red_css = "background-color:#ffcccc"  # soft red

    def _cell_css(value, row_mean):
        if pd.isna(value):
            return grey_css
        if row_mean == 0:
            return ""
        deviates = abs(value - row_mean) / row_mean > thresh
        return red_css if deviates else ""

    def _row_css(row):
        row_mean = row.mean()
        return [_cell_css(value, row_mean) for value in row]

    styler = df.style.apply(_row_css, axis=1)
    return styler.format("{:.0f}")


# 5 · IU CON IPYWIDGETS
# ----------------------------------------------------------------
# Controls: a refresh button, a heat-map toggle and the output area.
btn_refresh = widgets.Button(description="Actualizar recuentos TS", button_style="info")
chk_heatmap = widgets.Checkbox(value=True, description="Mostrar heat-map")
out_display = widgets.Output()


def run_analysis(_=None):
    """Recount the timestamps, show the styled table and optionally save/show a heat-map."""
    with out_display:
        clear_output(wait=True)
        df = build_timestamp_df()
        print("Recuento de timestamps por Tesela y Año:")
        display(style_anomalies(df))

        if not chk_heatmap.value:
            return

        # Figure size scales with the number of years (width) and rows (height).
        fig_width = len(YEARS_TO_PROCESS) * 1.3
        fig_height = len(df) * 0.4 + 2
        plt.figure(figsize=(fig_width, fig_height))
        sns.heatmap(
            df,
            annot=True,
            fmt=".0f",
            cmap="YlOrRd",
            cbar_kws={"label": "# timestamps"},
        )
        plt.title("Heat-map · Timestamps per tile and year")
        plt.ylabel("Tile")
        plt.xlabel("Year")
        plt.tight_layout()
        img_path = os.path.join(HEATMAP_DIR, "heatmap_timestamps_tesela_año.png")
        plt.savefig(img_path, dpi=300, bbox_inches="tight")
        print(f"📁  Heat-map guardado en: {img_path}")
        plt.show()


btn_refresh.on_click(run_analysis)

_controls_row = widgets.HBox([btn_refresh, chk_heatmap])
display(_controls_row)
display(out_display)

# Initial run
run_analysis()


HBox(children=(Button(button_style='info', description='Actualizar recuentos TS', style=ButtonStyle()), Checkb…

Output()

In [11]:
# ────────────────────────────────────────────────────────────────
# SCRIPT 6 · Detección de Timestamps Duplicados  (fechas ISO)
#  ▸ Guarda en  …/diagnosticos :
#      • duplicates_summary.csv
#      • duplicates_detailed.csv   ← ahora con fechas ISO-8601
#      • heatmap_duplicates.png
# ────────────────────────────────────────────────────────────────
import os, netCDF4, numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime

# 1 · CONFIGURATION
# Years scanned for each tile.
YEARS_TO_PROCESS = [2020, 2021, 2022, 2023, 2024]
# Root folder of the patch files (laid out as <year>/<tile>/<year>_<stem>.nc, see build_tables).
BASE_NAS_PATH = "/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL"
# Folder that receives the CSV reports and the heat-map; created if missing.
OUTPUT_DIR = "/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/Datos brutos 2023/diagnosticos"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# (tile, patch stem) pairs: one patch file is inspected per tile and year.
PATCH_DEFINITIONS = [
    ("29TPE", "29TPE_patch_29_09"), ("29TPF", "29TPF_patch_20_15"),
    ("29TPG", "29TPG_patch_17_10"), ("29TPH", "29TPH_patch_13_29"),
    ("29TQE", "29TQE_patch_00_00"), ("29TQF", "29TQF_patch_00_16"),
    ("29TQG", "29TQG_patch_02_22"), ("29TQH", "29TQH_patch_00_16"),
    ("30TUK", "30TUK_patch_00_00"), ("30TUL", "30TUL_patch_00_16"),
    ("30TUM", "30TUM_patch_00_16"), ("30TUN", "30TUN_patch_00_16"),
    ("30TVL", "30TVL_patch_00_13"), ("30TVM", "30TVM_patch_00_16"),
    ("30TVN", "30TVN_patch_00_16"), ("30TWL", "30TWL_patch_00_00"),
    ("30TWM", "30TWM_patch_00_16"), ("30TWN", "30TWN_patch_00_16"),
]

# Output artefacts written by run().
CSV_SUMMARY  = os.path.join(OUTPUT_DIR, "duplicates_summary.csv")
CSV_DETAILED = os.path.join(OUTPUT_DIR, "duplicates_detailed.csv")
PNG_HEATMAP  = os.path.join(OUTPUT_DIR, "heatmap_duplicates.png")

# 2 · UTILIDADES
def _read_time_var(ds):
    """Devuelve (array, units, calendar) de la primera variable time encontrada."""
    if "time" in ds.variables:
        v = ds.variables["time"]
        return v[:], getattr(v, "units", None), getattr(v, "calendar", "standard")
    for g in ds.groups.values():
        if "time" in g.variables:
            v = g.variables["time"]
            return v[:], getattr(v, "units", None), getattr(v, "calendar", "standard")
    return None, None, None

def extract_time_values(path_nc):
    """Read the time axis of a NetCDF file.

    Parameters
    ----------
    path_nc : str or path-like
        File to open read-only.

    Returns
    -------
    tuple
        ``(values, units, calendar)`` as produced by ``_read_time_var``, or
        ``(None, None, None)`` when the file does not exist or cannot be read
        (the error is printed, not raised).
    """
    if not os.path.exists(path_nc):
        return None, None, None
    try:
        # The context manager closes the dataset even when reading the time
        # variable raises; the values are already in memory on return.
        with netCDF4.Dataset(path_nc, "r") as ds:
            return _read_time_var(ds)
    except Exception as e:
        print(f"⛔  Error leyendo {path_nc}: {e}")
        return None, None, None

def to_iso(arr, units, calendar):
    """Convert numeric time values into a list of ISO-8601 strings.

    ``None`` yields an empty list. Decoding is attempted with
    ``netCDF4.num2date``; if anything in that path raises, every value is
    interpreted as a number of days since 1970-01-01 instead.
    """
    if arr is None:
        return []
    try:
        decoded = netCDF4.num2date(arr, units=units, calendar=calendar,
                                   only_use_cftime_datetimes=False)
        stamps = []
        for moment in decoded:
            # Non-``datetime`` results are stringified instead.
            if isinstance(moment, datetime):
                stamps.append(moment.isoformat())
            else:
                stamps.append(str(moment))
        return stamps
    except Exception:
        # Simple fallback: days since 1970-01-01.
        epoch = datetime(1970,1,1)
        return [(epoch + pd.Timedelta(days=float(x))).isoformat() for x in arr]

def count_duplicates(arr, tol_sec=30):
    """Count surplus timestamps that fall in the same ``tol_sec``-wide bin.

    ``arr`` is multiplied by 86400 (i.e. it is treated as days), divided by
    ``tol_sec`` and rounded to the nearest integer bin. Every occurrence beyond
    the first in a bin counts as one duplicate. Returns ``np.nan`` when
    ``arr`` is ``None``.
    """
    if arr is None:
        return np.nan
    bins = np.round((arr * 86400) / tol_sec).astype(int)
    occurrences = np.unique(bins, return_counts=True)[1]
    surplus = np.maximum(occurrences - 1, 0)
    return int(np.sum(surplus))


# 3 · CREAR TABLAS
def build_tables(verbose=False):
    """Build the duplicate-count summary and, optionally, a detailed listing.

    Parameters
    ----------
    verbose : bool
        When true, one detail record is collected for every tile/year whose
        duplicate count is non-zero and not NaN.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Summary indexed by ``"Tesela"`` (one column per year) and the detail
        table (empty when ``verbose`` is false or nothing was found).
    """
    summary_rows = []
    detail_rows = []

    for tesela, stem in PATCH_DEFINITIONS:
        per_year = {"Tesela": tesela}
        for yr in YEARS_TO_PROCESS:
            nc_file = os.path.join(BASE_NAS_PATH, str(yr), tesela, f"{yr}_{stem}.nc")
            arr, units, cal = extract_time_values(nc_file)
            n_dup = count_duplicates(arr)
            per_year[yr] = n_dup

            wants_detail = verbose and n_dup and not np.isnan(n_dup)
            if not wants_detail:
                continue

            # The listing uses exact value matches (np.unique), whereas the
            # count above comes from count_duplicates.
            values, freq = np.unique(arr, return_counts=True)
            repeated_iso = to_iso(values[freq > 1], units, cal)
            detail_rows.append({
                "Tesela": tesela,
                "Año": yr,
                "Nº duplicados": n_dup,
                "Fechas ISO duplicadas": repeated_iso
            })
        summary_rows.append(per_year)

    summary = pd.DataFrame(summary_rows).set_index("Tesela").sort_index()
    return summary, pd.DataFrame(detail_rows)

# 4 · UI
# Button that triggers the duplicate check.
btn_run  = widgets.Button(description="Comprobar duplicados", button_style="warning")
# When ticked, the detailed listing is also built and saved.
chk_det  = widgets.Checkbox(value=False, description="Guardar listado detallado")
# Output area used by run().
out_box  = widgets.Output()

def run(_=None):
    """Compute duplicate counts, save the reports and render them in ``out_box``.

    The positional argument is ignored so the function can serve as a button
    callback. The summary CSV and the heat-map PNG are always written; the
    detailed CSV only when ``chk_det`` is ticked and at least one record exists.
    """
    with out_box:
        clear_output(wait=True)

        want_detail = chk_det.value
        df_sum, df_det = build_tables(verbose=want_detail)

        df_sum.to_csv(CSV_SUMMARY)
        print(f"✓ Resumen guardado en {CSV_SUMMARY}")
        if want_detail and not df_det.empty:
            df_det.to_csv(CSV_DETAILED, index=False)
            print(f"✓ Detalle guardado en {CSV_DETAILED}")

        # Colour scale spans 0..max count, with a floor of 1.
        top = max(1, df_sum.max().max())
        styled = df_sum.style.background_gradient(cmap="Reds", vmin=0, vmax=top)
        display(styled.format("{:.0f}"))

        # Heat-map
        fig, ax = plt.subplots(figsize=(len(YEARS_TO_PROCESS)*1.3, len(df_sum)*0.4 + 2))
        sns.heatmap(df_sum, annot=True, fmt=".0f", cmap="Reds",
                    cbar_kws={"label": "Nº duplicados"}, ax=ax)
        ax.set_title("Heat-map · Timestamps duplicados por Tesela y Año")
        ax.set_xlabel("Año")
        ax.set_ylabel("Tesela")
        fig.tight_layout()
        fig.savefig(PNG_HEATMAP, dpi=300, bbox_inches="tight")
        print(f"✓ Heat-map guardado en {PNG_HEATMAP}")
        plt.show()

# Re-run the duplicate check on every click.
btn_run.on_click(run)

# Controls on one row, captured output right below them.
display(widgets.HBox([btn_run, chk_det]))
display(out_box)

# Initial run
run()




Output()

In [25]:
# ╔════════════  PATCH VIEWER v17 · overlay style selector ════════════╗
# · Nuevo control “Overlay style”:  
#       • Color (+ leyenda)   ← comportamiento de v16  
#       • Grayscale (sin leyenda) – muestra máscara en blanco-y-negro
# · Si la banda es “RGB composite” → selector Colormap desactivado
# · Cache en RAM, indicador ⏳, PNG, leyenda discreta de cultivos (modo color)
# ──────────────────────────────────────────────────────────────────────
import os, netCDF4, xarray as xr, numpy as np, matplotlib.pyplot as plt
import matplotlib as mpl, matplotlib.patches as mpatches
import pandas as pd, ipywidgets as wd
from pathlib import Path
from IPython.display import display, clear_output

# ── paths --------------------------------------------------------------
# Root of the patch files; load_patch_disk reads <year>/<tile>/<year>_<stem>.nc below it.
BASE_NAS = Path("/run/user/1000/gvfs/smb-share:server=10.168.168.61,"
                "share=rodrigopg/parches_S4A_CyL")
# Folder where save_png writes figures; created on the same line if missing.
OUT_DIR  = Path("/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/"
                "Datos brutos 2023/diagnosticos"); OUT_DIR.mkdir(parents=True, exist_ok=True)

# Candidate locations of the crop-name CSV; the first existing one is used.
CSV_CANDIDATES = [
    Path("/run/user/1000/gvfs/smb-share:server=10.168.168.61,"
         "share=rodrigopg/cultivo_map_final.csv"),
    Path("/mnt/data/cultivo_map_final.csv")
]

# (year, tile, patch stem) triples offered in the "Patch" dropdown.
PATCH_SELECTIONS = [
    (2020, "29TPF", "29TPF_patch_20_15"),
    (2021, "29TPG", "29TPG_patch_17_10"),
    (2022, "29TPH", "29TPH_patch_13_29"),
    (2023, "29TQG", "29TQG_patch_02_22"),
    (2024, "30TUM", "30TUM_patch_20_15"),
    (2024, "30TUM", "30TUM_patch_00_16"),
    (2024, "30TUK", "30TUK_patch_00_00")
]

# ── crop map -----------------------------------------------------------
# code → crop name lookup; stays empty when none of the candidate CSVs exists.
crop_map={}
for p in CSV_CANDIDATES:
    if p.exists():
        # The CSV is read without a header row: first column name, second column code.
        df=pd.read_csv(p,header=None,names=["Crop","Code"])
        crop_map={int(r.Code):r.Crop for r in df.itertuples()}
        print(f"✓ Crop map loaded ({len(crop_map)}) from {p}")
        break  # only the first existing candidate is used

# ── utilidades ----------------------------------------------------------
def open_ds(root, grp):
    """Open group ``grp`` of the netCDF4 dataset ``root`` as an xarray Dataset.

    Masking and scaling are disabled (``mask_and_scale=False``).
    """
    store = xr.backends.NetCDF4DataStore(root[grp])
    return xr.open_dataset(store, mask_and_scale=False)

def load_patch_disk(y, tile, stem):
    """Open one patch file and return ``(bands, parcels, labels)``.

    ``bands`` maps every root group whose name starts with ``"B"`` to its
    xarray Dataset; ``parcels`` and ``labels`` are the Datasets of the groups
    with those names. The underlying netCDF4 handle is left open.
    """
    nc_file = BASE_NAS / str(y) / tile / f"{y}_{stem}.nc"
    root = netCDF4.Dataset(nc_file, "r")

    bands = {}
    for group_name in root.groups:
        if group_name.startswith("B"):
            bands[group_name] = open_ds(root, group_name)

    parc = open_ds(root, "parcels")
    lab = open_ds(root, "labels")
    return bands, parc, lab

def stretch(a,p1=2,p2=98):
    """Percentile contrast stretch of ``a`` into the [0, 1] range.

    The ``p1``/``p2`` percentiles (NaNs ignored) map to 0 and 1; values outside
    are clipped. A tiny epsilon in the denominator avoids division by zero for
    constant inputs.
    """
    low, high = np.nanpercentile(a, [p1, p2])
    scaled = (a - low) / (high - low + 1e-9)
    return np.clip(scaled, 0, 1)

def resize_to(a,tgt):
    """Nearest-neighbour resize of the first two axes of ``a`` to ``tgt``.

    Parameters
    ----------
    a : array-like with ``.shape``
        Source raster (rows, cols, ...).
    tgt : tuple
        Target shape; only ``tgt[0]`` (rows) and ``tgt[1]`` (cols) are used.

    Returns
    -------
    ``a`` itself when the shape already matches, otherwise a NumPy array of
    shape ``(tgt[0], tgt[1], ...)``.

    For integer up-scaling factors the result equals ``np.repeat`` along both
    axes. Unlike plain integer repetition, this also handles targets that are
    smaller than the source or not an exact multiple of it, instead of
    returning an empty or wrongly sized array.
    """
    if a.shape==tgt: return a
    src_rows, src_cols = a.shape[0], a.shape[1]
    # Map each target index back to the source cell that covers it.
    rows = (np.arange(tgt[0]) * src_rows) // tgt[0]
    cols = (np.arange(tgt[1]) * src_cols) // tgt[1]
    return np.asarray(a)[np.ix_(rows, cols)]

def discrete_cmap(n, base="nipy_spectral"):
    """Build a ``ListedColormap`` with black for index 0 plus ``n`` colours.

    The ``n`` colours are sampled evenly from the 0.10–1.00 range of the
    ``base`` colormap.
    """
    # ``mpl.cm.get_cmap`` is deprecated in recent Matplotlib releases; prefer
    # the ``mpl.colormaps`` registry and fall back only when it is missing.
    registry = getattr(mpl, "colormaps", None)
    base_c = registry[base] if registry is not None else mpl.cm.get_cmap(base)
    colors=[(0,0,0,1)] + list(base_c(np.linspace(0.10,1.00,n)))
    return mpl.colors.ListedColormap(colors)

# ── widgets -------------------------------------------------------------
# Colormaps offered for single-band rendering.
COLORMAPS = ["plasma","viridis","inferno","magma","gray","terrain","cividis","cubehelix"]

# Patch selector: label "year-tile-stem" → (year, tile, stem) tuple.
patch_dd   = wd.Dropdown(options={f"{y}-{t}-{s}":(y,t,s) for y,t,s in PATCH_SELECTIONS},
                         description="Patch:")
# Band selector; its options are filled in by update_bands().
band_dd    = wd.Dropdown(description="Band:")
cmap_dd    = wd.Dropdown(options=COLORMAPS, value="plasma", description="Colormap:")
style_dd   = wd.Dropdown(options=["Color + legend","Grayscale (no legend)"],
                         value="Color + legend", description="Overlay style:")
# Time index; its maximum is set in on_patch_change().
ts_slider  = wd.IntSlider(description="Timestamp")
overlay_rb = wd.RadioButtons(options=["None","Parcels","Labels"], description="Overlay:")
btn_png    = wd.Button(description="Save PNG", button_style="success")
# Text label used as a busy indicator (see set_busy).
lbl_busy   = wd.Label(value="")
out_fig    = wd.Output()

# _state: data of the currently selected patch plus the last figure.
# _cache: already-loaded patches keyed by "<year>_<tile>_<stem>".
_state, _cache = {}, {}

# ── UI helpers ----------------------------------------------------------
def set_busy(on=True):
    """Show the loading hint in ``lbl_busy`` when ``on`` is truthy, else clear it."""
    if on:
        lbl_busy.value = "⏳ Loading…"
    else:
        lbl_busy.value = ""

def update_bands():
    """Refresh the band dropdown from ``_state["bands"]``.

    Does nothing until a patch has been loaded. The colormap selector is
    disabled while "RGB composite" is selected.
    """
    if "bands" not in _state:
        return

    available = sorted(_state["bands"].keys())
    band_dd.options = ["RGB composite"] + available

    # Fall back to the composite when the current selection is not offered.
    if band_dd.value not in band_dd.options:
        band_dd.value = "RGB composite"

    showing_rgb = (band_dd.value == "RGB composite")
    cmap_dd.disabled = showing_rgb


# ── callbacks -----------------------------------------------------------
def on_patch_change(_=None):
    """Load the selected patch (from cache or disk) and redraw the viewer.

    The positional argument is ignored so the function can be used as a
    traitlets observer. The busy indicator is always cleared on exit, even if
    loading or drawing raises.
    """
    if patch_dd.value is None: return
    set_busy(True)
    try:
        y,tile,stem = patch_dd.value
        pid=f"{y}_{tile}_{stem}"
        # Load from disk only once per patch.
        if pid not in _cache:
            _cache[pid]=load_patch_disk(y,tile,stem)
        bands,parc,lab=_cache[pid]
        _state.update(bands=bands,parc=parc,lab=lab)
        # Slider range follows the time axis of the first band.
        ts_slider.max=next(iter(bands.values()))["time"].size-1
        ts_slider.value=0
        update_bands()
        draw()
    finally:
        set_busy(False)

def get_overlay():
    """Return ``(data, cmap, labels)`` for the overlay chosen in ``overlay_rb``.

    * "None"    → ``(None, None, None)``.
    * "Parcels" → raw values, a discrete colormap sized by the maximum value,
      and ``None`` for labels.
    * "Labels"  → values remapped to 1..n in ascending code order (0 stays 0),
      a colormap with ``n`` colours, and crop names from ``crop_map`` (the
      code as text when unknown).
    """
    choice = overlay_rb.value
    if choice == "None":
        return None, None, None

    if choice == "Parcels":
        source, var_name = _state["parc"], "parcels"
    else:
        source, var_name = _state["lab"], "labels"

    layer = source[var_name]
    if "time" in layer.dims:
        # Time-dependent layers follow the timestamp slider.
        layer = layer.isel(time=ts_slider.value)
    data = layer.values

    if choice != "Labels":
        return data, discrete_cmap(int(data.max())), None

    codes = np.unique(data)
    codes = sorted(codes[codes != 0])
    cmap = discrete_cmap(len(codes))
    lookup = {code: pos + 1 for pos, code in enumerate(codes)}
    data = np.vectorize(lambda px: lookup.get(px, 0))(data)
    names = [crop_map.get(code, str(code)) for code in codes]
    return data, cmap, names

def make_raster():
    """Return ``(image, cmap)`` for the selected band and timestamp.

    For "RGB composite" with B04/B03/B02 all available, the image is a stacked
    RGB array and ``cmap`` is ``None``. Otherwise a single band is stretched
    and paired with the colormap chosen in ``cmap_dd``. All rasters are
    resized to the shape of the parcels layer.

    When the selection is not an available band (e.g. "RGB composite" was
    requested but one of the RGB bands is missing), the first band in sorted
    order is shown instead of failing with ``KeyError``.
    """
    b,t=band_dd.value,ts_slider.value; bands=_state["bands"]
    ref=_state["parc"]["parcels"].shape
    if b=="RGB composite" and all(k in bands for k in ("B04","B03","B02")):
        r=resize_to(stretch(bands["B04"]["B04"][t]),ref)
        g=resize_to(stretch(bands["B03"]["B03"][t]),ref)
        b_=resize_to(stretch(bands["B02"]["B02"][t]),ref)
        return np.dstack([r,g,b_]),None
    if b not in bands:
        # Composite unavailable (or nothing selected yet): show a real band.
        b=sorted(bands)[0]
    arr=resize_to(bands[b][b][t],ref)
    return stretch(arr), cmap_dd.value

def draw(*_):
    """Render the two-panel figure into ``out_fig`` and remember it in ``_state``.

    Left panel: selected raster with the overlay (if any) blended at 30 % alpha.
    Right panel: the overlay alone, with a crop legend (Labels) or a colorbar
    (otherwise) unless the grayscale style is selected; "(empty)" when no
    overlay is chosen. Positional arguments are ignored.
    """
    # Nothing loaded yet → nothing to draw.
    if not _state: return
    out_fig.clear_output(wait=True)
    ov_arr,ov_cmap,labels=get_overlay()
    ras,ras_cmap=make_raster()
    gray_mode=(style_dd.value=="Grayscale (no legend)")
    if gray_mode and ov_arr is not None:
        # Grayscale style replaces the discrete colormap by the name "gray".
        ov_cmap="gray"

    with out_fig:
        fig,(axL,axR)=plt.subplots(1,2,figsize=(11,5))
        # left panel
        axL.imshow(ras if ras_cmap is None else ras, cmap=ras_cmap or None)
        if ov_arr is not None:
            axL.imshow(ov_arr, cmap=ov_cmap, alpha=.3)
        axL.axis("off"); axL.set_title(f"{band_dd.value}")
        # right panel
        axR.axis("off")
        if ov_arr is not None:
            im=axR.imshow(ov_arr, cmap=ov_cmap)
            if not gray_mode:
                if overlay_rb.value=="Labels" and labels:
                    # One legend patch per crop; colour index i+1 because 0 is the background.
                    handles=[mpatches.Patch(color=ov_cmap(i+1),label=l)
                             for i,l in enumerate(labels)]
                    axR.legend(handles=handles, bbox_to_anchor=(1.02,0.5),
                               loc="center left", fontsize=6, frameon=False)
                else:
                    fig.colorbar(im, ax=axR, fraction=.046, pad=.02).ax.tick_params(labelsize=6)
            axR.set_title(overlay_rb.value + (" (gray)" if gray_mode else ""))
        else:
            axR.text(.5,.5,"(empty)",ha="center",va="center")
        year, tile, stem = patch_dd.value            # unpack the (year, tile, stem) tuple
        fig.suptitle(f"{year} – {stem}  |  t={ts_slider.value}", fontsize=11)
        plt.tight_layout(); plt.show()

    # Keep a handle on the figure so save_png can write it later.
    _state["fig"] = fig

def save_png(_):
    """Write the last drawn figure to ``OUT_DIR`` as a 300-dpi PNG.

    The file name encodes the patch label, band and timestamp index. Prints a
    warning and returns when no figure has been stored yet.
    """
    current = _state.get("fig")
    if current is None:
        print("⚠️  No figure to save yet.")
        return
    file_name = f"patch_view_{patch_dd.label}_band{band_dd.value}_t{ts_slider.value}.png"
    target = OUT_DIR / file_name
    current.savefig(target, dpi=300, bbox_inches="tight")
    print(f"✓ Saved → {target}")

# bind
def _redraw(_change=None):
    """Redraw with the busy indicator on; the indicator is cleared even on error."""
    set_busy(True)
    try:
        draw()
    finally:
        set_busy(False)

# patch_dd is deliberately NOT in this loop: on_patch_change() already redraws
# after loading, and a generic redraw would first render the previous patch's
# data under the new patch title.
for w in (band_dd, cmap_dd, ts_slider, overlay_rb, style_dd):
    w.observe(_redraw, names="value")
patch_dd.observe(on_patch_change, names="value")
btn_png.on_click(save_png)
band_dd.observe(lambda *_: update_bands(), names="value")

# UI layout: selectors on top, action row, then the figure output.
ui=wd.VBox([
    patch_dd,
    wd.HBox([band_dd, cmap_dd, ts_slider]),
    overlay_rb,
    style_dd,
    wd.HBox([btn_png, lbl_busy]),
    out_fig
])
display(ui)
# Load and draw the initially selected patch.
on_patch_change()


✓ Crop map loaded (313) from /run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/cultivo_map_final.csv


VBox(children=(Dropdown(description='Patch:', options={'2020-29TPF-29TPF_patch_20_15': (2020, '29TPF', '29TPF_…

In [27]:
# ╔════════════  PATCH VIEWER v18 · 3 overlay styles ═════════════╗
import os, netCDF4, xarray as xr, numpy as np, matplotlib.pyplot as plt
import matplotlib as mpl, matplotlib.patches as mpatches
import pandas as pd, ipywidgets as wd
from pathlib import Path
from IPython.display import display, clear_output

# ── paths --------------------------------------------------------------
# Root of the patch files; load_patch_disk reads <year>/<tile>/<year>_<stem>.nc below it.
BASE_NAS = Path("/run/user/1000/gvfs/smb-share:server=10.168.168.61,"
                "share=rodrigopg/parches_S4A_CyL")
# Folder where save_png writes figures; created below if missing.
OUT_DIR  = Path("/home/rodrigo/Downloads/2023_Clasificacion_MCSNCyL/"
                "Datos brutos 2023/diagnosticos")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Candidate locations of the crop-name CSV; the first existing one is used.
CSV_CANDIDATES = [
    Path("/run/user/1000/gvfs/smb-share:server=10.168.168.61,"
         "share=rodrigopg/cultivo_map_final.csv"),
    Path("/mnt/data/cultivo_map_final.csv")
]

# (year, tile, patch stem) triples offered in the "Patch" dropdown.
PATCH_SELECTIONS = [
    (2020, "29TPF", "29TPF_patch_20_15"),
    (2021, "29TPG", "29TPG_patch_17_10"),
    (2022, "29TPH", "29TPH_patch_13_29"),
    (2023, "29TQG", "29TQG_patch_02_22"),
    (2024, "30TUM", "30TUM_patch_20_15"),
    (2024, "30TUM", "30TUM_patch_00_16"),
    (2024, "30TUK", "30TUK_patch_00_00"),
]

# ── crop map -----------------------------------------------------------
# code → crop name lookup; stays empty when none of the candidate CSVs exists.
crop_map = {}
for p in CSV_CANDIDATES:
    if p.exists():
        # The CSV is read without a header row: first column name, second column code.
        df = pd.read_csv(p, header=None, names=["Crop", "Code"])
        crop_map = {int(r.Code): r.Crop for r in df.itertuples()}
        print(f"✓ Crop map loaded ({len(crop_map)}) from {p}")
        break  # only the first existing candidate is used

# ── util functions -----------------------------------------------------
def open_ds(root, grp):
    """Open group ``grp`` of the netCDF4 dataset ``root`` as an xarray Dataset.

    Masking and scaling are disabled (``mask_and_scale=False``).
    """
    store = xr.backends.NetCDF4DataStore(root[grp])
    return xr.open_dataset(store, mask_and_scale=False)

def load_patch_disk(y, tile, stem):
    """Open one patch file and return ``(bands, parcels, labels)``.

    ``bands`` maps every root group whose name starts with ``"B"`` to its
    xarray Dataset; ``parcels`` and ``labels`` are the Datasets of the groups
    with those names. The underlying netCDF4 handle is left open.
    """
    nc_file = BASE_NAS / str(y) / tile / f"{y}_{stem}.nc"
    root = netCDF4.Dataset(nc_file, "r")

    bands = {}
    for group_name in root.groups:
        if group_name.startswith("B"):
            bands[group_name] = open_ds(root, group_name)

    parc = open_ds(root, "parcels")
    lab = open_ds(root, "labels")
    return bands, parc, lab

def stretch(a, p1=2, p2=98):
    """Percentile contrast stretch of ``a`` into the [0, 1] range.

    The ``p1``/``p2`` percentiles (NaNs ignored) map to 0 and 1; values outside
    are clipped. A tiny epsilon in the denominator avoids division by zero for
    constant inputs.
    """
    low, high = np.nanpercentile(a, [p1, p2])
    scaled = (a - low) / (high - low + 1e-9)
    return np.clip(scaled, 0, 1)

def resize_to(a, tgt):
    """Nearest-neighbour resize of the first two axes of ``a`` to ``tgt``.

    Parameters
    ----------
    a : array-like with ``.shape``
        Source raster (rows, cols, ...).
    tgt : tuple
        Target shape; only ``tgt[0]`` (rows) and ``tgt[1]`` (cols) are used.

    Returns
    -------
    ``a`` itself when the shape already matches, otherwise a NumPy array of
    shape ``(tgt[0], tgt[1], ...)``.

    For integer up-scaling factors the result equals ``np.repeat`` along both
    axes. Unlike plain integer repetition, this also handles targets that are
    smaller than the source or not an exact multiple of it, instead of
    returning an empty or wrongly sized array.
    """
    if a.shape == tgt: return a
    src_rows, src_cols = a.shape[0], a.shape[1]
    # Map each target index back to the source cell that covers it.
    rows = (np.arange(tgt[0]) * src_rows) // tgt[0]
    cols = (np.arange(tgt[1]) * src_cols) // tgt[1]
    return np.asarray(a)[np.ix_(rows, cols)]

def discrete_cmap(n, base="nipy_spectral", include_zero=True):
    """Build a ``ListedColormap`` with ``n`` colours sampled from ``base``.

    With ``include_zero`` (default) a black entry is prepended for index 0 and
    the samples span 0.10–1.00 of the base colormap; otherwise the samples span
    0–1.00 and no black entry is added.
    """
    # ``mpl.cm.get_cmap`` is deprecated in recent Matplotlib releases; prefer
    # the ``mpl.colormaps`` registry and fall back only when it is missing.
    registry = getattr(mpl, "colormaps", None)
    base_c = registry[base] if registry is not None else mpl.cm.get_cmap(base)
    colors = [(0, 0, 0, 1)] if include_zero else []
    colors += list(base_c(np.linspace(0.10 if include_zero else 0, 1.00, n)))
    return mpl.colors.ListedColormap(colors)

# ── widgets ------------------------------------------------------------
# Colormaps offered for single-band rendering.
COLORMAPS = ["plasma","viridis","inferno","magma","gray","terrain","cividis","cubehelix"]

# Patch selector: label "year-tile-stem" → (year, tile, stem) tuple.
patch_dd   = wd.Dropdown(options={f"{y}-{t}-{s}": (y,t,s) for y,t,s in PATCH_SELECTIONS},
                         description="Patch:")
# Band selector; its options are filled in by update_bands().
band_dd    = wd.Dropdown(description="Band:")
cmap_dd    = wd.Dropdown(options=COLORMAPS, value="plasma", description="Colormap:")
# Three overlay styles: colour with legend, grayscale, and viridis-based.
style_dd   = wd.Dropdown(options=["Color + legend",
                                  "Grayscale (no legend)",
                                  "Scientific (viridis)"],
                         value="Color + legend", description="Overlay style:")
# Time index; its maximum is set in on_patch_change().
ts_slider  = wd.IntSlider(description="Timestamp")
overlay_rb = wd.RadioButtons(options=["None","Parcels","Labels"], description="Overlay:")
btn_png    = wd.Button(description="Save PNG", button_style="success")
# Text label used as a busy indicator (see set_busy).
lbl_busy   = wd.Label(value="")
out_fig    = wd.Output()

# _state: data of the currently selected patch plus the last figure.
# _cache: already-loaded patches keyed by "<year>_<tile>_<stem>".
_state, _cache = {}, {}

# ── UI helpers ---------------------------------------------------------
def set_busy(on=True):
    """Show the loading hint in ``lbl_busy`` when ``on`` is truthy, else clear it."""
    if on:
        lbl_busy.value = "⏳ Loading…"
    else:
        lbl_busy.value = ""

def update_bands():
    """Refresh the band dropdown from ``_state["bands"]``.

    Returns immediately when no patch has been loaded yet, so the function is
    safe to call from an observer before the first load. The colormap selector
    is disabled while "RGB composite" is selected.
    """
    if "bands" not in _state:
        return
    band_dd.options = ["RGB composite"] + sorted(_state["bands"].keys())
    # Fall back to the composite when the current selection is not offered.
    if band_dd.value not in band_dd.options:
        band_dd.value = "RGB composite"
    cmap_dd.disabled = (band_dd.value == "RGB composite")

# ── callbacks ----------------------------------------------------------
def on_patch_change(_=None):
    """Load the selected patch (from cache or disk) and redraw the viewer.

    The positional argument is ignored so the function can be used as a
    traitlets observer. The busy indicator is always cleared on exit, even if
    loading or drawing raises.
    """
    if patch_dd.value is None: return
    set_busy(True)
    try:
        y,tile,stem = patch_dd.value
        pid = f"{y}_{tile}_{stem}"
        # Load from disk only once per patch.
        if pid not in _cache:
            _cache[pid] = load_patch_disk(y,tile,stem)
        bands,parc,lab = _cache[pid]
        _state.update(bands=bands,parc=parc,lab=lab)
        # Slider range follows the time axis of the first band.
        ts_slider.max = next(iter(bands.values()))["time"].size - 1
        ts_slider.value = 0
        update_bands()
        draw()
    finally:
        set_busy(False)

def get_overlay():
    """Return ``(data, cmap, labels)`` for the overlay chosen in ``overlay_rb``.

    The base colormap is "viridis" for the "Scientific (viridis)" style and
    "nipy_spectral" otherwise.

    * "None"    → ``(None, None, None)``.
    * "Parcels" → raw values, a discrete colormap sized by the maximum value,
      and ``None`` for labels.
    * "Labels"  → values remapped to 1..n in ascending code order (0 stays 0),
      a colormap with ``n`` colours, and crop names from ``crop_map`` (the
      code as text when unknown).
    """
    choice = overlay_rb.value
    if choice == "None":
        return None, None, None

    if choice == "Parcels":
        source, var_name = _state["parc"], "parcels"
    else:
        source, var_name = _state["lab"], "labels"

    layer = source[var_name]
    if "time" in layer.dims:
        # Time-dependent layers follow the timestamp slider.
        layer = layer.isel(time=ts_slider.value)
    data = layer.values

    # choose base cmap according to style
    base_cmap = "viridis" if style_dd.value == "Scientific (viridis)" else "nipy_spectral"

    if choice != "Labels":  # Parcels
        return data, discrete_cmap(int(data.max()), base=base_cmap), None

    codes = np.unique(data)
    codes = sorted(codes[codes != 0])
    cmap = discrete_cmap(len(codes), base=base_cmap)
    lookup = {code: pos + 1 for pos, code in enumerate(codes)}
    data = np.vectorize(lambda px: lookup.get(px, 0))(data)
    names = [crop_map.get(code, str(code)) for code in codes]
    return data, cmap, names

def make_raster():
    """Return ``(image, cmap)`` for the selected band and timestamp.

    For "RGB composite" with B04/B03/B02 all available, the image is a stacked
    RGB array and ``cmap`` is ``None``. Otherwise a single band is stretched
    and paired with the colormap chosen in ``cmap_dd``. All rasters are
    resized to the shape of the parcels layer.

    When the selection is not an available band (e.g. "RGB composite" was
    requested but one of the RGB bands is missing), the first band in sorted
    order is shown instead of failing with ``KeyError``.
    """
    b,t = band_dd.value, ts_slider.value; bands = _state["bands"]
    ref = _state["parc"]["parcels"].shape
    if b == "RGB composite" and all(k in bands for k in ("B04","B03","B02")):
        r = resize_to(stretch(bands["B04"]["B04"][t]), ref)
        g = resize_to(stretch(bands["B03"]["B03"][t]), ref)
        b_ = resize_to(stretch(bands["B02"]["B02"][t]), ref)
        return np.dstack([r,g,b_]), None
    if b not in bands:
        # Composite unavailable (or nothing selected yet): show a real band.
        b = sorted(bands)[0]
    arr = resize_to(bands[b][b][t], ref)
    return stretch(arr), cmap_dd.value

def draw(*_):
    """Render the two-panel figure into ``out_fig`` and remember it in ``_state``.

    Left panel: selected raster with the overlay (if any) blended at 30 % alpha.
    Right panel: the overlay alone. In the colour and viridis styles it gets a
    crop legend (Labels with at least one class) or a colorbar; the grayscale
    style shows neither. "(empty)" is shown when no overlay is chosen.
    Positional arguments are ignored so the function can be used as an observer.
    """
    if not _state: return
    out_fig.clear_output(wait=True)
    ov_arr, ov_cmap, labels = get_overlay()
    ras, ras_cmap = make_raster()

    gray_mode = (style_dd.value == "Grayscale (no legend)")
    sci_mode  = (style_dd.value == "Scientific (viridis)")

    # The viridis style already gets its colormap from get_overlay(); only the
    # grayscale style needs an override here.
    if gray_mode and ov_arr is not None:
        ov_cmap = "gray"

    with out_fig:
        fig,(axL,axR)=plt.subplots(1,2,figsize=(11,5))
        # Store the figure object itself. Asking pyplot for the "current"
        # figure after plt.show() can return a new, empty figure, which would
        # make "Save PNG" write a blank image.
        _state["fig"] = fig

        # left
        axL.imshow(ras, cmap=ras_cmap)
        if ov_arr is not None:
            axL.imshow(ov_arr, cmap=ov_cmap, alpha=.3)
        axL.axis("off"); axL.set_title(f"{band_dd.value}")

        # right
        axR.axis("off")
        if ov_arr is not None:
            im=axR.imshow(ov_arr, cmap=ov_cmap)
            if not gray_mode:
                if overlay_rb.value=="Labels" and labels:
                    # One legend patch per crop; colour index i+1 because 0 is the background.
                    handles=[mpatches.Patch(color=ov_cmap(i+1),label=l)
                             for i,l in enumerate(labels)]
                    axR.legend(handles=handles, bbox_to_anchor=(1.02,0.5),
                               loc="center left", fontsize=6, frameon=False)
                else:
                    fig.colorbar(im, ax=axR, fraction=.046, pad=.02).ax.tick_params(labelsize=6)
            axR.set_title(overlay_rb.value + (" (gray)" if gray_mode else
                                              " (viridis)" if sci_mode else ""))
        else:
            axR.text(.5,.5,"(empty)",ha="center",va="center")

        year,tile,stem = patch_dd.value
        fig.suptitle(f"{year} – {stem}  |  t={ts_slider.value}", fontsize=11)
        plt.tight_layout(); plt.show()

def save_png(_):
    """Write the last drawn figure to ``OUT_DIR`` as a 300-dpi PNG.

    The file name encodes the patch label, band and timestamp index. Prints a
    warning and returns when no figure has been stored yet.
    """
    current = _state.get("fig")
    if current is None:
        print("⚠️  Draw something first.")
        return
    file_name = f"patch_view_{patch_dd.label}_band{band_dd.value}_t{ts_slider.value}.png"
    target = OUT_DIR / file_name
    current.savefig(target, dpi=300, bbox_inches="tight")
    print(f"✓ Saved → {target}")

# bind
def _redraw(_change=None):
    """Redraw with the busy indicator on; the indicator is cleared even on error."""
    set_busy(True)
    try:
        draw()
    finally:
        set_busy(False)

# patch_dd is deliberately NOT in this loop: on_patch_change() already redraws
# after loading, and a generic redraw would first render the previous patch's
# data under the new patch title.
for widget in (band_dd, cmap_dd, ts_slider, overlay_rb, style_dd):
    widget.observe(_redraw, names="value")
patch_dd.observe(on_patch_change, names="value")
btn_png.on_click(save_png)
band_dd.observe(lambda *_: update_bands(), names="value")

# UI: selectors on top, action row, then the figure output.
display(wd.VBox([
    patch_dd,
    wd.HBox([band_dd, cmap_dd, ts_slider]),
    overlay_rb,
    style_dd,
    wd.HBox([btn_png, lbl_busy]),
    out_fig
]))
# Load and draw the initially selected patch.
on_patch_change()


✓ Crop map loaded (313) from /run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/cultivo_map_final.csv


VBox(children=(Dropdown(description='Patch:', options={'2020-29TPF-29TPF_patch_20_15': (2020, '29TPF', '29TPF_…

<Figure size 640x480 with 0 Axes>

In [4]:
import netCDF4, numpy as np
from pathlib import Path

# Patch inspected in this cell.
tesela = "29TPF"
stem   = "29TPF_patch_20_15"
year   = 2024

nc_path = Path(BASE_NAS_PATH) / str(year) / tesela / f"{year}_{stem}.nc"
time_vals = None

# The context manager closes the file even if reading the variable fails.
with netCDF4.Dataset(nc_path, "r") as ds:          # root
    if "time" in ds.variables:
        time_vals = ds.variables["time"][:]
    else:                                          # look in the sub-groups
        for g in ds.groups.values():
            if "time" in g.variables:
                time_vals = g.variables["time"][:]
                break

if time_vals is None:
    # No time axis at all: report it explicitly instead of analysing ``None``.
    print(f"No se encontró la variable 'time' en {nc_path}")
else:
    # Duplicated values and the indices where each one occurs.
    vals, counts = np.unique(time_vals, return_counts=True)
    dups = vals[counts > 1]

    print("Duplicados (valor bruto):", dups)
    for v in dups:
        idxs = np.where(time_vals == v)[0]
        print(f"  → {v}  aparece en índices {idxs}")


Duplicados (valor bruto): [20071.]
  → 20071.0  aparece en índices [20 21]


In [6]:
import netCDF4, numpy as np
from datetime import datetime, timedelta
from pathlib import Path

# Patch inspected in this cell.
tesela = "29TPF"
stem   = "29TPF_patch_20_15"
year   = 2024
# Raw time value whose occurrences are compared.
DUP_TIME_VALUE = 20071.0

p = Path(BASE_NAS_PATH) / str(year) / tesela / f"{year}_{stem}.nc"

# The context manager closes the file even if one of the reads fails.
with netCDF4.Dataset(p, "r") as ds:
    t = ds.variables["time"][:] if "time" in ds.variables else ds.groups["B01"].variables["time"][:]

    # indices whose time value equals DUP_TIME_VALUE
    dup_idxs = np.where(t == DUP_TIME_VALUE)[0]
    print("índices duplicados:", dup_idxs)

    for i in dup_idxs:
        # show the mean of band B04 to see whether the scenes differ
        b = ds.groups["B04"].variables["B04"][i, ...]
        print(f"escena {i}  media={b.mean():.3f}")


índices duplicados: [20 21]
escena 20  media=2111.734
escena 21  media=2116.632


In [28]:
# ╔══════════════════════════════════════════════════════════════╗
# ║  ELIMINA LA 2ª OCURRENCIA DEL TIMESTAMP DUPLICADO (2024)     ║
# ║  Versión corregida – crea dims en cada sub-grupo             ║
# ╚══════════════════════════════════════════════════════════════╝
import os, glob, shutil, netCDF4, numpy as np
from pathlib import Path

# Root of the patch files; inputs are read from <YEAR>/<tile>/ and cleaned
# copies are written to <YEAR>/<tile>c/ (see the main loop below).
BASE_NAS_PATH = Path("/run/user/1000/gvfs/smb-share:server=10.168.168.61,share=rodrigopg/parches_S4A_CyL")
# Tiles processed by this run.
TESSELAS      = ["29TPG"] # alternative selection: ["29TPF", "29TPG", "29TPH", "29TQH"]
# Year whose files are processed.
YEAR          = 2024

def find_duplicate_indices(time_arr):
    """Return the sorted indices of every repeated time value except its first occurrence.

    Values are compared for exact equality. An input without repeats yields an
    empty list.
    """
    values, freq = np.unique(time_arr, return_counts=True)
    repeated = values[freq > 1]
    # For each repeated value keep the first index and collect the rest.
    surplus = [
        idx
        for val in repeated
        for idx in np.where(time_arr == val)[0][1:]
    ]
    return sorted(surplus)

def ensure_dimensions(src_grp, dst_grp, idx_remove):
    """Create in ``dst_grp`` every dimension of ``src_grp`` that it lacks.

    The ``time`` dimension is shortened by ``len(idx_remove)`` when indices are
    being removed. Other dimensions keep their length, or stay unlimited
    (size ``None``) when the source dimension is unlimited.
    """
    n_dropped = len(idx_remove) if idx_remove else 0
    for dname, d in src_grp.dimensions.items():
        if dname in dst_grp.dimensions:
            continue
        if dname == "time" and n_dropped:
            size = len(d) - n_dropped
        elif d.isunlimited():
            size = None
        else:
            size = len(d)
        dst_grp.createDimension(dname, size)

def copy_group(src_grp, dst_grp, idx_remove):
    """Recursively copy ``src_grp`` into ``dst_grp`` without the given time indices.

    Parameters
    ----------
    src_grp, dst_grp : netCDF4 Dataset or Group
        Source (read) and destination (write) containers.
    idx_remove : list of int
        Positions along each variable's ``time`` dimension to leave out; an
        empty list copies everything.

    Compared with a naive copy this:
    * removes entries along the variable's actual ``time`` axis, wherever it
      sits in the dimension tuple (not only axis 0);
    * selects by index instead of ``np.delete`` so masked arrays keep their mask;
    * mirrors the source's zlib/complevel/shuffle settings and tolerates
      variables whose ``filters()`` is ``None``;
    * passes ``_FillValue`` at creation time, since it cannot be set afterwards.
    """
    # 1) dimensions: make sure they exist in this (sub-)group
    ensure_dimensions(src_grp, dst_grp, idx_remove)

    # 2) variables
    for vname, v in src_grp.variables.items():
        data = v[:]
        if "time" in v.dimensions and idx_remove:
            t_axis = v.dimensions.index("time")
            keep = np.setdiff1d(np.arange(data.shape[t_axis]), idx_remove)
            selector = [slice(None)] * data.ndim
            selector[t_axis] = keep
            data = data[tuple(selector)]

        filt = v.filters() or {}
        attrs = {att: v.getncattr(att) for att in v.ncattrs()}
        fill_value = attrs.pop("_FillValue", None)

        out = dst_grp.createVariable(vname, v.datatype, v.dimensions,
                                     zlib=bool(filt.get("zlib", False)),
                                     complevel=filt.get("complevel", 4),
                                     shuffle=bool(filt.get("shuffle", True)),
                                     fill_value=fill_value)
        out.setncatts(attrs)
        out[:] = data

    # 3) sub-groups (recursive)
    for gname in src_grp.groups:
        new_sub = dst_grp.createGroup(gname)
        copy_group(src_grp.groups[gname], new_sub, idx_remove)

# Main loop: for every .nc file of each tile, write a copy without duplicated
# timestamps into "<tile>c". Files already present in the output are skipped,
# so each output is first written under a temporary name and renamed only when
# complete; an interrupted run can then never leave a truncated file that a
# later run would treat as done.
for tes in TESSELAS:
    in_dir  = BASE_NAS_PATH / str(YEAR) / tes
    out_dir = BASE_NAS_PATH / str(YEAR) / f"{tes}c"
    out_dir.mkdir(parents=True, exist_ok=True)

    for in_path in glob.glob(str(in_dir / "*.nc")):
        in_path = Path(in_path)
        out_path = out_dir / in_path.name
        if out_path.exists():
            print(f"· {out_path.name} ya existe → omitido")
            continue

        # ── 1. duplicated indices ──────────────────────────────
        with netCDF4.Dataset(in_path, "r") as ds:
            time_var = None
            if "time" in ds.variables:
                time_var = ds.variables["time"][:]
            else:
                for g in ds.groups.values():
                    if "time" in g.variables:
                        time_var = g.variables["time"][:]
                        break
        # A file without any time variable has nothing to de-duplicate.
        idx_remove = find_duplicate_indices(time_var) if time_var is not None else []

        tmp_path = out_path.with_name(out_path.name + ".tmp")
        try:
            if not idx_remove:
                shutil.copy2(in_path, tmp_path)
                message = f"· Copiado sin cambios: {in_path.name}"
            else:
                # ── 2. write the clean copy ────────────────────
                with netCDF4.Dataset(in_path, "r") as src, netCDF4.Dataset(tmp_path, "w") as dst:
                    dst.setncatts({att: src.getncattr(att) for att in src.ncattrs()})
                    # root dimensions
                    ensure_dimensions(src, dst, idx_remove)
                    # variables + groups
                    copy_group(src, dst, idx_remove)
                message = f"✓ Duplicado eliminado en {in_path.name}  →  {out_dir.name}/"
            # Publish the finished file under its final name.
            tmp_path.replace(out_path)
        except BaseException:
            # Also covers a kernel interrupt: drop the partial file, then re-raise.
            if tmp_path.exists():
                tmp_path.unlink()
            raise

        print(message)

print("\nProceso terminado.")


✓ Duplicado eliminado en 2024_29TPG_patch_26_07.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_20_11.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_25_07.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_18_12.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_26_01.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_26_04.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_28_22.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_17_11.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_19_13.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_23_09.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_19_08.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_21_09.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_25_00.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_22_06.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_27_02.nc  →  29TPGc/
✓ Duplicado eliminado en 2024_29TPG_patch_29_05.nc  →  

· 2024/29TPFc/2024_29TPF_patch_26_15.nc
· 2024/29TPFc/2024_29TPF_patch_22_20.nc
· 2024/29TPFc/2024_29TPF_patch_22_22.nc
· 2024/29TPFc/2024_29TPF_patch_23_22.nc
· 2024/29TPFc/2024_29TPF_patch_28_09.nc
· 2024/29TPFc/2024_29TPF_patch_27_11.nc
· 2024/29TPFc/2024_29TPF_patch_23_27.nc
· 2024/29TPFc/2024_29TPF_patch_28_15.nc
· 2024/29TPFc/2024_29TPF_patch_23_20.nc
· 2024/29TPFc/2024_29TPF_patch_29_19.nc
· 2024/29TPFc/2024_29TPF_patch_28_22.nc
· 2024/29TPFc/2024_29TPF_patch_23_28.nc
· 2024/29TPFc/2024_29TPF_patch_26_23.nc
· 2024/29TPFc/2024_29TPF_patch_29_29.nc
· 2024/29TPFc/2024_29TPF_patch_25_24.nc
· 2024/29TPFc/2024_29TPF_patch_27_28.nc
· 2024/29TPFc/2024_29TPF_patch_26_29.nc
· 2024/29TPFc/2024_29TPF_patch_24_19.nc
· 2024/29TPFc/2024_29TPF_patch_24_13.nc
· 2024/29TPFc/2024_29TPF_patch_25_29.nc
· 2024/29TPFc/2024_29TPF_patch_26_12.nc
· 2024/29TPFc/2024_29TPF_patch_27_09.nc
· 2024/29TPFc/2024_29TPF_patch_28_08.nc
· 2024/29TPFc/2024_29TPF_patch_29_13.nc
· 2024/29TPFc/2024_29TPF_patch_28_29.nc
