Corroborar si este es mas rapido al no seleccionar días separados

In [8]:
# Obtener última fecha registrada en la tabla
import pandas as pd
import xarray as xr
import sqlite3
from pathlib import Path
import os
import sqlite3

import cdsapi
import zipfile
from netCDF4 import Dataset, num2date


def definir_star_end(DB_PATH,start_="",end_=""):

    if start_=="":

        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        cursor.execute("SELECT MAX(date) FROM climate_test")
        row = cursor.fetchone()
        conn.close()

        # Si hay datos, usar la fecha siguiente como inicio; si no, arrancar manualmente
        if row and row[0]:
            start = pd.to_datetime(row[0]) + pd.Timedelta(days=1)
        else:
            start = pd.Timestamp("2023-01-01")  # o cualquier fecha de inicio deseada
    else:
        start = pd.Timestamp(start_)
    if end_=="":
    # Siempre terminar en ayer
        end = pd.Timestamp("today").normalize() - pd.Timedelta(days=1)
    else:
        end = pd.Timestamp(end_)       

    return (start,end)

# Generar fechas faltantes
start_,end_="",""

BASE_DIR = Path().resolve()  

DB_PATH = BASE_DIR.parent / "backend" / "data" / "mi_base_de_datos5.db"

start,end=definir_star_end(DB_PATH,start_,end_)


dates = pd.date_range(start, end, freq="D")
periods = sorted(dates.to_series().dt.to_period("M").unique())

print(f"Iniciando descarga desde {start.date()} hasta {end.date()}")






Iniciando descarga desde 2025-08-25 hasta 2025-08-29


In [None]:


TMP_DIR   = "Clima\tmp_test20d"
CDS_URL   = 'https://cds.climate.copernicus.eu/api'
CDS_KEY   = 'd9af180c-f7f8-4a2e-8029-09018fb8c920'
AREA      = [-32.0, -63.0, -33.0, -61.0]  # [Norte, Oeste, Sur, Este]
VARIABLES = ['2m_temperature','2m_dewpoint_temperature','total_precipitation']

os.makedirs(TMP_DIR, exist_ok=True)

# ————— Prepara SQLite y tabla de prueba —————
conn = sqlite3.connect(DB_PATH)
cur  = conn.cursor()
cur.execute("""
CREATE TABLE IF NOT EXISTS climate_test (
    date TEXT,
    latitude REAL,
    longitude REAL,
    t2m REAL,
    d2m REAL,
    tp REAL,
    PRIMARY KEY(date, latitude, longitude)
)
""")
conn.commit()

# # ————— Calcula últimos 20 días —————
# end   = pd.to_datetime("today").normalize() - pd.Timedelta(days=1)
# start = end - pd.Timedelta(days=19)

In [10]:

dates = pd.date_range(start, end, freq="D")
years  = sorted({str(d.year)     for d in dates})
months = sorted({f"{d.month:02d}" for d in dates})
days   = sorted({f"{d.day:02d}"   for d in dates})

print(f"Descargando datos de {start.date()} a {end.date()} ({len(dates)} días)…")

# ————— Descarga del ZIP con los 3 *.nc —————
zip_path = os.path.join(TMP_DIR, "test20d.zip")
c = cdsapi.Client(url=CDS_URL, key=CDS_KEY)


try:
    c.retrieve(
        'derived-era5-single-levels-daily-statistics',
        {
            'product_type':    'reanalysis',
            'format':          'netcdf',       # el API nos devuelve un ZIP
            'variable':        VARIABLES,
            'year':            years,
            'month':           months,
            'day':             days,
            'daily_statistic': 'daily_mean',
            'frequency':       '6_hourly',
            'time_zone':       'utc+00:00',
            'area':            AREA,
        },
        zip_path
    )

    import tempfile

    def ingest_from_zip(table_name, zip_filepath):
        """Extrae cada .nc a un NamedTemporaryFile y vuelca a SQLite."""
        extracted_paths = []

        # 1) Abre el ZIP
        with zipfile.ZipFile(zip_filepath, 'r') as z:
            nc_members = [m for m in z.namelist() if m.endswith('.nc')]
            if not nc_members:
                raise RuntimeError("No encontré archivos .nc en el ZIP")

            for member in nc_members:
                # crea un temp file garantizado escribible
                tmpf = tempfile.NamedTemporaryFile(suffix=".nc", delete=False)
                # vuelca el contenido
                with z.open(member) as src:
                    tmpf.write(src.read())
                tmpf.close()
                extracted_paths.append(tmpf.name)

        # 2) Lee coordenadas y variables
        data_arrays = {}
        time_objs = latitudes = longitudes = None

        for nc_path in extracted_paths:
            ds = Dataset(nc_path)
            # detecta nombre de dim/vars
            if time_objs is None:
                time_var = next(v for v in ds.variables if "time" in v.lower())
                lat_var  = next(v for v in ds.variables if "lat"  in v.lower())
                lon_var  = next(v for v in ds.variables if "lon"  in v.lower())
                times    = ds.variables[time_var][:]
                time_objs= num2date(times,
                            ds.variables[time_var].units,
                            getattr(ds.variables[time_var], 'calendar', 'standard'))
                latitudes = ds.variables[lat_var][:]
                longitudes= ds.variables[lon_var][:]
            # identifica la variable de datos
            var_keys = [v for v in ds.variables 
                        if v not in (time_var, lat_var, lon_var, "number")]
            if len(var_keys) != 1:
                raise RuntimeError(f"Variables inesperadas en {nc_path}: {var_keys}")
            var = var_keys[0]
            data_arrays[var] = ds.variables[var][:]
            ds.close()

        # 3) Inserta en SQLite
        sql = f"""INSERT OR REPLACE INTO {table_name}
                (date, latitude, longitude, t2m, d2m, tp)
                VALUES (?, ?, ?, ?, ?, ?)"""
        cnt = 0
        for ti, dt in enumerate(time_objs):
            # usa strftime directamente en el objeto cftime
            date_str = dt.strftime("%Y-%m-%d")
            for yi, lat in enumerate(latitudes):
                for xi, lon in enumerate(longitudes):
                    v_t2m = float(data_arrays.get("t2m", data_arrays.get("2m_temperature"))[ti,yi,xi])
                    v_d2m = float(data_arrays.get("d2m", data_arrays.get("2m_dewpoint_temperature"))[ti,yi,xi])
                    v_tp  = float(data_arrays.get("tp",  data_arrays.get("total_precipitation"))[ti,yi,xi])
                    cur.execute(sql, (date_str, float(lat), float(lon), v_t2m, v_d2m, v_tp))
                    cnt += 1
        conn.commit()

        # 4) Borra los temporales
        for p in extracted_paths:
            try:
                os.remove(p)
            except OSError:
                pass
        try:
            os.remove(zip_filepath)
        except OSError:
            pass

        return cnt


    # ————— Ejecuta ingesta y reporta —————
    inserted = ingest_from_zip("climate_test", zip_path)
    print(f"✔ Insertadas/actualizadas {inserted} filas en climate_test")
except:
    pass
conn.close()


2025-08-30 13:56:17,610 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.


Descargando datos de 2025-08-25 a 2025-08-29 (5 días)…


In [11]:

# Conectar a la base
conn = sqlite3.connect(DB_PATH)

# Leer primeros 20 registros ordenados por fecha
query = '''
SELECT *
FROM climate_test
ORDER BY date ASC
'''

df_head = pd.read_sql_query(query, conn)
conn.close()

df_head["date"].unique()


array(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
       '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08',
       '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12',
       '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16',
       '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20',
       '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24',
       '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28',
       '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01',
       '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05',
       '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09',
       '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13',
       '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17',
       '2023-02-18', '2023-02-19', '2023-02-20', '2023-02-21',
       '2023-02-22', '2023-02-23', '2023-02-24', '2023-02-25',
       '2023-02-26', '2023-02-27', '2023-02-28', '2023-03-01',
       '2023-03-02', '2023-03-03', '2023-03-04', '2023-