In [3]:
# Adding the precipitation sum to the training data:
import pandas as pd

# Load the records from the local CSV file into the notebook-level frame `df`.
df = pd.read_csv('registros_rio_6746.csv')

In [4]:
df

Unnamed: 0,date,rio_id,lat,lon,altura_value,precipitaciones_value,es_null,year
0,2023-01-02,6746-2123,-36.397778,-67.140278,-0.06,0.0,False,2023
1,2023-01-03,6746-2123,-36.397778,-67.140278,-0.06,0.0,False,2023
2,2023-01-04,6746-2123,-36.397778,-67.140278,-0.06,0.0,False,2023
3,2023-01-05,6746-2123,-36.397778,-67.140278,-0.06,0.1,False,2023
4,2023-01-06,6746-2123,-36.397778,-67.140278,-0.06,0.1,False,2023
...,...,...,...,...,...,...,...,...
1003,2025-10-01,6746-2123,-36.397778,-67.140278,0.75,0.0,False,2025
1004,2025-10-02,6746-2123,-36.397778,-67.140278,0.78,0.0,False,2025
1005,2025-10-03,6746-2123,-36.397778,-67.140278,0.81,0.0,False,2025
1006,2025-10-04,6746-2123,-36.397778,-67.140278,0.83,0.4,False,2025


In [6]:
# Remove the helper columns that are not needed downstream.
# errors="ignore" keeps this cell idempotent: re-running it after the columns
# are already gone would otherwise raise KeyError.
df = df.drop(columns=["es_null", "year"], errors="ignore")


In [7]:
df

Unnamed: 0,date,rio_id,lat,lon,altura_value,precipitaciones_value
0,2023-01-02,6746-2123,-36.397778,-67.140278,-0.06,0.0
1,2023-01-03,6746-2123,-36.397778,-67.140278,-0.06,0.0
2,2023-01-04,6746-2123,-36.397778,-67.140278,-0.06,0.0
3,2023-01-05,6746-2123,-36.397778,-67.140278,-0.06,0.1
4,2023-01-06,6746-2123,-36.397778,-67.140278,-0.06,0.1
...,...,...,...,...,...,...
1003,2025-10-01,6746-2123,-36.397778,-67.140278,0.75,0.0
1004,2025-10-02,6746-2123,-36.397778,-67.140278,0.78,0.0
1005,2025-10-03,6746-2123,-36.397778,-67.140278,0.81,0.0
1006,2025-10-04,6746-2123,-36.397778,-67.140278,0.83,0.4


## Solicitud a Open-Meteo para corregir los valores de precipitación

In [29]:
def tomar_precipitaciones_bien(lat, lon, start='2023-01-01', end='2025-10-05'):
    """Fetch daily precipitation sums from the Open-Meteo archive API.

    Args:
        lat: Latitude of the point to query.
        lon: Longitude of the point to query.
        start: First day of the range (anything ``pd.Timestamp`` accepts).
        end: Last day of the range, inclusive.

    Returns:
        A list of floats with exactly one entry per day from ``start`` to
        ``end`` (1009 values for the default range). Days that are missing
        from the response, or whose value is null or non-numeric, are reported
        as 0.0 at their position. If the HTTP request or the JSON decoding
        fails, the error is printed and an empty list is returned instead.
    """
    import requests
    import pandas as pd

    start = pd.Timestamp(start).normalize()
    end = pd.Timestamp(end).normalize()
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'latitude': lat,
        'longitude': lon,
        'start_date': str(start.date()),
        'end_date': str(end.date()),
        'daily': 'precipitation_sum',
        'timezone': 'auto',
    }
    try:
        resp = requests.get(
            'https://archive-api.open-meteo.com/v1/archive',
            params=params,
            timeout=20,
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print('Error al obtener datos de la API:', e)
        return []

    # A payload without a usable "daily" object is treated as "no data".
    daily = data.get('daily') if isinstance(data, dict) else None
    if not isinstance(daily, dict):
        daily = {}
    times = daily.get('time') or []
    precs = daily.get('precipitation_sum') or []

    try:
        # Truncate both arrays to the shorter one so they can be paired up.
        n = min(len(times), len(precs))
        idx = pd.to_datetime(times[:n], errors='coerce')
        ser = pd.Series(data=precs[:n], index=idx, dtype=object)
        # Unparsable timestamps become NaT; together with repeated dates they
        # would produce duplicate labels, on which reindex() raises. Drop them
        # (the first value reported for a date wins).
        ser = ser[ser.index.notna()]
        ser = ser[~ser.index.duplicated(keep='first')]
    except (TypeError, ValueError) as e:
        print('Error al parsear arrays de daily:', e)
        ser = pd.Series(dtype=float)

    # Align to the full daily range, filling absent days with 0.
    full_index = pd.date_range(start, end, freq='D')
    ser = ser.reindex(full_index, fill_value=0)

    # Coerce to numbers: nulls and non-numeric values become 0.0.
    ser = pd.to_numeric(ser, errors='coerce').fillna(0.0).astype(float)
    return ser.tolist()

In [33]:
def crear_serie_precipitaciones(arr, start='2023-01-01', col_name='precipitaciones_value'):
    """Build a daily-indexed DataFrame from an iterable of precipitation values.

    Args:
        arr: Values to place on consecutive days. May be a list, a pandas
            Series, any other iterable, or a single scalar (treated as a
            one-element sequence). Strings/bytes are treated as one scalar,
            not as a sequence of characters.
        start: Date of the first value (string or datetime-like).
        col_name: Name of the single output column.

    Returns:
        A DataFrame with a daily ``DatetimeIndex`` named ``'date'`` starting at
        ``start`` and one float column ``col_name``. Every value that is None,
        NaN (of any numeric type or spelled as text) or not convertible to
        float is stored as 0.0. If ``arr`` is None or empty, an empty
        DataFrame with just the ``col_name`` column is returned.
    """
    import math
    import pandas as pd

    def _a_float(valor):
        """Convert one raw value to float, mapping None/NaN/unparseable to 0.0."""
        try:
            numero = float(valor)
        except (TypeError, ValueError, OverflowError):
            return 0.0
        # float() happily yields NaN for 'nan', numpy.float32 NaN, etc.
        return 0.0 if math.isnan(numero) else numero

    if arr is None:
        return pd.DataFrame(columns=[col_name])

    if isinstance(arr, (str, bytes)):
        # Avoid iterating a string character by character.
        valores = [arr]
    else:
        try:
            valores = list(arr)
        except TypeError:
            # Not iterable: a single scalar value.
            valores = [arr]

    limpios = [_a_float(v) for v in valores]
    if not limpios:
        return pd.DataFrame(columns=[col_name])

    fechas = pd.date_range(start=pd.to_datetime(start), periods=len(limpios), freq='D')
    resultado = pd.DataFrame({col_name: limpios}, index=fechas)
    resultado.index.name = 'date'
    return resultado

In [34]:
# Request the daily precipitation sums for the point (-36.397, -67.140).
array = tomar_precipitaciones_bien(lat=-36.397, lon=-67.140)
# Place the returned values on consecutive days starting at 2023-01-01.
df_precip = crear_serie_precipitaciones(
    array,
    start='2023-01-01',
    col_name='precipitaciones_value',
)
df_precip

Unnamed: 0_level_0,precipitaciones_value
date,Unnamed: 1_level_1
2023-01-01,1.6
2023-01-02,0.0
2023-01-03,0.0
2023-01-04,0.0
2023-01-05,0.1
...,...
2025-10-01,0.0
2025-10-02,0.0
2025-10-03,0.0
2025-10-04,0.4
