In [1]:
import pandas as pd
import requests
import json # Importa el módulo 'json'

In [None]:
def obtener_datos(series_id, site_code):
    """
    Realiza una petición a la API y devuelve un DataFrame.
    """
    url = f"https://alerta.ina.gob.ar/pub/datos/datos&timeStart=2025-09-18&timeEnd=2025-09-20&seriesId={series_id}&siteCode={site_code}&varId=2&format=csv"
    
    try:
        # Lee el CSV directamente desde la URL.
        df = pd.read_csv(url, comment='#', sep=',', header=None)
        df.columns = ['obs_id', 'timestart', 'timeend', 'valor', 'timeupdate']
        
        # Agrega las columnas adicionales
        df['seriesId'] = series_id
        df['siteCode'] = site_code
        
        return df
    
    except Exception as e:
        print(f"Error al obtener datos para seriesId {series_id}: {e}")
        return pd.DataFrame() # Devuelve un DataFrame vacío en caso de error

# Leer el archivo JSON de forma síncrona con Python
try:
    with open("alturasHidrometricas.json", 'r', encoding='utf-8') as file:
        alturas_json = json.load(file)
except FileNotFoundError:
    print("Error: El archivo 'alturasHidrometricas.json' no se encontró.")
    alturas_json = []

lista_de_dataframes = []

for item in alturas_json:
    df_temporal = obtener_datos(item['seriesid'], item['sitecode'])
    lista_de_dataframes.append(df_temporal)

# Concatena todos los DataFrames en uno solo
df_final = pd.concat(lista_de_dataframes, ignore_index=True)

# Muestra las primeras 5 filas del DataFrame final
print(df_final.head())

# Opcional: Guarda el DataFrame final en un solo archivo CSV
df_final.to_csv("datos_finales.csv", index=False)

Error al obtener datos para seriesId 55: No columns to parse from file
Error al obtener datos para seriesId 151: No columns to parse from file
Error al obtener datos para seriesId 3279: No columns to parse from file
Error al obtener datos para seriesId 3280: 'utf-8' codec can't decode byte 0xf3 in position 695: invalid continuation byte
Error al obtener datos para seriesId 3309: 'utf-8' codec can't decode byte 0xe1 in position 759: invalid continuation byte
Error al obtener datos para seriesId 3312: 'utf-8' codec can't decode byte 0xe1 in position 388: invalid continuation byte
Error al obtener datos para seriesId 3314: 'utf-8' codec can't decode byte 0xe1 in position 417: invalid continuation byte
Error al obtener datos para seriesId 3345: 'utf-8' codec can't decode byte 0xf3 in position 560: invalid continuation byte
Error al obtener datos para seriesId 3347: 'utf-8' codec can't decode byte 0xf3 in position 334: invalid continuation byte
Error al obtener datos para seriesId 6060: 'ut

In [4]:
df_nuevito = pd.read_csv("datos_finales.csv")
df_nuevito

Unnamed: 0,obs_id,timestart,timeend,valor,timeupdate,seriesId,siteCode
0,25671322243,2025-09-18 00:00:00,2025-09-18 00:00:00,0.66,2025-09-18 18:01:45.740123,8,8
1,25678429536,2025-09-19 00:00:00,2025-09-19 00:00:00,0.46,2025-09-19 12:01:56.589385,8,8
2,25671322611,2025-09-18 00:00:00,2025-09-18 00:00:00,8.60,2025-09-18 18:01:46.129692,9,9
3,25678429676,2025-09-19 00:00:00,2025-09-19 00:00:00,8.40,2025-09-19 12:01:56.830545,9,9
4,25671322635,2025-09-18 00:00:00,2025-09-18 00:00:00,9.20,2025-09-18 18:01:46.298099,10,10
...,...,...,...,...,...,...,...
10329,25675266304,2025-09-19 00:00:00,2025-09-19 00:00:00,1.48,2025-09-19 04:01:46.98231,37594,7392
10330,25676460271,2025-09-19 04:00:00,2025-09-19 04:00:00,1.45,2025-09-19 07:01:50.255738,37594,7392
10331,25677641323,2025-09-19 08:00:00,2025-09-19 08:00:00,1.42,2025-09-19 10:01:48.955388,37594,7392
10332,25679618659,2025-09-19 12:00:00,2025-09-19 12:00:00,1.40,2025-09-19 15:01:48.669583,37594,7392


In [50]:
df = df_nuevito.drop(['timestart', 'timeupdate', 'obs_id'], axis=1)

In [62]:
#Se puede hacer : df.iloc[1 + 10]
df
df['timeend'] = pd.to_datetime(df['timeend'])
df['timeend'] = df['timeend'].dt.date
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10334 entries, 0 to 10333
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   timeend   10334 non-null  object 
 1   valor     10334 non-null  float64
 2   seriesId  10334 non-null  int64  
 3   siteCode  10334 non-null  int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 323.1+ KB


In [55]:
print(df.iloc[3]["siteCode"])


9


# Limpiando los datos de los rios para estandarizar su altura segun el promedio de los registros

In [37]:
datos = {"timeend": [],
        "valor": [],
        "seriesId":	[],
        "siteCode": []}
i = 0
while i < (len(df)):
    elemento = df.iloc[i]
    elemento_anterior = df.iloc[i - 1]
    time = elemento.timeend
    series = elemento.seriesId
    site = elemento.siteCode
    values = []
    if (series != elemento_anterior.seriesId and site != elemento_anterior.siteCode) or i == 0 or time != elemento_anterior.timeend:
        #Si el id del rio es el primero de ese rio o son registros de otra fecha...
        for indice, iscopia in df.iloc[i:].iterrows(): 
            if iscopia['seriesId'] == series and iscopia['siteCode'] == site and iscopia['timeend'] == time:
                values.append(iscopia["valor"])
            else:
                break
        new_value = sum(values) / len(values)
        numero_cortado_str = str(new_value)[:4]
        new_value = float(numero_cortado_str)
        fecha = elemento["timeend"]
        datos["timeend"].append(fecha)
        datos["valor"].append(new_value)
        datos["seriesId"].append(series)
        datos["siteCode"].append(site)
        i = i + len(values)
df = pd.DataFrame(datos)
df.to_csv('alturas_rios_limpio.csv', index=False)


In [None]:
df_limpio = df.groupby(["timeend", "seriesId", "siteCode"])["valor"].mean().reset_index()
for indice, elemento in df_limpio.iterrows():
    valor = elemento["valor"]
    numero_cortado_str = str(valor[:4])
    elemento["valor"] = valor
condicion = df_limpio['seriesId'] == 8
df_limpio[condicion]

Unnamed: 0,timeend,seriesId,siteCode,valor
0,2025-09-18,8,8,0.66
393,2025-09-19,8,8,0.46


In [75]:
df_limpio = df_limpio.sort_values(by='seriesId')
df_limpio

Unnamed: 0,timeend,seriesId,siteCode,valor
0,2025-09-18,8,8,0.660000
393,2025-09-19,8,8,0.460000
394,2025-09-19,9,9,8.400000
1,2025-09-18,9,9,8.600000
2,2025-09-18,10,10,9.200000
...,...,...,...,...
390,2025-09-18,37594,7392,1.446000
391,2025-09-18,37595,6624,0.634167
787,2025-09-19,37595,6624,0.628333
392,2025-09-18,37597,6622,1.119167


In [77]:
df_limpio.to_csv('altura_rios_limpio.csv')