In [1]:
import openmeteo_requests
import os
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime

In [None]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

date = datetime.now().strftime("%Y-%m-%d")
año = str(date.year)
año_menos5 = str(int(date.year) - 3)
month = str(date.month)

today = año +"-"+month+"-"+str(date.day)
yesterday = año_menos5 +"-"+month+"-"+str(date.day)
print(today)
print(yesterday) #datosapi.com
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
#temperature_2m , precipitation, rain

variable = ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature",
            "pressure_msl", "surface_pressure", "precipitation", "rain", "snowfall", "cloud_cover", "cloud_cover_low", "cloud_cover_mid"
            ,"cloud_cover_high", "shortwave_radiation", "direct_radiation", "diffuse_radiation", "global_tilted_irradiance", "sunshine_duration"
            , "wind_speed_10m", "wind_speed_100m", "wind_direction_10m","wind_direction_100m", "wind_gusts_10m", "et0_fao_evapotranspiration", "weather_code", "snow_depth"
            ,"vapour_pressure_deficit", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm", ]

latitud_longitud = [
    ("Madrid", 40.4168, -3.7038),
    ("Barcelona", 41.3888, 2.159),
    ("Hoyo de Manzanares", 40.6227, -3.9073)
]

# Acceder a los datos correctamente
for ciudad, lat, lon in latitud_longitud:
    print(ciudad)

    for var in variable :
        params = { 
            "latitude": lat,
            "longitude": lon,
            "start_date": yesterday,
            "end_date": today,
            "hourly": var
        }
        responses = openmeteo.weather_api(url, params=params)

        # Process first location. Add a for-loop for multiple locations or weather models
        response = responses[0]
        """print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
        print(f"Elevation {response.Elevation()} m asl")
        print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
        print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s") """

        # Process hourly data. The order of variables needs to be the same as requested.
        hourly = response.Hourly()
        hourly_var = hourly.Variables(0).ValuesAsNumpy()

        hourly_data = {"date": pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left"
        )}

        hourly_data[var] = hourly_var

        hourly_dataframe = pd.DataFrame(data = hourly_data)
        print(hourly_dataframe)

        hourly_dataframe["date"] = pd.to_datetime(hourly_dataframe["date"])  # Asegurar que la fecha sea datetime

        for _, row in hourly_dataframe.iterrows():
            year = row["date"].year
            month = row["date"].month
            day = row["date"].day
            
            # Crear la ruta de carpetas
            folder_path = f"datos/{ciudad}/{var}/{year}/{month:02d}"
            os.makedirs(folder_path, exist_ok=True)  # Crear carpetas si no existen
            
            # Ruta del archivo CSV
            file_path = os.path.join(folder_path, f"{year}-{month:02d}-{day:02d}.csv")
            
            # Guardar los datos de esa fecha en un archivo CSV
            row_df = pd.DataFrame([row])  # Convertir la fila en DataFrame
            row_df.to_csv(file_path, index=False, mode='a', header=not os.path.exists(file_path))

        print("Datos guardados en carpetas separadas por año, mes y día.")






In [None]:
#Recoger la media de la humedad de 9 de la mañana hasta 10 de la noche y hacer media para asignarla al día
from pathlib import Path
def put_humidity(zona):
    base_path = Path(f"../../data/raw/Open_Meteo/diarios/{zona}")
    print("Ruta absoluta:", base_path.resolve())

    dfs = []

    for entry in base_path.glob("*"):
        print(f"[DEBUG] Elemento encontrado: {entry.name} (¿Es directorio? {entry.is_dir()})")
        
        if entry.is_dir() and entry.name.isdigit():  # Filtra solo directorios con nombres numéricos (años)
            year = int(entry.name)
            print(f"[DEBUG] Procesando año: {year}")
            
            # Iterar sobre los meses dentro del año
            for month_dir in entry.glob("*"):
                if month_dir.is_dir():
                    print(f"[DEBUG] Procesando mes: {month_dir.name}")
                    
                    # Leer todos los CSV del mes
                    for csv_file in month_dir.glob("*.csv"):
                        print(f"[DEBUG] Leyendo archivo: {csv_file}")
                        try:
                            df = pd.read_csv(csv_file, parse_dates=["date"]) 
                            dfs.append(df)
                        except Exception as e:
                            print(f"[ERROR] Fallo al leer {csv_file}: {e}")

    if not dfs:
        print("[ERROR] No se cargaron datos. Verifica:")
        print("1. La estructura de carpetas: Barcelona_data/diarios/AAAA/MM/AAAA-MM-DD.csv")
        print("2. Que los archivos CSV tengan la columna 'fecha'")
    else:
        df = pd.concat(dfs, ignore_index=True)
        #df.sort_values("date", inplace=True)

    df['date'] = pd.to_datetime(df['date'])  # Convertir a formato datetime
    df['day'] = df['date'].dt.date  # Extraer solo la fecha sin la hora

    # Filtrar el DataFrame entre las 09:00 y las 22:00
    df_filtered = df[(df['date'].dt.hour >= 9) & (df['date'].dt.hour <= 22)]

    # Calcular la media de 'relative_humidity_2m' por día
    media_por_dia = df_filtered.groupby('day')['relative_humidity_2m'].mean()
    media_por_dia = media_por_dia.reset_index()
    media_por_dia['date'] = pd.to_datetime(media_por_dia['day'], errors='coerce')

    media_por_dia['date'] = media_por_dia['date'].dt.strftime('%Y-%m-%d %H:%M:%S')
    media_por_dia.drop(columns=['day'], inplace=True)
    
    # Mostrar resultados
    print(media_por_dia)

    return media_por_dia


In [None]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

date = datetime.now()

# Extraer los valores con ceros a la izquierda
año = str(date.year)
año_menos3 = str(date.year - 5)  # Restamos 3 años directamente
month = f"{date.month:02d}"  # Formato con dos dígitos
day = int(f"{date.day:02d}")-3  # Formato con dos dígitos

# Construimos las fechas en formato YYYY-MM-DD
today = f"{año}-{month}-{day}"
yesterday = f"{año_menos3}-{month}-{day}"
print(today)

url = "https://archive-api.open-meteo.com/v1/archive"
latitud_longitud = [
    ("Madrid", 40.4168, -3.7038, "ESMAD2800000028240A"),
    ("Barcelona", 41.3888, 2.159, "ESMAD2800000028240A"),
    ("Hoyo de Manzanares", 40.6227, -3.9073, "ESMAD2800000028240A")
]

params = {
    "hourly": "temperature_2m",
    "daily": [
        "weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean",
        
    ],
    "timezone": "Europe/London"
}

for ciudad, lat, lon, serie in latitud_longitud:
    # Actualizar parámetros con coordenadas y fechas
    params.update({
        "latitude": lat,
        "longitude": lon,
        "start_date": yesterday,
        "end_date": today
    })
    
    # Obtener datos
    responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)
    response = responses[0]
    
    daily = response.Daily()
    daily_data = {"date": pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left"
    )}
    
    variables_diarias = [
        "weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean"
    ]
    
    for i, var in enumerate(variables_diarias):
        daily_data[var] = daily.Variables(i).ValuesAsNumpy()
    meses_es = {
    1: 'Enero', 2: 'Febrero', 3: 'Marzo', 4: 'Abril',
    5: 'Mayo', 6: 'Junio', 7: 'Julio', 8: 'Agosto',
    9: 'Septiembre', 10: 'Octubre', 11: 'Noviembre', 12: 'Diciembre'
    }
    daily_df = pd.DataFrame(daily_data)
    
    daily_df['date'] = pd.to_datetime(daily_df['date'])
    daily_df['year'] = daily_df['date'].dt.year
    daily_df['month_number'] = daily_df['date'].dt.month 
    daily_df['month_name'] = daily_df['date'].dt.month.map(meses_es)  

    df_humidity = put_humidity(ciudad + "_data")

    # Convertir la columna 'date' de df_humidity a datetime con UTC (para que coincida con daily_df)
    df_humidity['date'] = pd.to_datetime(df_humidity['date'], utc=True)  # <-- Corrección clave aquí

    # Realizar el merge
    daily_df = daily_df.merge(df_humidity, on='date', how='left')
    daily_df["estación"] = serie
    df = df.rename(columns={
        "temperature_2m_mean": "tmed",
        "relative_humidity_2m": "hrMedia",
        "temperature_2m_max": "tmax",
        "temperature_2m_min": "tmin"
    })
    for (year, month_num), group in daily_df.groupby(['year', 'month_number']):
        month_name = meses_es[month_num]  
        
        directorio = os.path.join("../../data/raw/Open_Meteo/daily/" + ciudad, str(year))
        os.makedirs(directorio, exist_ok=True)
        
        archivo = os.path.join(directorio, f"{month_name}_meteo.csv")
        group.to_csv(archivo, index=False)

2025-02-20


In [None]:
url = "https://archive-api.open-meteo.com/v1/archive"
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

date = datetime.now()
latitud_longitud = [
    ("Madrid", 40.4168, -3.7038),
    ("Barcelona", 41.3888, 2.159),
    ("Hoyo de Manzanares", 40.6227, -3.9073)
]
# Extraer los valores con ceros a la izquierda
año = str(date.year)
año_menos3 = str(date.year - 5)  # Restamos 3 años directamente
month = f"{date.month:02d}"  # Formato con dos dígitos
day = int(f"{date.day:02d}") - 3  # Formato con dos dígitos

# Construimos las fechas en formato YYYY-MM-DD
today = f"{año}-{month}-{day}"
yesterday = f"{año_menos3}-{month}-{day}"
for location in latitud_longitud:
	nombre, lat, lon = location
	params = {
		"latitude": lat,
		"longitude": lon,
		"start_date": yesterday,
		"end_date": today,
		"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "snowfall", "snow_depth", "weather_code", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "soil_temperature_0_to_7cm", "soil_temperature_7_to_28cm", "soil_temperature_28_to_100cm", "soil_temperature_100_to_255cm", "soil_moisture_0_to_7cm", "soil_moisture_7_to_28cm", "soil_moisture_28_to_100cm", "soil_moisture_100_to_255cm"]
	}
	responses = openmeteo.weather_api(url, params=params)

	# Process first location. Add a for-loop for multiple locations or weather models
	response = responses[0]
	print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
	print(f"Elevation {response.Elevation()} m asl")
	print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
	print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

	# Process hourly data. The order of variables needs to be the same as requested.
	hourly = response.Hourly()
	hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
	hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
	hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
	hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
	hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
	hourly_rain = hourly.Variables(5).ValuesAsNumpy()
	hourly_snowfall = hourly.Variables(6).ValuesAsNumpy()
	hourly_snow_depth = hourly.Variables(7).ValuesAsNumpy()
	hourly_weather_code = hourly.Variables(8).ValuesAsNumpy()
	hourly_pressure_msl = hourly.Variables(9).ValuesAsNumpy()
	hourly_surface_pressure = hourly.Variables(10).ValuesAsNumpy()
	hourly_cloud_cover = hourly.Variables(11).ValuesAsNumpy()
	hourly_cloud_cover_low = hourly.Variables(12).ValuesAsNumpy()
	hourly_cloud_cover_mid = hourly.Variables(13).ValuesAsNumpy()
	hourly_cloud_cover_high = hourly.Variables(14).ValuesAsNumpy()
	hourly_et0_fao_evapotranspiration = hourly.Variables(15).ValuesAsNumpy()
	hourly_vapour_pressure_deficit = hourly.Variables(16).ValuesAsNumpy()
	hourly_wind_speed_10m = hourly.Variables(17).ValuesAsNumpy()
	hourly_wind_speed_100m = hourly.Variables(18).ValuesAsNumpy()
	hourly_wind_direction_10m = hourly.Variables(19).ValuesAsNumpy()
	hourly_wind_direction_100m = hourly.Variables(20).ValuesAsNumpy()
	hourly_wind_gusts_10m = hourly.Variables(21).ValuesAsNumpy()
	hourly_soil_temperature_0_to_7cm = hourly.Variables(22).ValuesAsNumpy()
	hourly_soil_temperature_7_to_28cm = hourly.Variables(23).ValuesAsNumpy()
	hourly_soil_temperature_28_to_100cm = hourly.Variables(24).ValuesAsNumpy()
	hourly_soil_temperature_100_to_255cm = hourly.Variables(25).ValuesAsNumpy()
	hourly_soil_moisture_0_to_7cm = hourly.Variables(26).ValuesAsNumpy()
	hourly_soil_moisture_7_to_28cm = hourly.Variables(27).ValuesAsNumpy()
	hourly_soil_moisture_28_to_100cm = hourly.Variables(28).ValuesAsNumpy()
	hourly_soil_moisture_100_to_255cm = hourly.Variables(29).ValuesAsNumpy()

	hourly_data = {"date": pd.date_range(
		start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
		end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
		freq = pd.Timedelta(seconds = hourly.Interval()),
		inclusive = "left"
	)}

	hourly_data["temperature_2m"] = hourly_temperature_2m
	hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
	hourly_data["dew_point_2m"] = hourly_dew_point_2m
	hourly_data["apparent_temperature"] = hourly_apparent_temperature
	hourly_data["precipitation"] = hourly_precipitation
	hourly_data["rain"] = hourly_rain
	hourly_data["snowfall"] = hourly_snowfall
	hourly_data["snow_depth"] = hourly_snow_depth
	hourly_data["weather_code"] = hourly_weather_code
	hourly_data["pressure_msl"] = hourly_pressure_msl
	hourly_data["surface_pressure"] = hourly_surface_pressure
	hourly_data["cloud_cover"] = hourly_cloud_cover
	hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
	hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
	hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
	hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
	hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
	hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
	hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
	hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
	hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
	hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
	hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
	hourly_data["soil_temperature_7_to_28cm"] = hourly_soil_temperature_7_to_28cm
	hourly_data["soil_temperature_28_to_100cm"] = hourly_soil_temperature_28_to_100cm
	hourly_data["soil_temperature_100_to_255cm"] = hourly_soil_temperature_100_to_255cm
	hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm
	hourly_data["soil_moisture_7_to_28cm"] = hourly_soil_moisture_7_to_28cm
	hourly_data["soil_moisture_28_to_100cm"] = hourly_soil_moisture_28_to_100cm
	hourly_data["soil_moisture_100_to_255cm"] = hourly_soil_moisture_100_to_255cm

	hourly_dataframe = pd.DataFrame(data=hourly_data)
    
timezone = response.Timezone()
hourly_dataframe["date"] = hourly_dataframe["date"].dt.tz_convert(timezone).dt.tz_localize(None)

for location in latitud_longitud:
	nombre, lat, lon = location
	for fecha, grupo in hourly_dataframe.groupby(hourly_dataframe["date"].dt.date):
		año_dir = str(fecha.year)
		mes_dir = f"{fecha.month:02d}"
		dia_archivo = f"{fecha.strftime('%Y-%m-%d')}.csv"
		print(nombre)
		ruta = os.path.join("../../data/raw/Open_Meteo/diarios/"+nombre+"_data", año_dir, mes_dir)
		os.makedirs(ruta, exist_ok=True)
		
		grupo.to_csv(os.path.join(ruta, dia_archivo), index=False)

print("datos guardados")


Coordinates 40.38664245605469°N -3.67608642578125°E
Elevation 666.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 41.37082290649414°N 2.068965435028076°E
Elevation 44.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Coordinates 40.597537994384766°N -4.07354736328125°E
Elevation 1008.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madrid
Madri

In [None]:
#Recoger la media de la humedad de 9 de la mañana hasta 10 de la noche y hacer media para asignarla al día
from pathlib import Path
def put_humidity():
    base_path = Path("../../data/raw/Open_Meteo/diarios/Barcelona_data")
    print("Ruta absoluta:", base_path.resolve())

    dfs = []

    for entry in base_path.glob("*"):
        print(f"[DEBUG] Elemento encontrado: {entry.name} (¿Es directorio? {entry.is_dir()})")
        
        if entry.is_dir() and entry.name.isdigit():  # Filtra solo directorios con nombres numéricos (años)
            year = int(entry.name)
            print(f"[DEBUG] Procesando año: {year}")
            
            # Iterar sobre los meses dentro del año
            for month_dir in entry.glob("*"):
                if month_dir.is_dir():
                    print(f"[DEBUG] Procesando mes: {month_dir.name}")
                    
                    # Leer todos los CSV del mes
                    for csv_file in month_dir.glob("*.csv"):
                        print(f"[DEBUG] Leyendo archivo: {csv_file}")
                        try:
                            df = pd.read_csv(csv_file, parse_dates=["date"]) 
                            dfs.append(df)
                        except Exception as e:
                            print(f"[ERROR] Fallo al leer {csv_file}: {e}")

    if not dfs:
        print("[ERROR] No se cargaron datos. Verifica:")
        print("1. La estructura de carpetas: Barcelona_data/diarios/AAAA/MM/AAAA-MM-DD.csv")
        print("2. Que los archivos CSV tengan la columna 'fecha'")
    else:
        df = pd.concat(dfs, ignore_index=True)
        #df.sort_values("date", inplace=True)

    df['date'] = pd.to_datetime(df['date'])  # Convertir a formato datetime
    df['day'] = df['date'].dt.date  # Extraer solo la fecha sin la hora

    # Filtrar el DataFrame entre las 09:00 y las 22:00
    df_filtered = df[(df['date'].dt.hour >= 9) & (df['date'].dt.hour <= 22)]

    # Calcular la media de 'relative_humidity_2m' por día
    media_por_dia = df_filtered.groupby('day')['relative_humidity_2m'].mean()
    media_por_dia = media_por_dia.reset_index()
    media_por_dia['date'] = pd.to_datetime(media_por_dia['day']).dt.normalize()
    
    # Mostrar resultados
    print(media_por_dia)

    return media_por_dia
put_humidity()
