In [47]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import HeatMapWithTime

In [48]:
# Load the cleaned readings (CSV) and the sensor catalogue (Excel workbook).
cleaned_csv = r"../../data/processed/Cleaned_Data.csv"
sensors_xlsx = r"../../data/sensores_airenuevoleon.xlsx"
data = pd.read_csv(cleaned_csv)
sensors_data = pd.read_excel(sensors_xlsx)

In [49]:
# Keep the part of every column name that precedes the first underscore.
# dict.fromkeys de-duplicates while preserving the column order of `data`, so
# the list (and anything built from it) comes out the same on every run; a
# plain set() is ordered by string hash, which changes between sessions.
pollutants = list(dict.fromkeys(col.split("_")[0] for col in data.columns))

In [50]:
# Drop the "Datetime" entry so only the measured variables remain.
# Filtering (instead of list.remove) keeps the cell safe to re-run:
# remove() raises ValueError once "Datetime" is already gone.
pollutants = [name for name in pollutants if name != "Datetime"]
pollutants

['PRESIONATM',
 'VIENTOVEL',
 'PM10',
 'LLUVIA',
 'NO1',
 'HUMEDAD',
 'CO',
 'NO2',
 'NOx',
 'O3',
 'RS',
 'SO2',
 'TEMPERATURA',
 'PM25']

In [51]:
# Reshape from one column per "<variable>_<sensor>" pair to one row per
# (Datetime, Sensor_ID) with one column per variable.
reshape_kwargs = dict(
    stubnames=pollutants,  # variable prefixes found in the column names
    i="Datetime",          # identifier column kept for every row
    j="Sensor_ID",         # new column that receives the suffix after "_"
    sep="_",
    suffix="\\w+",
)
df_long = pd.wide_to_long(data, **reshape_kwargs).reset_index()

In [54]:
# Preview the first five rows of the long-format table.
df_long.head(n=5)

Unnamed: 0,Datetime,Sensor_ID,PRESIONATM,VIENTOVEL,PM10,LLUVIA,NO1,HUMEDAD,CO,NO2,NOx,O3,RS,SO2,TEMPERATURA,PM25
0,2022-01-01 00:00:00,ANL2,714.5,8.5,85.0,0.0,0.0113,80.0,1.24,0.0203,0.0301,0.084,0.538,0.0089,13.28,26.06
1,2022-01-01 01:00:00,ANL2,714.5,8.5,76.0,0.0,0.0113,80.0,1.35,0.0242,0.0301,0.073,0.538,0.0067,13.28,32.62
2,2022-01-01 02:00:00,ANL2,714.5,8.5,82.0,0.0,0.0113,80.0,1.77,0.0355,0.0301,0.044,0.538,0.0051,13.28,41.12
3,2022-01-01 03:00:00,ANL2,714.5,8.5,98.0,0.0,0.0113,80.0,2.38,0.0468,0.0301,0.015,0.538,0.0048,13.28,91.73
4,2022-01-01 04:00:00,ANL2,714.5,8.5,165.0,0.0,0.0113,80.0,2.88,0.0496,0.0301,0.007,0.538,0.0047,13.28,142.34


In [66]:
# Attach zone and coordinates to every reading. The catalogue spells its key
# "Sensor_id", so that duplicate key column is dropped after the join.
sensor_coords = sensors_data[["Zona", "Longitud", "Latitud", "Sensor_id"]]
data = (
    df_long
    .merge(sensor_coords, how="left", left_on="Sensor_ID", right_on="Sensor_id")
    .drop(columns="Sensor_id")
)

In [70]:
# Show a single row to check the merged columns.
data.head(n=1)

Unnamed: 0,Datetime,Sensor_ID,PRESIONATM,VIENTOVEL,PM10,LLUVIA,NO1,HUMEDAD,CO,NO2,NOx,O3,RS,SO2,TEMPERATURA,PM25,Zona,Longitud,Latitud
0,2022-01-01 00:00:00,ANL2,714.5,8.5,85.0,0.0,0.0113,80.0,1.24,0.0203,0.0301,0.084,0.538,0.0089,13.28,26.06,San Nicolás,-100.26,25.75


In [82]:
# DATA PREPARATION: parse the timestamps and derive the calendar date
data["Datetime"] = pd.to_datetime(data["Datetime"], format="%Y-%m-%d %H:%M:%S")
data["fecha"] = data["Datetime"].dt.date

# DISPLACED POSITIONS: shift each sensor location by escala * VIENTOVEL along
# the angle stored in RS.
# NOTE(review): RS is converted from degrees as if it were a wind bearing --
# confirm that RS really holds the wind direction.
escala = 0.0015
data["radianes"] = np.radians(data["RS"])
factor_norte = escala * np.cos(data["radianes"])
factor_este = escala * np.sin(data["radianes"])
data["lat_disp"] = data["Latitud"] + factor_norte * data["VIENTOVEL"]
data["lon_disp"] = data["Longitud"] + factor_este * data["VIENTOVEL"]

# PER-DAY, PER-SENSOR TABLE: PM25 is averaged; every other column keeps the
# first value of the day.
columnas_first = ["lat_disp", "lon_disp", "Latitud", "Longitud", "VIENTOVEL", "RS"]
agregaciones = {"PM25": "mean", **dict.fromkeys(columnas_first, "first")}
agrupado = data.groupby(["fecha", "Sensor_ID"]).agg(agregaciones).reset_index()

# UNIQUE DATES, in ascending order
fechas = list(agrupado["fecha"].unique())
fechas.sort()

# FUNCTION TO GENERATE SYNTHETIC POINTS ("CLOUD")
pm_max = 150  # PM25 value that maps to full heat intensity (weight 1)


def generar_nube(row, repeticiones=15, ruido=0.0003, rng=None):
    """Build a jittered cloud of heatmap points around one displaced position.

    Parameters:
        row: mapping or pandas Series
            Must provide "lat_disp", "lon_disp" and "PM25".
        repeticiones: int, default 15
            Number of points to generate.
        ruido: float, default 0.0003
            Standard deviation of the Gaussian jitter added to each coordinate.
        rng: numpy.random.Generator or None
            Source of randomness. None uses the global numpy random state;
            pass a seeded generator for reproducible clouds.

    Returns:
        list of [lat, lon, weight] lists, where weight is PM25 / pm_max
        clamped to [0, 1]. The list is empty when any of the three inputs is
        missing or not finite, because such a point cannot be placed or
        weighted on the map.
    """
    valores = np.asarray([row["lat_disp"], row["lon_disp"], row["PM25"]], dtype=float)
    if not np.isfinite(valores).all():
        return []
    lat, lon, pm25 = valores.tolist()

    # Clamp on both sides so the weight stays within the gradient range.
    peso = min(max(pm25 / pm_max, 0.0), 1.0)

    sampler = np.random if rng is None else rng
    return [
        [
            lat + float(sampler.normal(0, ruido)),
            lon + float(sampler.normal(0, ruido)),
            peso,
        ]
        for _ in range(repeticiones)
    ]

# CREATE HEATMAP DATA
# One frame per date: the concatenated point clouds of every row of that day.
heatmap_data = [
    [
        punto
        for _, fila in agrupado[agrupado["fecha"] == dia].iterrows()
        for punto in generar_nube(fila)
    ]
    for dia in fechas
]

# Labels for the time slider, one per frame
fechas_str = list(map(str, fechas))

# CREATE BASE MAP
mapa = folium.Map(
    location=[25.67, -100.31],
    zoom_start=11,
    min_zoom=10,
    max_zoom=15,
    tiles="CartoDB positron",
)

# ADD HEATMAP WITH TIME
# Colour stops keyed by normalized intensity.
gradiente = {0.2: 'blue', 0.4: 'lime', 0.6: 'yellow', 0.8: 'orange', 1.0: 'red'}
capa_calor = HeatMapWithTime(
    data=heatmap_data,
    index=fechas_str,
    auto_play=True,
    radius=60,
    max_opacity=0.8,
    min_opacity=0.3,
    use_local_extrema=False,
    gradient=gradiente,
)
capa_calor.add_to(mapa)

# ADD WIND DIRECTION ARROWS (STATIC, to illustrate typical wind patterns)
# `agrupado` holds one row per (fecha, Sensor_ID). Drawing every row would stack
# one line and one marker per day on top of each sensor, so the rows are first
# reduced to a single mean wind vector per sensor. Rows with missing
# coordinates or wind values cannot be drawn and are skipped.
# NOTE(review): RS is read as a bearing in degrees -- confirm that RS really is
# the wind direction column.
viento = agrupado.dropna(subset=["Latitud", "Longitud", "VIENTOVEL", "RS"]).copy()
angulo = np.radians(viento["RS"])
viento["comp_norte"] = viento["VIENTOVEL"] * np.cos(angulo)
viento["comp_este"] = viento["VIENTOVEL"] * np.sin(angulo)
# Averaging the components (not the angles) gives a proper mean direction.
viento_por_sensor = viento.groupby("Sensor_ID").agg({
    "Latitud": "first",
    "Longitud": "first",
    "comp_norte": "mean",
    "comp_este": "mean",
})

# Arrow length factor (adjust for visibility!)
arrow_scale = 0.01

for _, row in viento_por_sensor.iterrows():
    start = [row["Latitud"], row["Longitud"]]
    end = [
        row["Latitud"] + arrow_scale * row["comp_norte"],
        row["Longitud"] + arrow_scale * row["comp_este"],
    ]

    # Line representing the mean wind vector of this sensor
    folium.PolyLine([start, end], color="blue", weight=2, opacity=0.7).add_to(mapa)

    # Small marker at the sensor location
    folium.CircleMarker(
        location=start,
        radius=2,
        color="black",
        fill=True,
        fill_color="black"
    ).add_to(mapa)

mapa

# Test


In [155]:
# Load the weather table and the sensor catalogue, then attach zone and
# coordinates to every weather row (left join on "Sensor_id").
df_ = pd.read_csv(r"../../WeatherData.csv")
sensors_data = pd.read_excel(r"../../data/sensores_airenuevoleon.xlsx")
sensor_coords = sensors_data[["Zona", "Longitud", "Latitud", "Sensor_id"]]
data = df_.merge(sensor_coords, how="left", left_on="Sensor_id", right_on="Sensor_id")

In [158]:
# Exclude these sensors from the rest of the analysis.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not write into a slice (SettingWithCopyWarning).
sensores_excluidos = ["ANL10", "ANL16", "ANL15", "ANL7"]
data = data[~data["Sensor_id"].isin(sensores_excluidos)].copy()

In [166]:
# Parse the timestamp column once, then derive the calendar date from it.
dia_ts = pd.to_datetime(data["Dia"], format="%Y-%m-%d %H:%M:%S")
data["Dia"] = dia_ts
data["Fecha"] = dia_ts.dt.date

In [169]:
# Preview the first five rows after filtering and date parsing.
data.head(n=5)

Unnamed: 0,Dia,Registros_id,PM10,PM25,O3,CO,NO1,NO2,NOx,SO2,...,VIENTOVEL,Year,period_signature,key,hour,month,Zona,Longitud,Latitud,Fecha
0,2022-01-01 00:00:00,2048929,78.0,12.715,0.073,0.94,0.0039,0.0166,0.0135,0.0062,...,10.6,2022,01-01 00:00:00,ANL1_01-01 00:00:00,0,1,Guadalupe,-100.25,25.67,2022-01-01
1,2022-01-01 01:00:00,2048943,67.0,12.01,0.066,1.05,0.0038,0.0213,0.0161,0.005,...,9.6,2022,01-01 01:00:00,ANL1_01-01 01:00:00,1,1,Guadalupe,-100.25,25.67,2022-01-01
2,2022-01-01 02:00:00,2048957,69.0,12.285,0.053,1.15,0.0039,0.0293,0.0176,0.0045,...,9.0,2022,01-01 02:00:00,ANL1_01-01 02:00:00,2,1,Guadalupe,-100.25,25.67,2022-01-01
3,2022-01-01 03:00:00,2048971,74.0,11.99,0.029,1.57,0.0041,0.02893,0.0167,0.0043,...,6.233333,2022,01-01 03:00:00,ANL1_01-01 03:00:00,3,1,Guadalupe,-100.25,25.67,2022-01-01
4,2022-01-01 04:00:00,2048985,99.0,11.4,0.019,2.05,0.0036,0.02856,0.0152,0.0041,...,5.266667,2022,01-01 04:00:00,ANL1_01-01 04:00:00,4,1,Guadalupe,-100.25,25.67,2022-01-01


In [162]:
def daily_avg(df, date_col="Fecha", pollutant_col="PM25", sensor_id_col="Sensor_id"):
    """Average one pollutant per date and sensor.

    Parameters:
        df: pandas DataFrame
            Must contain date_col, sensor_id_col, pollutant_col, "Latitud"
            and "Longitud".
        date_col: str, default 'Fecha'
            Column holding the date to group by.
        pollutant_col: str, default 'PM25'
            Column to average.
        sensor_id_col: str, default 'Sensor_id'
            Column holding the sensor identifier.

    Returns:
        pandas DataFrame with one row per (date, sensor): the mean of
        pollutant_col plus the first "Latitud" and "Longitud" of the group.
    """
    return (
        df.groupby([date_col, sensor_id_col])
        .agg({
            pollutant_col: "mean",
            "Latitud": "first",
            "Longitud": "first",
        })
        .reset_index()
    )

In [163]:
# Daily PM25 mean per sensor.
test = daily_avg(data, pollutant_col="PM25")

In [None]:
dias_unicos = test["Fecha"].unique()

# Start from an empty list: without this, the frames would be appended to
# whatever `heatmap_data` an earlier cell left in the kernel.
heatmap_data = []
for dia in dias_unicos:
    subset = test[test["Fecha"] == dia]
    puntos = [
        [row["Latitud"], row["Longitud"], row["PM25"]]
        for _, row in subset.iterrows()
    ]
    heatmap_data.append(puntos)

# Convert dates to strings for HeatMapWithTime slider
fechas = [str(fecha) for fecha in dias_unicos]

import matplotlib.pyplot as plt

def plot_sensor_subplots(df, value_col, sensors, datetime_col="Fecha", sensor_id_col="Sensor_id", title=None, n_cols=3):
    """
    Plot historical data for multiple sensors, each in its own subplot.

    Parameters:
        df: pandas DataFrame
            Must contain datetime_col, sensor_id_col, and value_col.
        value_col: str
            Name of the column to plot (e.g., 'PM25').
        sensors: list
            Sensor IDs to include in the plots. When empty, nothing is drawn.
        datetime_col: str, default 'Fecha'
            Column used for the x axis.
        sensor_id_col: str, default 'Sensor_id'
            Column with sensor ID information.
        title: str or None
            Title for the entire figure.
        n_cols: int, default 3
            Number of columns for subplots.

    Returns:
        None

    Raises:
        ValueError: if n_cols is smaller than 1.
    """
    if n_cols < 1:
        raise ValueError("n_cols must be at least 1")

    sensors = list(sensors)
    if not sensors:
        # Nothing to plot; a grid with zero rows cannot be created.
        return None

    n_sensors = len(sensors)
    n_rows = (n_sensors + n_cols - 1) // n_cols  # ceil division

    # squeeze=False always returns a 2-D array of axes, so flatten() also works
    # for a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=(6 * n_cols, 4 * n_rows),
        sharex=True,
        sharey=True,
        squeeze=False,
    )
    axes = axes.flatten()

    for ax, sensor in zip(axes, sensors):
        subset = df[df[sensor_id_col] == sensor]
        ax.plot(subset[datetime_col], subset[value_col], label=f"Sensor {sensor}", marker="o", linewidth=1)
        ax.set_title(f"Sensor {sensor}")
        ax.set_xlabel("Date")
        ax.set_ylabel(value_col)
        ax.grid(True, linestyle="--", alpha=0.5)
        ax.legend()

    # Remove the unused cells of the grid
    for unused_ax in axes[n_sensors:]:
        fig.delaxes(unused_ax)

    if title:
        fig.suptitle(title, fontsize=16)

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave space for suptitle
    plt.show()
    
# Take the sensor list from the filtered `data`, not the raw `df_`: the sensors
# excluded from `data` have no rows to plot and would only add empty panels.
plot_sensor_subplots(daily_avg(data, pollutant_col="PM25"), "PM25", sensors=data["Sensor_id"].unique().tolist(), datetime_col="Fecha")

In [None]:
# Aggregate `data`, not the raw `df_`: only `data` carries the "Fecha",
# "Latitud" and "Longitud" columns that daily_avg groups and aggregates on.
plot_sensor_subplots(daily_avg(data, pollutant_col="PM10"), "PM10", sensors=data["Sensor_id"].unique().tolist(), datetime_col="Fecha")

In [None]:
# Aggregate `data`, not the raw `df_`: only `data` carries the "Fecha",
# "Latitud" and "Longitud" columns that daily_avg groups and aggregates on.
plot_sensor_subplots(daily_avg(data, pollutant_col="O3"), "O3", sensors=data["Sensor_id"].unique().tolist(), datetime_col="Fecha")

In [170]:
diario = daily_avg(data, pollutant_col="O3")

# Rows without coordinates or without an O3 value cannot be drawn as weighted
# points, so they are left out of the frames.
diario = diario.dropna(subset=["Latitud", "Longitud", "O3"])

# Take the dates from `diario` itself so this cell does not depend on a
# variable defined in another cell from a different table.
dias_unicos = diario["Fecha"].unique()

# One frame per date: [lat, lon, weight] for every sensor with data that day
heatmap_data = [
    diario.loc[diario["Fecha"] == dia, ["Latitud", "Longitud", "O3"]].to_numpy().tolist()
    for dia in dias_unicos
]

# Convert dates to strings for HeatMapWithTime slider
fechas = [str(fecha) for fecha in dias_unicos]

# Base map centred on [25.67, -100.31] with bounded zoom levels
opciones_mapa = dict(
    location=[25.67, -100.31],
    zoom_start=11,
    min_zoom=10,
    max_zoom=15,
    tiles="CartoDB positron",
)
mapa = folium.Map(**opciones_mapa)

# Colour stops keyed by normalized intensity
gradiente = {0.2: 'blue', 0.4: 'lime', 0.6: 'yellow', 0.8: 'orange', 1.0: 'red'}

# Time-animated heatmap layer: one frame per entry of `fechas`
capa_calor = HeatMapWithTime(
    data=heatmap_data,
    index=fechas,
    auto_play=True,
    radius=40,
    max_opacity=0.7,
    min_opacity=0.3,
    use_local_extrema=True,
    gradient=gradiente,
)
capa_calor.add_to(mapa)

# Display the map as the cell output
mapa