In [11]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

def nowcast(series, window=12, min_fraction=0.5):
    """
    Compute an EPA-style NowCast (weighted rolling average) for a time series.

    For every position ``i`` the last ``window`` readings (rows ``i - window + 1``
    through ``i``) are combined into a weighted mean in which the reading that
    is ``k`` rows old receives the weight ``w ** k``. The weight factor is
    ``w = max(min / max, 0.5)`` computed over the valid readings in the window
    (``w = 1.0`` when the window maximum is not positive).

    Missing readings (NaN) are left out of both the numerator and the
    denominator, but they still occupy their slot in the window: a reading
    that is three rows old is weighted ``w ** 3`` even when the rows in
    between are missing.

    Notes
    -----
    * The window is positional. The function never looks at timestamps, so
      the caller must pass evenly spaced (e.g. hourly) data in chronological
      order.
    * The EPA requirement that two of the three most recent hours be valid is
      not applied; validity is governed solely by ``min_fraction``.

    Parameters
    ----------
    series : pandas.Series or array-like of float
        Concentration readings in chronological order; NaN/None mark gaps.
    window : int, default 12
        Number of most recent rows taken into account.
    min_fraction : float, default 0.5
        A value is produced only when at least ``int(window * min_fraction)``
        readings in the window are valid.

    Returns
    -------
    list of float
        One value per input row; ``nan`` where too few valid readings exist.
    """
    if isinstance(series, pd.Series):
        values = series.to_numpy(dtype=float, na_value=np.nan)
    else:
        values = np.asarray(series, dtype=float)

    min_valid = int(window * min_fraction)
    nowcast_values = []

    for i in range(len(values)):
        start = max(i - window + 1, 0)
        # Reverse the slice so that index 0 is the current row and index k is
        # the reading k rows in the past.
        recent = values[start:i + 1][::-1]
        valid = ~np.isnan(recent)
        n_valid = int(valid.sum())

        if n_valid < min_valid or n_valid == 0:
            nowcast_values.append(np.nan)
            continue

        conc = recent[valid]
        # Age of every valid reading; gaps keep their slot so that the
        # exponent reflects elapsed rows rather than rank among valid rows.
        rows_ago = np.flatnonzero(valid)

        min_conc = conc.min()
        max_conc = conc.max()

        if max_conc > 0:
            # The factor is floored at 0.5 so older readings never vanish entirely.
            weight = max(min_conc / max_conc, 0.5)
        else:
            weight = 1.0

        weights = weight ** rows_ago
        nowcast_values.append(float(np.dot(conc, weights) / weights.sum()))

    return nowcast_values

# Read the input CSV (semicolon-separated, comma used as the decimal mark)
source_csv = os.path.join("Datos_PowerBI", 'SISAIRE-MODIFIED-COMPLETE-IMPUTED-FIXED.csv')
air_df = pd.read_csv(source_csv, sep=';', decimal=',')

# Parse the timestamps and order the rows chronologically within each station,
# so nowcast() receives every station's readings in time order.
air_df['Fecha'] = pd.to_datetime(air_df['Fecha'])
air_df = air_df.sort_values(["Estacion", "Fecha"])

result_df = []

for est, group in tqdm(air_df.groupby('Estacion'), desc="Processing Stations"):
    # Work on a copy so the new columns are not written into a groupby slice
    group = group.copy()

    for pollutant in ('PM25', 'PM10'):
        nowcast_col = 'Nowcast_' + pollutant
        group[nowcast_col] = nowcast(group[pollutant])
        # bfill() replaces every NaN with the next valid value: this covers the
        # warm-up rows at the start of a station as well as any later gaps.
        group[nowcast_col] = group[nowcast_col].bfill()

    result_df.append(group)

# Stack the per-station frames and write them out (dot as the decimal mark)
target_csv = os.path.join("Datos_PowerBI", 'SISAIRE-MODIFIED-COMPLETE-IMPUTED-FIXED-NOWCAST.csv')
out_df = pd.concat(result_df)
out_df.to_csv(target_csv, sep=';', decimal='.', index=False)

out_df.head(20)

Processing Stations: 100%|██████████| 19/19 [02:32<00:00,  8.05s/it]


Unnamed: 0,Estacion,Localidad,Fecha,Año,CO,NO2,O3,PM10,PM25,SO2,Nowcast_PM25,Nowcast_PM10
21475,BOLIVIA,ENGATIVÁ,2020-10-16 00:00:00,2020,343.47,23.31696,26.614516,38.0,15.2,1.04744,11.802564,26.044713
21474,BOLIVIA,ENGATIVÁ,2020-10-16 01:00:00,2020,343.47,21.041676,26.614516,31.8,15.3,1.57116,11.802564,26.044713
21473,BOLIVIA,ENGATIVÁ,2020-10-16 02:00:00,2020,228.98,18.916824,26.614516,28.3,12.3,1.3093,11.802564,26.044713
21472,BOLIVIA,ENGATIVÁ,2020-10-16 03:00:00,2020,228.98,17.280876,26.614516,21.3,9.5,1.3093,11.802564,26.044713
21471,BOLIVIA,ENGATIVÁ,2020-10-16 04:00:00,2020,343.47,22.545996,26.614516,21.6,8.8,1.83302,11.802564,26.044713
21470,BOLIVIA,ENGATIVÁ,2020-10-16 05:00:00,2020,343.47,31.271052,26.614516,28.4,13.6,1.3093,11.802564,26.044713
21469,BOLIVIA,ENGATIVÁ,2020-10-16 06:00:00,2020,457.96,34.03524,26.614516,22.9,11.5,1.3093,11.67129,24.638243
21468,BOLIVIA,ENGATIVÁ,2020-10-16 07:00:00,2020,457.96,29.484672,26.614516,33.1,8.9,1.3093,10.479673,28.393557
21467,BOLIVIA,ENGATIVÁ,2020-10-16 08:00:00,2020,343.47,27.92394,26.614516,32.7,7.4,1.3093,8.838748,30.29652
21466,BOLIVIA,ENGATIVÁ,2020-10-16 09:00:00,2020,343.47,28.807728,26.614516,37.8,5.9,1.3093,7.367937,33.604229


In [12]:
# Display the last 20 rows of the combined per-station result
out_df.tail(20)

Unnamed: 0,Estacion,Localidad,Fecha,Año,CO,NO2,O3,PM10,PM25,SO2,Nowcast_PM25,Nowcast_PM10
419403,USME,USME,2023-12-31 03:00:00,2023,343.47,20.684,13.342,16.9,5.3,0.786,6.655556,18.389206
419402,USME,USME,2023-12-31 04:00:00,2023,457.96,25.009,7.848,9.0,5.5,0.786,6.075629,13.688205
419401,USME,USME,2023-12-31 05:00:00,2023,572.45,25.761,2.551,8.2,8.2,2.095,7.136361,10.939292
419400,USME,USME,2023-12-31 06:00:00,2023,686.94,21.437,2.551,9.7,9.6,2.095,8.367643,10.31663
419399,USME,USME,2023-12-31 07:00:00,2023,801.43,24.445,3.728,23.7,8.1,2.357,8.232918,17.008205
419398,USME,USME,2023-12-31 08:00:00,2023,457.96,14.479,18.443,18.7,7.7,4.19,7.966007,17.852967
419397,USME,USME,2023-12-31 09:00:00,2023,343.47,10.154,34.335,22.3,20.6,3.142,14.284054,20.075726
419396,USME,USME,2023-12-31 10:00:00,2023,457.96,17.488,52.189,20.6,15.4,4.452,14.842491,20.33851
419395,USME,USME,2023-12-31 11:00:00,2023,457.96,15.795,74.752,15.4,12.4,3.142,13.621197,17.869109
419394,USME,USME,2023-12-31 12:00:00,2023,572.45,21.437,85.543,16.1,9.7,4.975,11.660562,16.982759


In [13]:
# Read the input CSV (semicolon-separated, comma used as the decimal mark)
source_csv = os.path.join("Datos_PowerBI", 'SISAIRE-MODIFIED-COMPLETE-FIXED.csv')
air_df = pd.read_csv(source_csv, sep=';', decimal=',')

# Parse the timestamps and order the rows chronologically within each station,
# so nowcast() receives every station's readings in time order.
air_df['Fecha'] = pd.to_datetime(air_df['Fecha'])
air_df = air_df.sort_values(["Estacion", "Fecha"])

result_df = []

for est, group in tqdm(air_df.groupby('Estacion'), desc="Processing Stations"):
    # Work on a copy so the new columns are not written into a groupby slice
    group = group.copy()

    for pollutant in ('PM25', 'PM10'):
        nowcast_col = 'Nowcast_' + pollutant
        group[nowcast_col] = nowcast(group[pollutant])
        # bfill() replaces every NaN with the next valid value: this covers the
        # warm-up rows at the start of a station as well as any later gaps.
        group[nowcast_col] = group[nowcast_col].bfill()

    result_df.append(group)

# Stack the per-station frames and write them out (dot as the decimal mark)
target_csv = os.path.join("Datos_PowerBI", 'SISAIRE-MODIFIED-COMPLETE-FIXED-NOWCAST.csv')
out_df = pd.concat(result_df)
out_df.to_csv(target_csv, sep=';', decimal='.', index=False)

out_df.head(20)

Processing Stations: 100%|██████████| 22/22 [02:48<00:00,  7.67s/it]


Unnamed: 0,Estacion,Localidad,Fecha,Año,CO,NO2,O3,PM10,PM25,SO2,Nowcast_PM25,Nowcast_PM10
496770,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 00:00:00,2022,,,,,29.0,21.94,18.111111,94.260088
496769,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 01:00:00,2022,,,,,48.0,15.77,18.111111,94.260088
496768,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 02:00:00,2022,,,,,44.0,17.93,18.111111,94.260088
496767,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 03:00:00,2022,,,,,17.0,9.48,18.111111,94.260088
496766,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 04:00:00,2022,,,,,20.0,12.95,18.111111,94.260088
496765,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 05:00:00,2022,,,,,12.0,9.89,18.111111,94.260088
496764,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 06:00:00,2022,,,,,14.0,17.58,16.03937,94.260088
496763,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 07:00:00,2022,,,,,12.0,13.27,14.011765,94.260088
496762,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 08:00:00,2022,,,,,24.0,32.35,19.015656,94.260088
496761,BOGOTA RURAL - MOCHUELO,FONTIBÓN,2022-01-01 09:00:00,2022,,,,,9.0,19.57,14.002933,94.260088
