In [1]:
#Data
import pandas as pd
import numpy as np
#-------------------------------------------------------

#Visualization
import seaborn as sns
import matplotlib.pyplot as plt
#-------------------------------------------------------
#Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#-------------------------------------------------------

# XGBoost
import xgboost as xgb
from xgboost import XGBRegressor
#-------------------------------------------------------

# Linear Models
from sklearn.linear_model import LinearRegression
#-------------------------------------------------------

# Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import classification_report
#-------------------------------------------------------

# Hyperparameter Tuning
from sklearn.model_selection import RandomizedSearchCV
#-------------------------------------------------------

# Others
import joblib
#-------------------------------------------------------



In [2]:
def Make_Circle(latitude, longitude, radius_km=50, num_points=100):
    """
    Build a ring of coordinates at a fixed distance around a center point.

    Parameters:
    latitude (float): Latitude of the center point, in degrees.
    longitude (float): Longitude of the center point, in degrees.
    radius_km (float): Radius of the ring in kilometers. Default is 50 km.
    num_points (int): Number of points sampled along the ring. Default is 100.

    Returns:
    list of tuples: (latitude, longitude) pairs, one per sampled angle. The
    angles run from 0 to 2*pi inclusive, so the first and last points coincide.
    """
    # Kilometers -> degrees, using ~111.32 km per degree of latitude.
    radius_deg = radius_km / 111.32

    # The east-west offset is divided by cos(center latitude) to compensate for
    # longitude degrees getting shorter away from the equator.
    center_lat_cos = np.cos(np.radians(latitude))

    return [
        (
            latitude + radius_deg * np.cos(theta),
            longitude + radius_deg * np.sin(theta) / center_lat_cos,
        )
        for theta in np.linspace(0, 2 * np.pi, num_points)
    ]

In [3]:
def Make_Prediction_Dataset(latitude, longitude, date, radius_km=50, num_points=100):
    """
    Create a dataset for prediction based on a circle around a given latitude and longitude.

    Every row shares the same date-derived features; only Latitude/Longitude vary.

    Parameters:
    latitude (float): Latitude of the center point.
    longitude (float): Longitude of the center point.
    date (str): Date in 'YYYY-MM-DD' format.
    radius_km (float): Radius of the circle in kilometers. Default is 50 km.
    num_points (int): Number of points to generate along the circle. Default is 100.

    Returns:
    pd.DataFrame: One row per circle point with columns Latitude, Longitude,
    Year, Month_sin, Month_cos, Day_sin, Day_cos (in that order).
    """
    feature_columns = [
        'Latitude', 'Longitude', 'Year',
        'Month_sin', 'Month_cos', 'Day_sin', 'Day_cos',
    ]

    # Forward radius_km / num_points: they were previously accepted but never
    # passed on, so the circle was always built with Make_Circle's defaults.
    circle_points = Make_Circle(latitude, longitude, radius_km, num_points)
    date = pd.to_datetime(date, format="%Y-%m-%d")

    # The date features are identical for every point, so compute them once.
    year = date.year
    month_angle = 2 * np.pi * date.month / 12
    # NOTE(review): a fixed 365-day period is used even in leap years; presumably
    # this matches the encoding used when the model was trained - confirm.
    day_angle = 2 * np.pi * date.dayofyear / 365
    month_sin, month_cos = np.sin(month_angle), np.cos(month_angle)
    day_sin, day_cos = np.sin(day_angle), np.cos(day_angle)

    rows = [
        {
            'Latitude': lat,
            'Longitude': lon,
            'Year': year,
            'Month_sin': month_sin,
            'Month_cos': month_cos,
            'Day_sin': day_sin,
            'Day_cos': day_cos,
        }
        for lat, lon in circle_points
    ]

    # Explicit columns keep the schema stable even when no points are generated.
    return pd.DataFrame(rows, columns=feature_columns)

In [None]:
def Predict(latitude, longitude, date, model, route, radius_km=50, num_points=100):
    """
    Generate predictions using the trained model for points around a given latitude and longitude on a specific date.

    Parameters:
    latitude (float): Latitude of the center point.
    longitude (float): Longitude of the center point.
    date (str): Date in 'YYYY-MM-DD' format.
    model: Already-loaded model exposing ``predict``. Only used when ``route`` is None.
    route (str or None): Path to a joblib-serialized model. When given, the model
        is loaded from this path and takes precedence over ``model``.
    radius_km (float): Radius of the circle in kilometers. Default is 50 km.
    num_points (int): Number of points to generate along the circle. Default is 100.

    Returns:
    The value returned by ``model.predict`` for the generated dataset.

    Raises:
    ValueError: If both ``model`` and ``route`` are None.
    """
    if route is not None:
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only load model files from trusted sources.
        model = joblib.load(route)
    elif model is None:
        raise ValueError("Predict requires either a loaded `model` or a `route` to a saved model.")

    dataset = Make_Prediction_Dataset(latitude, longitude, date, radius_km, num_points)
    return model.predict(dataset)

In [14]:
def calculateAQIS(concentrations, gas, breakpointsAQI):
    """
    Calculate the Air Quality Index (AQI) of one gas for each concentration.

    Each concentration is located between two consecutive breakpoints of the
    gas and linearly interpolated onto the matching 50-point index band
    (band i covers index values i*50 to (i+1)*50).

    Parameters:
    concentrations (iterable of float): Concentration values for the gas.
    gas (str): Key of the gas in ``breakpointsAQI``.
    breakpointsAQI (dict): Maps each gas to its ascending list of breakpoints.

    Returns:
    np.ndarray: Float array with one AQI value per concentration.
        - NaN concentrations yield NaN.
        - Concentrations below the first breakpoint yield 0.
        - Concentrations at or above the last breakpoint yield 300.
    """
    bounds = breakpointsAQI[gas]
    aqis = []

    for concentration in concentrations:
        # Missing values must stay missing; previously they fell through to the
        # "above the last breakpoint" fallback and were reported as 300.
        if np.isnan(concentration):
            aqis.append(np.nan)
            continue

        # Values below the scale (e.g. slightly negative model output) are
        # clamped to the lowest index instead of being reported as 300.
        if concentration < bounds[0]:
            aqis.append(0.0)
            continue

        for i in range(len(bounds) - 1):
            bp_lo, bp_hi = bounds[i], bounds[i + 1]
            if bp_lo <= concentration < bp_hi:
                index_lo = i * 50
                index_hi = (i + 1) * 50
                # NOTE(review): bands are uniform 50-point steps, so the top band
                # ends at 250 while the overflow value below is 300; the US EPA
                # scale uses a 201-300 top band - confirm which one is intended.
                aqis.append((index_hi - index_lo) / (bp_hi - bp_lo) * (concentration - bp_lo) + index_lo)
                break
        else:
            # Concentration is at or above the last breakpoint.
            aqis.append(300)

    return np.array(aqis, dtype=float)

In [6]:
def calculateAQI(dataset,breakpointsAQI) :
    """
    Compute the overall AQI of every observation in ``dataset``.

    ``dataset`` must provide the columns O3, pm2.5, pm10, CO, SO2 and NO2.
    The AQI of each gas is computed with ``calculateAQIS`` and the overall AQI
    of an observation is the largest of its per-gas values.

    Returns a list with one AQI value per row of ``dataset``.
    """
    pollutants = ['O3', 'pm2.5', 'pm10', 'CO', 'SO2', 'NO2']

    # Per-gas AQI arrays, keyed by gas name.
    aqi_by_gas = {
        name: calculateAQIS(dataset[name].values, name, breakpointsAQI)
        for name in pollutants
    }

    # For each row, keep the worst (highest) AQI across all gases.
    return [
        max(aqi_by_gas[name][row] for name in aqi_by_gas)
        for row in range(len(dataset))
    ]

In [None]:
# Example inputs for a prediction run around a single location.
latitude = 34.0522  # Example latitude
longitude = -118.2437  # Example longitude
date = "2023-06-15"  # Example date
radius_km = 50  # Example radius in kilometers
num_points = 100  # Example number of points
route="../models/SO2_model.pkl"
breakpointsAQI = {
    'O3' : [0,0.055,0.071,0.086,0.106,0.201],
    'pm2.5' :[0,9.1,35.5,55.5,125.5,225.5],
    'pm10' :[0,55,155,255,355,425],
    'CO' :[0,4.5,9.5,12.5,15.5,30.5],
    'SO2' :[0,36,76,186,305,605],
    'NO2' :[0,54,101,361,650,1250]
}


# Predict loads the estimator from `route`, so no in-memory model is needed here.
# The previous call passed an undefined name `model`, which raises NameError on a
# fresh kernel (Restart & Run All).
predictions = Predict(latitude, longitude, date, None, route, radius_km, num_points)
predictions

In [None]:
# Build an example dataset with 50 observations.
# A seeded generator makes the sample (and every AQI derived from it)
# reproducible across kernel restarts.
rng = np.random.default_rng(42)

data = {
    # O3 concentrations (original note says ppb; the 0-0.2 range looks like ppm - TODO confirm)
    'O3': rng.uniform(0, 0.2, 50),
    'pm2.5': rng.uniform(0, 250, 50),    # PM2.5 concentrations in ug/m3
    'pm10': rng.uniform(0, 500, 50),     # PM10 concentrations in ug/m3
    'CO': rng.uniform(0, 30, 50),        # CO concentrations in ppm
    'SO2': rng.uniform(0, 600, 50),      # SO2 concentrations in ppb
    'NO2': rng.uniform(0, 1250, 50)      # NO2 concentrations in ppb
}

# Convert the dictionary to a pandas DataFrame.
test_dataset = pd.DataFrame(data)

# Ascending concentration breakpoints per gas, used to map a concentration
# onto an AQI band.
breakpointsAQI = {
    'O3' : [0,0.055,0.071,0.086,0.106,0.201],
    'pm2.5' :[0,9.1,35.5,55.5,125.5,225.5],
    'pm10' :[0,55,155,255,355,425],
    'CO' :[0,4.5,9.5,12.5,15.5,30.5],
    'SO2' :[0,36,76,186,305,605],
    'NO2' :[0,54,101,361,650,1250]
}



In [22]:
# Overall AQI per observation: the highest per-gas AQI found in each row.
aqi = calculateAQI(test_dataset, breakpointsAQI)
aqi

[np.float64(211.12295617068975),
 np.float64(240.18231141853641),
 np.float64(217.58611686174703),
 np.float64(223.06245616188363),
 np.float64(237.43019015762974),
 np.float64(230.60425537248528),
 np.float64(240.7187652977422),
 np.float64(231.52339112336324),
 np.float64(208.86891939581534),
 np.float64(300.0)]