# Additional Functions

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
from xgboost import XGBRegressor

## Definitions

In [2]:
def Make_Circle(latitude, longitude, radius_km=50, num_points=100):
    """
    Build a ring of coordinates at a fixed distance around a center point.

    Parameters:
    latitude (float): Latitude of the center point, in degrees.
    longitude (float): Longitude of the center point, in degrees.
    radius_km (float): Radius of the ring in kilometers. Default is 50 km.
    num_points (int): Number of samples taken along the ring. Default is 100.

    Returns:
    list of tuples: (latitude, longitude) pairs, starting due north of the
    center (angle 0) and sweeping through 2*pi. The sweep includes both end
    angles, so with more than one sample the last point lands back on the
    first one (up to floating-point rounding).
    """
    # Flat-earth approximation: one degree of latitude spans roughly 111.32 km.
    km_per_degree = 111.32
    radius_deg = radius_km / km_per_degree

    # A degree of longitude shrinks by cos(latitude), so the east-west offset
    # is divided by that factor to keep the ring round on the ground.
    lon_shrink = np.cos(np.radians(latitude))

    return [
        (
            latitude + radius_deg * np.cos(theta),
            longitude + radius_deg * np.sin(theta) / lon_shrink,
        )
        for theta in np.linspace(0, 2 * np.pi, num_points)
    ]

In [3]:
def Make_Prediction_Dataset(latitude, longitude, date, radius_km=50, num_points=100):
    """
    Create the model input features for a ring of points around a location.

    Parameters:
    latitude (float): Latitude of the center point.
    longitude (float): Longitude of the center point.
    date (str): Date in 'YYYY-MM-DD' format.
    radius_km (float): Radius of the circle in kilometers. Default is 50 km.
    num_points (int): Number of points to generate along the circle. Default is 100.

    Returns:
    tuple: (features, date) where
        features (pd.DataFrame): One row per circle point with the columns
            Latitude, Longitude, Year, Month_sin, Month_cos, Day_sin, Day_cos
            (always in that order, even when no points are generated).
        date (pd.Timestamp): The parsed input date.

    Raises:
    ValueError: If `date` cannot be parsed with the '%Y-%m-%d' format.
    """
    feature_columns = [
        'Latitude', 'Longitude', 'Year',
        'Month_sin', 'Month_cos', 'Day_sin', 'Day_cos',
    ]

    # Generate circle points
    circle_points = Make_Circle(latitude, longitude, radius_km, num_points)
    date = pd.to_datetime(date, format="%Y-%m-%d")

    # The date features are identical for every point, so compute them once.
    # Month and day-of-year are encoded cyclically (sin/cos pairs).
    # NOTE(review): the day period is 365 even in leap years; left unchanged
    # because it presumably has to match the encoding the models were trained
    # with - confirm before altering.
    month_angle = 2 * np.pi * date.month / 12
    day_angle = 2 * np.pi * date.dayofyear / 365
    date_features = {
        'Year': date.year,
        'Month_sin': np.sin(month_angle),
        'Month_cos': np.cos(month_angle),
        'Day_sin': np.sin(day_angle),
        'Day_cos': np.cos(day_angle),
    }

    data = [
        {'Latitude': lat, 'Longitude': lon, **date_features}
        for lat, lon in circle_points
    ]

    # Passing the column list keeps the schema stable for an empty circle.
    return pd.DataFrame(data, columns=feature_columns), date

In [4]:
def calculateAQIS(concentrations, gas, breakpointsAQI):
    """
    Convert concentrations of one pollutant into AQI values.

    The breakpoint list of `gas` splits the concentration axis into consecutive
    segments. Segment i (from breakpoint i up to, but excluding, breakpoint
    i + 1) is mapped linearly onto the index range [50 * i, 50 * (i + 1)].

    Parameters:
    concentrations (iterable of float): Concentration values to convert.
    gas (str): Key selecting the breakpoint list inside `breakpointsAQI`.
    breakpointsAQI (dict): Maps each gas key to its list of breakpoints
        (expected to be in ascending order).

    Returns:
    np.ndarray: One AQI value per concentration, in input order.
        - Values below the first breakpoint (e.g. a negative model output)
          get the bottom of the scale, 0.
        - Values at or above the last breakpoint get the top of the scale,
          50 * (number of segments), so the result never decreases as the
          concentration grows.
        - NaN concentrations yield NaN.

    Raises:
    KeyError: If `gas` is not a key of `breakpointsAQI`.
    ValueError: If fewer than two breakpoints are defined for `gas`.
    """
    index_step = 50  # width of each AQI band on the index axis
    breakpoints = breakpointsAQI[gas]
    segment_count = len(breakpoints) - 1
    if segment_count < 1:
        raise ValueError(f"At least two breakpoints are required for {gas}")

    lowest_breakpoint = breakpoints[0]
    top_index = float(segment_count * index_step)

    aqis = []  # AQI values
    for concentration in concentrations:
        # NaN fails every comparison below; report it as NaN rather than
        # letting it masquerade as an extreme reading.
        if np.isnan(concentration):
            aqis.append(np.nan)
            continue

        # Out-of-range low values (regressors can predict below zero) are
        # clamped to the bottom of the scale instead of being scored as high.
        if concentration < lowest_breakpoint:
            aqis.append(0.0)
            continue

        for i in range(segment_count):
            bp_lo = breakpoints[i]
            bp_hi = breakpoints[i + 1]
            if bp_lo <= concentration < bp_hi:
                index_lo = i * index_step
                index_hi = (i + 1) * index_step
                aqis.append(
                    ((index_hi - index_lo) / (bp_hi - bp_lo)) * (concentration - bp_lo) + index_lo
                )
                break
        else:
            # At or beyond the last breakpoint: saturate at the top of the scale.
            aqis.append(top_index)

    return np.array(aqis)

In [5]:
def calculateAQI(dataset, breakpointsAQI):
    """
    Compute the overall AQI of every observation as the worst per-pollutant AQI.

    Parameters:
    dataset (pd.DataFrame): Must contain the columns Predictions_O3,
        Predictions_PM25, Predictions_PM10, Predictions_CO, Predictions_SO2
        and Predictions_NO2.
    breakpointsAQI (dict): Breakpoint lists keyed by those same column names.

    Returns:
    list: For each row of `dataset`, the maximum of the six per-pollutant AQI
    values produced by calculateAQIS.
    """
    pollutant_columns = [
        'Predictions_O3', 'Predictions_PM25', 'Predictions_PM10',
        'Predictions_CO', 'Predictions_SO2', 'Predictions_NO2',
    ]

    # One AQI array per pollutant, each aligned with the rows of the dataset.
    per_pollutant = {
        column: calculateAQIS(dataset[column].values, column, breakpointsAQI)
        for column in pollutant_columns
    }

    # Walk the arrays in lockstep and keep the highest index of each row.
    return [max(row_values) for row_values in zip(*per_pollutant.values())]

In [6]:
def Make_CSV(latitude, longitude, date, route_array, output_file, radius_km=50, num_points=100):
    """
    Generate AQI predictions around a location and save them to a CSV file.

    The output file has one row per circle point with the columns
    Latitude, Longitude, Date and AQI.

    Parameters:
    latitude (float): Latitude of the center point.
    longitude (float): Longitude of the center point.
    date (str): Date in 'YYYY-MM-DD' format.
    route_array (list): Paths to the saved model files, one per pollutant, in
        the order CO, O3, NO2, SO2, PM25, PM10.
    output_file (str): Name of the output CSV file.
    radius_km (float): Radius of the circle in kilometers. Default is 50 km.
    num_points (int): Number of points to generate. Default is 100.

    Returns:
    None

    Raises:
    ValueError: If `route_array` does not contain exactly one path per pollutant.
    """
    # Gas name array; route_array is matched to it by position.
    gas_names = ['CO', 'O3', 'NO2', 'SO2', 'PM25', 'PM10']
    if len(route_array) != len(gas_names):
        raise ValueError(
            f"route_array must contain {len(gas_names)} model paths "
            f"(one per pollutant: {', '.join(gas_names)}), got {len(route_array)}"
        )

    # Create prediction dataset (features fed to every model)
    features, date = Make_Prediction_Dataset(latitude, longitude, date, radius_km, num_points)
    dataset = features[['Latitude', 'Longitude']].copy()
    dataset['Date'] = date

    # Generate predictions. Pairing by position (rather than looking the path
    # up with list.index) keeps the labels right even if a path is repeated.
    prediction_columns = []
    for gas, route in zip(gas_names, route_array):
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only load model files from trusted locations.
        model = joblib.load(route)
        column = f'Predictions_{gas}'
        dataset[column] = model.predict(features)
        prediction_columns.append(column)

    # Define breakpoints for AQI calculation
    breakpointsAQI = {
        'Predictions_O3': [0, 55, 71, 86, 106, 201, 301, 401, 501],
        'Predictions_PM25': [0.0, 12.1, 35.5, 55.5, 150.5, 250.5, 350.5, 500.5],
        'Predictions_PM10': [0, 55, 155, 255, 355, 425, 505, 605, 1000],
        'Predictions_CO': [0.0, 4.5, 9.5, 12.5, 15.5, 30.5, 40.5, 50.5, 60.5],
        'Predictions_SO2': [0, 36, 76, 186, 305, 605, 805, 1005, 2000],
        'Predictions_NO2': [0, 54, 101, 361, 650, 1250, 1650, 2050, 3000]
    }
    aqi_values = calculateAQI(dataset, breakpointsAQI)

    # Only the combined AQI is exported, not the per-pollutant predictions.
    output = dataset.drop(columns=prediction_columns)
    output['AQI'] = aqi_values

    # Save to CSV
    output.to_csv(output_file, index=False)

## Function Testing

In [7]:
# Test Make_Circle function
# Smoke test: build a 10-point ring around New York and print the first five
# points for visual inspection.
print("=== Make_Circle Test ===")
# These three values stay in the kernel namespace and are reused by the
# Make_Prediction_Dataset test cell below.
test_latitude = 40.7128  # New York
test_longitude = -74.0060
test_radius = 25  # 25 km

circle_points = Make_Circle(test_latitude, test_longitude, test_radius, num_points=10)
print(f"Circle points around ({test_latitude}, {test_longitude}) with radius {test_radius} km:")
for i, (lat, lon) in enumerate(circle_points[:5]):  # Show only first 5
    print(f"  Point {i+1}: ({lat:.4f}, {lon:.4f})")
# The "5" here matches the slice above; it is the count of points not printed.
print(f"  ... and {len(circle_points)-5} more points")

=== Make_Circle Test ===
Circle points around (40.7128, -74.006) with radius 25 km:
  Point 1: (40.9374, -74.0060)
  Point 2: (40.8848, -73.8156)
  Point 3: (40.7518, -73.7142)
  Point 4: (40.6005, -73.7494)
  Point 5: (40.5018, -73.9047)
  ... and 5 more points


In [8]:
# Test Make_Prediction_Dataset function
# Relies on test_latitude, test_longitude and test_radius defined in the
# Make_Circle test cell above, so that cell must be run first.
print("\n=== Make_Prediction_Dataset Test ===")
test_date = "2023-06-15"

# NOTE(review): Make_Prediction_Dataset returns a (DataFrame, Timestamp) tuple,
# so despite its name `prediction_dataset` holds that tuple, and the cell
# displays the tuple repr rather than a rendered DataFrame.
prediction_dataset = Make_Prediction_Dataset(
    test_latitude, test_longitude, test_date, test_radius, num_points=10
)
print(f"Prediction dataset created for {test_date}:")
# Bare last expression: shown as the cell's output.
prediction_dataset


=== Make_Prediction_Dataset Test ===
Prediction dataset created for 2023-06-15:


(    Latitude  Longitude  Year     Month_sin  Month_cos   Day_sin   Day_cos
 0  40.937378 -74.006000  2023  1.224647e-16       -1.0  0.280231 -0.959933
 1  40.884837 -73.815554  2023  1.224647e-16       -1.0  0.280231 -0.959933
 2  40.751798 -73.714220  2023  1.224647e-16       -1.0  0.280231 -0.959933
 3  40.600511 -73.749413  2023  1.224647e-16       -1.0  0.280231 -0.959933
 4  40.501766 -73.904666  2023  1.224647e-16       -1.0  0.280231 -0.959933
 5  40.501766 -74.107334  2023  1.224647e-16       -1.0  0.280231 -0.959933
 6  40.600511 -74.262587  2023  1.224647e-16       -1.0  0.280231 -0.959933
 7  40.751798 -74.297780  2023  1.224647e-16       -1.0  0.280231 -0.959933
 8  40.884837 -74.196446  2023  1.224647e-16       -1.0  0.280231 -0.959933
 9  40.937378 -74.006000  2023  1.224647e-16       -1.0  0.280231 -0.959933,
 Timestamp('2023-06-15 00:00:00'))

In [9]:
# Test Make_CSV function
# End-to-end check: run all six models for a ring around New York, write the
# CSV, then read it back to inspect its shape and columns.
print("\n=== Make_CSV Test ===")

latitude=40.7128  # New York
longitude=-74.0060
date="2023-06-15"
# Order matters: Make_CSV pairs these paths by position with its gas list
# (CO, O3, NO2, SO2, PM25, PM10).
route_array = [
    '../models/CO_model.pkl',
    '../models/O3_model.pkl',
    '../models/NO2_model.pkl',
    '../models/SO2_model.pkl',
    '../models/PM25_model.pkl',
    '../models/PM10_model.pkl'
]

# NOTE(review): this cell does not create ../output itself; presumably the
# directory must already exist for the write to succeed - confirm.
output_name = '../output/predictions_output.csv'
# radius_km and num_points are left at their defaults (50 km, 100 points).
Make_CSV(latitude, longitude, date, route_array, output_name)

# Read and show the result
result_df = pd.read_csv(output_name)
print(f"\nCSV file generated: {output_name}")
# "File size" here is the DataFrame shape (rows, columns), not bytes on disk.
print(f"File size: {result_df.shape}")
print("\nFirst 5 rows of CSV:")
print(result_df.head())
print("\nAvailable columns:")
print(result_df.columns.tolist())


=== Make_CSV Test ===

CSV file generated: ../output/predictions_output.csv
File size: (100, 4)

First 5 rows of CSV:
    Latitude  Longitude        Date        AQI
0  41.161956 -74.006000  2023-06-15  37.224052
1  41.161051 -73.968417  2023-06-15  37.224052
2  41.158342 -73.930986  2023-06-15  37.224052
3  41.153839 -73.893857  2023-06-15  35.244110
4  41.147560 -73.857179  2023-06-15  35.244110

Available columns:
['Latitude', 'Longitude', 'Date', 'AQI']


## API Service for Frontend Integration

In [None]:
# API imports
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import uvicorn
from typing import List, Dict, Any
import os
import json
from datetime import datetime

In [None]:
# Request and Response Models
class PredictionRequest(BaseModel):
    # Request body of POST /predict. The fields are passed one-to-one to
    # Make_Prediction_API, and the defaults below mirror that function's defaults.
    # Only the types are declared here; no range checks are applied to the
    # coordinates, radius or point count.
    latitude: float  # center point latitude
    longitude: float  # center point longitude
    date: str  # Format: YYYY-MM-DD
    radius_km: float = 50  # circle radius in kilometers
    num_points: int = 100  # number of points generated along the circle

class PredictionResponse(BaseModel):
    # Response body of POST /predict; same three keys as the success dictionary
    # returned by Make_Prediction_API.
    status: str  # "success" on the success path
    data: List[Dict[str, Any]]  # one record (dict) per generated point
    metadata: Dict[str, Any]  # echo of the request parameters plus summary info

In [None]:
# Initialize FastAPI app
# `app` is the object the route decorators below register against and the one
# handed to uvicorn by run_api_server / run_api_background.
app = FastAPI(
    title="Air Quality Prediction API",
    description="API for predicting air quality based on location and date",
    version="1.0.0"
)

# Configure model paths
# One pickled model per pollutant, keyed by the pollutant name used throughout
# the notebook. The paths are relative, so they resolve against the kernel's
# working directory.
MODEL_PATHS = {
    'CO': '../models/CO_model.pkl',
    'O3': '../models/O3_model.pkl',
    'NO2': '../models/NO2_model.pkl',
    'SO2': '../models/SO2_model.pkl',
    'PM25': '../models/PM25_model.pkl',
    'PM10': '../models/PM10_model.pkl'
}

# Ensure the models and output directories exist
# This only creates empty directories when they are missing; it does not
# provide the model files themselves.
os.makedirs('../models', exist_ok=True)
os.makedirs('../output', exist_ok=True)

In [None]:
# Enhanced Make_Prediction_API function
def Make_Prediction_API(latitude: float, longitude: float, date: str, radius_km: float = 50, num_points: int = 100):
    """
    Generate air quality predictions for API response.

    Failures are reported through the returned dictionary rather than raised.

    Parameters:
    latitude (float): Latitude of the center point.
    longitude (float): Longitude of the center point.
    date (str): Date in 'YYYY-MM-DD' format.
    radius_km (float): Radius of the circle in kilometers. Default is 50 km.
    num_points (int): Number of points to generate. Default is 100.

    Returns:
    dict: On success, {"status": "success", "data": [...], "metadata": {...}}
        where "data" holds one record per point (Latitude, Longitude, Date, the
        Predictions_<gas> columns that could be computed, and AQI when every
        pollutant was predicted). On failure, a dictionary with an "error"
        message (plus "available_models" when model files are missing).
    """
    # Validate the date on its own, so that only a genuinely bad date is
    # reported as a date-format problem.
    try:
        datetime.strptime(date, "%Y-%m-%d")
    except (TypeError, ValueError) as e:
        return {"error": f"Invalid date format. Use YYYY-MM-DD: {str(e)}"}

    try:
        # Gas name array (matching model order)
        gas_names = ['CO', 'O3', 'NO2', 'SO2', 'PM25', 'PM10']
        route_array = [MODEL_PATHS[gas] for gas in gas_names]

        # Check if all model files exist
        missing_models = [gas for gas, path in MODEL_PATHS.items() if not os.path.exists(path)]
        if missing_models:
            return {
                "error": f"Missing model files for: {', '.join(missing_models)}",
                "available_models": [gas for gas in gas_names if gas not in missing_models]
            }

        # Create prediction dataset (the features fed to every model)
        training_dataset, processed_date = Make_Prediction_Dataset(latitude, longitude, date, radius_km, num_points)
        dataset = training_dataset.copy()

        # Generate predictions for each gas. This is best-effort: a model that
        # fails to load or predict is skipped and recorded, not fatal.
        predicted_gases = []
        failed_gases = []
        for gas, route in zip(gas_names, route_array):
            try:
                # NOTE(security): joblib.load unpickles the file and can execute
                # arbitrary code; MODEL_PATHS must only point at trusted files.
                model = joblib.load(route)
                dataset[f'Predictions_{gas}'] = model.predict(training_dataset)
                predicted_gases.append(gas)
            except Exception as e:
                print(f"Error loading model {gas}: {str(e)}")
                failed_gases.append(gas)

        # Calculate AQI
        breakpointsAQI = {
            'Predictions_O3': [0, 55, 71, 86, 106, 201, 301, 401, 501],
            'Predictions_PM25': [0.0, 12.1, 35.5, 55.5, 150.5, 250.5, 350.5, 500.5],
            'Predictions_PM10': [0, 55, 155, 255, 355, 425, 505, 605, 1000],
            'Predictions_CO': [0.0, 4.5, 9.5, 12.5, 15.5, 30.5, 40.5, 50.5, 60.5],
            'Predictions_SO2': [0, 36, 76, 186, 305, 605, 805, 1005, 2000],
            'Predictions_NO2': [0, 54, 101, 361, 650, 1250, 1650, 2050, 3000]
        }

        # Only calculate AQI if we have predictions for all gases
        aqi_calculated = len(predicted_gases) == len(gas_names)

        # Prepare output dataset: location, date, then whatever was predicted
        output_dataset = dataset[['Latitude', 'Longitude']].copy()
        output_dataset['Date'] = processed_date
        for gas in predicted_gases:
            pred_col = f'Predictions_{gas}'
            output_dataset[pred_col] = dataset[pred_col]
        if aqi_calculated:
            output_dataset['AQI'] = calculateAQI(dataset, breakpointsAQI)

        # Convert to list of dictionaries for JSON response
        result_data = output_dataset.to_dict('records')

        # Prepare metadata. "available_pollutants" lists only the pollutants
        # that were actually predicted; skipped ones are in "failed_pollutants".
        metadata = {
            "center_point": {"latitude": latitude, "longitude": longitude},
            "date": date,
            "radius_km": radius_km,
            "num_points": num_points,
            "total_predictions": len(result_data),
            "available_pollutants": predicted_gases,
            "failed_pollutants": failed_gases,
            "aqi_calculated": aqi_calculated
        }

        return {
            "status": "success",
            "data": result_data,
            "metadata": metadata
        }

    except Exception as e:
        return {"error": f"Prediction failed: {str(e)}"}

In [None]:
# API Endpoints
@app.get("/")
async def root():
    """Describe the service: its name, version and the routes it exposes."""
    endpoint_summary = {
        "/predict": "POST - Generate air quality predictions",
        "/health": "GET - Check API health",
        "/models": "GET - Check available models",
    }
    service_info = {
        "message": "Air Quality Prediction API",
        "version": "1.0.0",
        "endpoints": endpoint_summary,
    }
    return service_info

@app.get("/health")
async def health_check():
    """Liveness probe: report a healthy status with the current timestamp."""
    # datetime.now() is called without a timezone, so the value is naive.
    current_time = datetime.now()
    return dict(status="healthy", timestamp=current_time.isoformat())

@app.get("/models")
async def check_models():
    """Report, for every configured pollutant model, its path and whether the file exists."""
    return {
        "models": {
            gas: {"path": model_path, "exists": os.path.exists(model_path)}
            for gas, model_path in MODEL_PATHS.items()
        }
    }

@app.post("/predict", response_model=PredictionResponse)
async def predict_air_quality(request: PredictionRequest):
    """
    Generate air quality predictions for a given location and date.

    Parameters:
    - latitude: Latitude of the center point
    - longitude: Longitude of the center point
    - date: Date in YYYY-MM-DD format
    - radius_km: Radius of the circle in kilometers (default: 50)
    - num_points: Number of points to generate (default: 100)

    Returns:
    - Prediction data with AQI values for the specified area

    Raises:
    - HTTPException 400: Make_Prediction_API reported an error (its message
      is returned as the detail)
    - HTTPException 500: any unexpected failure while producing the response
    """
    try:
        result = Make_Prediction_API(
            latitude=request.latitude,
            longitude=request.longitude,
            date=request.date,
            radius_km=request.radius_km,
            num_points=request.num_points
        )

        if "error" in result:
            raise HTTPException(status_code=400, detail=result["error"])

        return PredictionResponse(**result)

    except HTTPException:
        # HTTPException is itself an Exception: without this clause the 400
        # raised above would be caught below and re-sent as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e

In [None]:
# Function to run the API server
def run_api_server(host: str = "0.0.0.0", port: int = 8000):
    """
    Announce the server address and documentation URLs, then start the FastAPI
    app with uvicorn.

    Parameters:
    host (str): Host to bind to (default: "0.0.0.0")
    port (int): Port to bind to (default: 8000)
    """
    startup_messages = (
        f"Starting Air Quality Prediction API on {host}:{port}",
        f"API Documentation will be available at: http://{host}:{port}/docs",
        f"API Redoc will be available at: http://{host}:{port}/redoc",
    )
    for message in startup_messages:
        print(message)

    uvicorn.run(app, host=host, port=port, log_level="info")

# Alternative: Run server in background (for Jupyter)
def run_api_background(host: str = "127.0.0.1", port: int = 8000):
    """
    Start the API server on a daemon thread (useful for Jupyter notebooks).

    Parameters:
    host (str): Host to bind to (default: "127.0.0.1")
    port (int): Port to bind to (default: 8000)

    Returns:
    threading.Thread: The already-started thread running the server.
    """
    from threading import Thread

    def start_server():
        uvicorn.run(app, host=host, port=port, log_level="info")

    # daemon=True: the thread does not keep the interpreter alive on exit.
    server_thread = Thread(target=start_server, daemon=True)
    server_thread.start()

    for message in (
        f"API server started in background on {host}:{port}",
        f"API Documentation: http://{host}:{port}/docs",
        f"API Redoc: http://{host}:{port}/redoc",
    ):
        print(message)

    return server_thread

### API Testing Examples