In [15]:
import requests
import pandas as pd
from datetime import datetime
import time
from pymongo import MongoClient
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


# MongoDB connection settings; each one can be overridden via the environment.
MONGO_HOST = os.getenv("MONGO_HOST", "127.0.0.1")
# The default is a string so both branches of getenv() go through the same int() parse.
MONGO_PORT = int(os.getenv("MONGO_PORT", "27017"))
MONGO_DB = os.getenv("MONGO_DB", "air_pollution")


# WAQI API token: prefer the WAQI_API_KEY environment variable so the secret
# does not have to live in source control.
# NOTE(review): the literal fallback keeps existing runs working, but it is a
# credential committed to the file -- rotate it and drop the fallback once the
# environment variable is set everywhere this script runs.
API_KEY = os.getenv("WAQI_API_KEY", "88a2d7fb54c40a3cf2e81f657ebb91996ca31e2e")
# Name of the WAQI feed to query; also stored as "location" on each record.
CITY = "Mumbai"


# Shared client and target collection used by the functions defined below.
client = MongoClient(MONGO_HOST, MONGO_PORT)
db = client[MONGO_DB]
collection = db["air_quality_info"]

def get_live_air_quality():
    """Fetch the current air quality reading for CITY from the WAQI feed API.

    Returns:
        A dict with the keys "location", "aqi", "pm25", "pm10", "co", "no2",
        "so2", "o3" and "time" (the moment of the fetch, from datetime.now()),
        or None when the request fails, the body is not valid JSON, the
        payload does not have the expected structure, or the AQI / any of the
        six pollutant values is missing or non-numeric.
    """
    url = f"https://api.waqi.info/feed/{CITY}/"

    try:
        # Passing the token through params lets requests URL-encode it.
        response = requests.get(url, params={"token": API_KEY}, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers requests versions whose JSON decode error is not
        # a RequestException subclass.
        print(f"Error fetching air quality data: {e}")
        return None

    # Check API response format
    if not isinstance(data, dict) or data.get("status") != "ok":
        print("Error: Invalid API response structure.")
        return None

    air_data = data.get("data")
    if not air_data or not isinstance(air_data, dict):
        print("No valid air quality data found.")
        return None

    # "iaqi" may be absent, null or malformed; treat all of those as "no readings"
    # instead of letting an AttributeError escape and stop the polling loop.
    iaqi = air_data.get("iaqi")
    if not isinstance(iaqi, dict):
        iaqi = {}

    def is_number(value):
        """Return True for real int/float readings (bool is excluded)."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def extract_value(key):
        """Safely extract a pollutant's "v" value, or None if unusable."""
        entry = iaqi.get(key)
        value = entry.get("v") if isinstance(entry, dict) else None
        return value if is_number(value) else None

    aqi = air_data.get("aqi")
    pm25 = extract_value("pm25")
    pm10 = extract_value("pm10")
    co = extract_value("co")
    no2 = extract_value("no2")
    so2 = extract_value("so2")
    o3 = extract_value("o3")

    # A record is only kept when the AQI and every pollutant are present.
    if not is_number(aqi) or any(v is None for v in (pm25, pm10, co, no2, so2, o3)):
        print(f"Incomplete data received: AQI={aqi}, PM2.5={pm25}, PM10={pm10}, CO={co}, NO2={no2}, SO2={so2}, O3={o3}. Skipping entry.")
        return None

    return {
        "location": CITY,
        "aqi": aqi,
        "pm25": pm25,
        "pm10": pm10,
        "co": co,
        "no2": no2,
        "so2": so2,
        "o3": o3,
        # NOTE(review): this is a naive local timestamp; confirm how the
        # MongoDB driver interprets naive datetimes before relying on it.
        "time": datetime.now(),
    }

def save_air_quality_data(data):
    """Insert one air quality record into the MongoDB collection.

    A falsy ``data`` (None or an empty dict) is reported and skipped. Any
    exception raised while inserting is printed rather than propagated.
    """
    if not data:
        print("No valid air quality data available.")
        return

    try:
        collection.insert_one(data)
        saved_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"Air quality data saved to MongoDB at {saved_at}")
    except Exception as err:
        print(f"Error saving data to MongoDB: {err}")

if __name__ == "__main__":
    # Script entry point: fetch and store one reading, wait 300 seconds,
    # and repeat until the user interrupts with Ctrl+C.
    print("Mumbai Air Quality Scraper Started for MongoDB... (Press Ctrl+C to Stop)")

    try:
        while True:
            save_air_quality_data(get_live_air_quality())
            time.sleep(300)
    except KeyboardInterrupt:
        print("\nScraper Stopped by User.")
    finally:
        # Always release the MongoDB connection, however the loop ended.
        client.close()

Mumbai Air Quality Scraper Started for MongoDB... (Press Ctrl+C to Stop)
Air quality data saved to MongoDB at 2025-04-03 23:17:05

Scraper Stopped by User.
