In [1]:
import requests
import pandas as pd
from datetime import datetime
import time
import pymongo
import os

# MongoDB connection settings
MONGO_URI = "mongodb://localhost:27017/"
# NOTE: the spelling "air_qaulity" matches the name of the existing database;
# do not "correct" it without also renaming/migrating that database.
DB_NAME = "air_qaulity"
COLLECTION_NAME = "air_quality_info"

# WAQI API token and target city.
# The token is taken from the WAQI_API_KEY environment variable when it is set
# (and non-empty) so the secret does not have to live in source control. The
# literal below is kept only as a backward-compatible fallback.
API_KEY = os.environ.get("WAQI_API_KEY") or "88a2d7fb54c40a3cf2e81f657ebb91996ca31e2e"
CITY = "Mumbai"

def connect_to_mongodb():
    """Open a MongoDB connection and verify it with a ping.

    Returns:
        A connected ``pymongo.MongoClient`` for ``MONGO_URI``, or ``None``
        when the client cannot be created or the server does not answer the
        ``ping`` command within the 5 second server-selection timeout.
    """
    client = None
    try:
        client = pymongo.MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000)
        # MongoClient connects lazily; force a round trip so that failures
        # surface here instead of on the first insert.
        client.admin.command('ping')
    except pymongo.errors.PyMongoError as e:
        # PyMongoError is the base class of ServerSelectionTimeoutError, so
        # the timeout case is still handled, along with other driver errors
        # (e.g. configuration or authentication failures).
        print(f"Error connecting to MongoDB: {e}")
        # Release the half-open client so it is not leaked on failure.
        if client is not None:
            client.close()
        return None

    print("Connected to MongoDB successfully!")
    return client

def get_live_air_quality():
    """Fetch the current air quality reading for ``CITY`` from the WAQI API.

    The feed is requested with a 10 second timeout. A reading is only
    returned when the overall AQI and all six pollutant values (pm25, pm10,
    co, no2, so2, o3) are present and numeric; anything else is reported on
    stdout and skipped.

    Returns:
        A dict with the keys ``location``, ``aqi``, ``pm25``, ``pm10``,
        ``co``, ``no2``, ``so2``, ``o3`` (pollutants as ``float``) and
        ``time``, or ``None`` when the request fails or the payload is
        unusable.
    """
    url = f"https://api.waqi.info/feed/{CITY}/?token={API_KEY}"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching air quality data: {e}")
        return None
    except ValueError as e:
        # A body that is not valid JSON; depending on the requests version
        # this may not be a RequestException, so handle it explicitly.
        print(f"Error fetching air quality data: invalid JSON response ({e})")
        return None

    # Use .get() so a payload without "status" is reported, not a KeyError.
    if not isinstance(data, dict) or data.get("status") != "ok":
        print("Error: Unable to fetch air quality data.")
        return None

    air_data = data.get("data")
    if not air_data or not isinstance(air_data, dict):
        print("No air quality data found.")
        return None

    aqi = air_data.get("aqi", 0)

    iaqi = air_data.get("iaqi")
    if not isinstance(iaqi, dict):
        iaqi = {}

    # Each pollutant is expected as {"v": <number>}; a missing or non-numeric
    # entry becomes None so the completeness check below rejects it.
    readings = {}
    for pollutant in ("pm25", "pm10", "co", "no2", "so2", "o3"):
        entry = iaqi.get(pollutant)
        raw_value = entry.get("v") if isinstance(entry, dict) else None
        try:
            readings[pollutant] = float(raw_value)
        except (TypeError, ValueError):
            readings[pollutant] = None

    # Avoid incorrect zero values. A non-numeric AQI (e.g. a placeholder
    # string) is treated the same as a missing one.
    aqi_is_valid = isinstance(aqi, (int, float)) and aqi != 0
    if not aqi_is_valid or any(value is None for value in readings.values()):
        print("Incomplete air quality data, skipping entry.")
        return None

    # NOTE(review): datetime.now() is a naive local timestamp; PyMongo
    # documents that naive datetimes are stored as if they were UTC. Confirm
    # whether a timezone-aware value is wanted before changing this, since
    # existing documents were written this way.
    return {
        "location": CITY,
        "aqi": aqi,
        **readings,
        "time": datetime.now(),
    }

def save_air_quality_data(client, data):
    """Insert one air quality reading into the configured collection.

    Args:
        client: Object indexable by database name and then collection name
            (the MongoDB client returned by ``connect_to_mongodb``).
        data: The reading to store, or ``None`` when no reading is available.

    Returns:
        ``True`` when the document was inserted, ``False`` when ``data`` is
        ``None`` or the insert raised an exception.
    """
    # Nothing to store: report it and bail out early.
    if data is None:
        print("No valid air quality data available.")
        return False

    try:
        # Resolve database -> collection and insert in one chained expression.
        inserted = client[DB_NAME][COLLECTION_NAME].insert_one(data)
        saved_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"Air quality data saved at {saved_at} with ID: {inserted.inserted_id}")
    except Exception as err:
        # Best effort: a failed save is reported but must not stop the caller.
        print(f"Error saving data to MongoDB: {err}")
        return False

    return True

if __name__ == "__main__":
    # Script entry point: connect once, then poll the API and store each
    # reading until the user interrupts with Ctrl+C.
    mongo_client = connect_to_mongodb()

    if mongo_client is None:
        print("Failed to connect to MongoDB. Exiting.")
        # Raise SystemExit directly instead of calling exit(): exit() is a
        # helper injected by the site module for interactive use and is not
        # guaranteed to exist in every run mode.
        raise SystemExit(1)

    print("Mumbai Air Quality Scraper Started... (Press Ctrl+C to Stop)")

    try:
        while True:
            air_quality_data = get_live_air_quality()
            save_air_quality_data(mongo_client, air_quality_data)
            time.sleep(300)  # Fetches data every 5 minutes
    except KeyboardInterrupt:
        print("\nScraper Stopped by User.")
    finally:
        # The client is known to be non-None here (the None case exited
        # above), so close it unconditionally.
        mongo_client.close()
        print("MongoDB connection closed.")

Connected to MongoDB successfully!
Mumbai Air Quality Scraper Started... (Press Ctrl+C to Stop)
Air quality data saved at 2025-03-30 13:07:20 with ID: 67e8f4b0194ec62c9b5be156
Air quality data saved at 2025-03-30 13:12:20 with ID: 67e8f5dc194ec62c9b5be157
Air quality data saved at 2025-03-30 13:17:21 with ID: 67e8f709194ec62c9b5be158
Air quality data saved at 2025-03-30 13:22:22 with ID: 67e8f836194ec62c9b5be159
Air quality data saved at 2025-03-30 13:27:22 with ID: 67e8f962194ec62c9b5be15a
Air quality data saved at 2025-03-30 13:32:23 with ID: 67e8fa8f194ec62c9b5be15b
Air quality data saved at 2025-03-30 13:37:23 with ID: 67e8fbbb194ec62c9b5be15c
Air quality data saved at 2025-03-30 13:42:24 with ID: 67e8fce8194ec62c9b5be15d
Air quality data saved at 2025-03-30 13:47:24 with ID: 67e8fe14194ec62c9b5be15e
Air quality data saved at 2025-03-30 13:52:25 with ID: 67e8ff41194ec62c9b5be15f
Air quality data saved at 2025-03-30 13:57:25 with ID: 67e9006d194ec62c9b5be160
Air quality data saved a