In [3]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.11.3-cp311-cp311-win_amd64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.11.3-cp311-cp311-win_amd64.whl (831 kB)
   ---------------------------------------- 0.0/831.6 kB ? eta -:--:--
   ---------------------------------------- 10.2/831.6 kB ? eta -:--:--
   ---------------- ----------------------- 337.9/831.6 kB 4.2 MB/s eta 0:00:01
   ----------------------------------- ---- 737.3/831.6 kB 5.8 MB/s eta 0:00:01
   ---------------------------------------- 831.6/831.6 kB 4.8 MB/s eta 0:00:00
Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)
   ---------------------------------------- 0.0/313.6 kB ? eta -:--:--
   ------------------- -------------------- 153.6/313.6 kB 4.6 MB/s eta 0:00:01
   ---------------------------------------  307.2/313.6 kB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 313.6/313.6 kB 3.2 

In [18]:
import json
import time
import os
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from pymongo import MongoClient
from dotenv import load_dotenv

# Read settings from the dotenv file at ../config/mongo.env.
load_dotenv(dotenv_path="../config/mongo.env")

# Connection string and database name; fall back to a local instance and the
# "DisasterAlert" database when the variables are not set.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://localhost:27017/")
DB_NAME = os.environ.get("DB_NAME", "DisasterAlert")

# JSON files read and written by the functions in this cell.
CLUSTERED_DISASTERS_FILE = "clustered_disasters.json"
EVENTS_FILE = "events.json"

# Open the MongoDB client and select the configured database.
client = MongoClient(MONGO_URI)
db = client[DB_NAME]

# Create each required collection explicitly when the database does not list it yet.
for _required_collection in ("events", "posts"):
    if _required_collection not in db.list_collection_names():
        db.create_collection(_required_collection)

# Collection handles used by update_mongo_db().
events_collection = db["events"]
posts_collection = db["posts"]

class FileChangeHandler(FileSystemEventHandler):
    """Watchdog handler that refreshes events.json and MongoDB whenever the
    clustered-disasters file is modified."""

    def on_modified(self, event):
        """Handle a modification event delivered by the observer.

        Args:
            event: The event object; only ``is_directory`` and ``src_path``
                are read.
        """
        # A "directory modified" event is not a change to the watched file.
        if getattr(event, "is_directory", False):
            return

        # Compare the exact file name. A suffix test would also fire for files
        # such as "old_clustered_disasters.json". fsdecode() accepts both str
        # and bytes paths.
        changed_name = os.path.basename(os.fsdecode(event.src_path))
        if changed_name != os.path.basename(CLUSTERED_DISASTERS_FILE):
            return

        print("Detected changes in clustered_disasters.json. Updating events...")
        update_events()
        update_mongo_db()

def load_json(file_path):
    """Read a JSON file and return its parsed content.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform default encoding, which is not UTF-8 on every system and would
    corrupt or reject non-ASCII text.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON value, or an empty list when the file is missing,
        is not valid UTF-8, or does not contain valid JSON.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes are reported the same way as malformed JSON so the
        # caller always gets a list back instead of an unexpected exception.
        print(f"Error: Failed to decode {file_path}.")
        return []

def update_events():
    """Rebuild events.json from the posts in clustered_disasters.json.

    Posts are grouped by ``event_name``. For every group the function records
    the number of posts, the average ``credibility_score`` (rounded to two
    decimals), the ``disaster_type``, ``location`` and ``created_utc`` of the
    first post seen, and the largest ``created_utc`` as ``updated_utc``.

    An event already present in events.json keeps its other keys and has the
    fields above overwritten. Events with no post in the clustered file are
    not written back. Posts that are not objects or have no ``event_name``
    are skipped, a missing or null score counts as 0, and a missing
    ``created_utc`` never replaces a known ``updated_utc``.

    Returns:
        None. The result is written to ``EVENTS_FILE``.
    """
    clustered_disasters = load_json(CLUSTERED_DISASTERS_FILE)
    events = load_json(EVENTS_FILE)

    # Aggregate per-event totals from the individual posts.
    event_data = {}
    for post in clustered_disasters:
        if not isinstance(post, dict):
            continue
        event_name = post.get("event_name")
        if not event_name:
            continue

        created_utc = post.get("created_utc")
        # `or 0` also covers an explicit JSON null, which .get(key, 0) would
        # pass through as None and break the addition below.
        credibility_score = post.get("credibility_score") or 0

        if event_name not in event_data:
            event_data[event_name] = {
                "total_score": 0,
                "count": 0,
                "disaster_type": post.get("disaster_type"),
                "location": post.get("location"),
                "created_utc": created_utc,
                "updated_utc": created_utc,
            }
        summary = event_data[event_name]
        summary["total_score"] += credibility_score
        summary["count"] += 1

        # Track the latest timestamp; None cannot be ordered against a value,
        # so it is never compared.
        latest = summary["updated_utc"]
        if created_utc is not None and (latest is None or created_utc > latest):
            summary["updated_utc"] = created_utc

    # Index the stored events by name, ignoring entries without a usable name.
    event_dict = {
        event["event_name"]: event
        for event in events
        if isinstance(event, dict) and event.get("event_name")
    }

    # Update existing events in place (keeping any extra keys) or create new ones.
    updated_events = []
    for event_name, data in event_data.items():
        count = data["count"]
        avg_score = round(data["total_score"] / count, 2) if count > 0 else 0

        record = event_dict.get(event_name, {"event_name": event_name})
        record.update({
            "no_of_posts": count,
            "average_credibility_score": avg_score,
            "disaster_type": data["disaster_type"],
            "location": data["location"],
            "created_utc": data["created_utc"],
            "updated_utc": data["updated_utc"],
        })
        updated_events.append(record)

    # Save updated events.json with an explicit encoding.
    with open(EVENTS_FILE, "w", encoding="utf-8") as f:
        json.dump(updated_events, f, indent=4)

    print("Updated events.json successfully!")

def update_mongo_db():
    """Replace the contents of the MongoDB collections with the JSON files.

    Both files are loaded first. Each collection is then emptied and refilled
    from its file, posts before events. A file that loads as empty leaves its
    collection untouched.
    """
    post_documents = load_json(CLUSTERED_DISASTERS_FILE)
    event_documents = load_json(EVENTS_FILE)

    # (target collection, documents to store, message printed afterwards)
    refresh_plan = (
        (posts_collection, post_documents,
         "Updated MongoDB: clustered_disasters collection."),
        (events_collection, event_documents,
         "Updated MongoDB: events collection."),
    )

    for collection, documents, message in refresh_plan:
        if not documents:
            continue
        collection.delete_many({})  # drop the previous snapshot
        collection.insert_many(documents)
        print(message)

if __name__ == "__main__":
    # Watch the current directory only (no sub-directories) and route its
    # events to FileChangeHandler.
    event_handler = FileChangeHandler()
    observer = Observer()
    observer.schedule(event_handler, path=".", recursive=False)
    observer.start()

    try:
        print("Watching for changes in clustered_disasters.json...")
        while True:
            # Nothing to do here; the handler is invoked by the observer.
            time.sleep(10)
    except KeyboardInterrupt:
        # Ctrl+C / kernel interrupt is the expected way to end the loop.
        pass
    finally:
        # Always shut the observer down, whatever ended the loop, so it is
        # not left running after this cell finishes.
        observer.stop()
        observer.join()


Watching for changes in clustered_disasters.json...
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Updated MongoDB: clustered_disasters collection.
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Updated MongoDB: clustered_disasters collection.
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Detected changes in clustered_disasters.json. Updating events...
Updated events.json successfully!
Updated MongoDB: clustered_disasters collection.
Updated MongoDB: events collection.
Detected changes in clustered_disasters.json. Updating events...
Updated