In [None]:
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, PyMongoError
from dotenv import load_dotenv
import os
import json

def upload_thread(data, db="newsdb", collection="articles"):
    """Insert the JSON document(s) contained in ``data`` into a MongoDB collection.

    The connection string is read from the ``MONGO_DB`` environment variable
    after ``load_dotenv()`` has been called. All handled errors are reported
    with ``print`` instead of being raised.

    Args:
        data: Either an open, readable file-like object containing JSON, or a
            ``str`` / ``os.PathLike`` path to a JSON file.
        db: Name of the target database. Anything that is not a non-empty
            string falls back to ``"newsdb"``.
        collection: Name of the target collection. Anything that is not a
            non-empty string falls back to ``"articles"``.

    Returns:
        None. A JSON array is written with ``insert_many`` (an empty array is
        skipped); any other JSON value is written with ``insert_one``.
    """
    load_dotenv()
    mongo_uri = os.getenv("MONGO_DB")

    # Earlier versions ignored both arguments and always wrote to
    # newsdb.articles; keep those targets as the fallback so callers that
    # passed placeholder values keep working.
    db_name = db if isinstance(db, str) and db else "newsdb"
    collection_name = (
        collection if isinstance(collection, str) and collection else "articles"
    )

    # Bound before the try block so the finally clause can test it directly.
    client = None
    try:
        client = MongoClient(mongo_uri)
        # Verify the connection before doing any work.
        client.admin.command('ping')
        articles = client[db_name][collection_name]

        if isinstance(data, (str, os.PathLike)):
            # A path was supplied: open it here so a missing file is reported
            # by the FileNotFoundError handler below.
            with open(data, encoding="utf-8") as handle:
                json_data = json.load(handle)
        else:
            json_data = json.load(data)

        if isinstance(json_data, list):
            if not json_data:
                # insert_many rejects an empty document list, so skip the call.
                print("No documents to insert")
                return
            result = articles.insert_many(json_data)
            print(f"Inserted {len(result.inserted_ids)} documents")
        else:
            result = articles.insert_one(json_data)
            print(f"Document inserted with _id: {result.inserted_id}")
    except ConnectionFailure as e:
        print(f"MongoDB connection failed: {str(e)}")
    except FileNotFoundError:
        print("Error: JSON file not found at specified path")
    except json.JSONDecodeError:
        print("Error: Invalid JSON format in file")
    except PyMongoError as e:
        print(f"MongoDB operation error: {str(e)}")
    finally:
        # Ensure connection is closed properly
        if client is not None:
            client.close()


Connected to MongoDB Atlas
Document inserted with _id: 67c3dea8226dc96526ff86f8

Found documents:
- Trump and Zelensky clash in the Oval Office
- Trump and Zelensky clash in the Oval Office

MongoDB connection closed


In [6]:
# Get all the topics from the database

def get_topics(db='newsdb', collection="articles"):
    """Return the distinct ``_id`` values stored in a MongoDB collection.

    Despite the name, the query issued is ``distinct("_id")``, so the result
    is a list of document ids rather than topic strings.

    The connection string is read from the ``MONGO_DB`` environment variable
    after ``load_dotenv()`` has been called.

    Args:
        db: Name of the database to read. Anything that is not a non-empty
            string falls back to ``"newsdb"``.
        collection: Name of the collection to read. Anything that is not a
            non-empty string falls back to ``"articles"``.

    Returns:
        The list produced by ``distinct("_id")``, or ``None`` when a
        connection or operation error was caught (the error is printed, not
        raised).
    """
    load_dotenv()
    mongo_uri = os.getenv("MONGO_DB")

    # Earlier versions ignored both arguments and always read newsdb.articles;
    # keep those targets as the fallback.
    db_name = db if isinstance(db, str) and db else "newsdb"
    collection_name = (
        collection if isinstance(collection, str) and collection else "articles"
    )

    # Bound before the try block so the finally clause can test it directly.
    client = None
    try:
        client = MongoClient(mongo_uri)
        # Verify the connection before querying.
        client.admin.command('ping')
        articles = client[db_name][collection_name]
        # One entry per stored document: its _id.
        topics = articles.distinct("_id")
        print(f"Topics found: {topics}")
        return topics
    except ConnectionFailure as e:
        print(f"MongoDB connection failed: {str(e)}")
    except PyMongoError as e:
        print(f"MongoDB operation error: {str(e)}")
    finally:
        # Ensure connection is closed properly
        if client is not None:
            client.close()
    # Reached only after a handled error.
    return None
            

# Kept at notebook scope for the following cell. get_topics() returns the
# distinct `_id` values of the collection, or None if it caught a MongoDB error.
ids = get_topics()

Topics found: [ObjectId('67c3de26f19c69004f96fd32'), ObjectId('67c3dea8226dc96526ff86f8')]


In [10]:
# Get the document by ID
def get_doc(id, db="newsdb", collection="articles"):
    """Fetch a single document from a MongoDB collection by its ``_id``.

    The connection string is read from the ``MONGO_DB`` environment variable
    after ``load_dotenv()`` has been called. The full document is printed as
    well as returned.

    Args:
        id: Value matched against the ``_id`` field. It is passed to the query
            unchanged; no type conversion is performed. (The name shadows the
            ``id`` builtin inside this function and is kept for caller
            compatibility.)
        db: Name of the database to read. Anything that is not a non-empty
            string falls back to ``"newsdb"``.
        collection: Name of the collection to read. Anything that is not a
            non-empty string falls back to ``"articles"``.

    Returns:
        Whatever ``find_one`` returns for the ``_id`` filter, or ``None`` when
        a connection or operation error was caught (the error is printed, not
        raised).
    """
    load_dotenv()
    mongo_uri = os.getenv("MONGO_DB")

    db_name = db if isinstance(db, str) and db else "newsdb"
    collection_name = (
        collection if isinstance(collection, str) and collection else "articles"
    )

    # Bound before the try block so the finally clause can test it directly.
    client = None
    try:
        client = MongoClient(mongo_uri)
        # Verify the connection before querying.
        client.admin.command('ping')
        articles = client[db_name][collection_name]
        # Look up the single document whose _id equals the given value.
        doc = articles.find_one({"_id": id})
        print(f"Document found: {doc}")
        return doc
    except ConnectionFailure as e:
        print(f"MongoDB connection failed: {str(e)}")
    except PyMongoError as e:
        print(f"MongoDB operation error: {str(e)}")
    finally:
        # Ensure connection is closed properly
        if client is not None:
            client.close()
    # Reached only after a handled error.
    return None
            
# Look up the first id collected by get_topics(). As the cell's last expression,
# the returned document is also displayed, in addition to get_doc's own print.
# NOTE(review): `ids` is None when get_topics() caught an error, and may be
# empty for an empty collection; either case makes ids[0] raise here.
get_doc(ids[0])

Document found: {'_id': ObjectId('67c3de26f19c69004f96fd32'), 'topic': 'Trump and Zelensky clash in the Oval Office', 'timestamp': '2025-03-01T20:35:28Z', 'articles': [{'title': "Zelensky's Clash with Trump Draws Pride and Fear in Ukraine | TIME", 'source': 'time.com', 'date': '2025-03-01T20:35:28Z', 'url': 'https://time.com/7263232/kyiv-pride-fear-after-zelensky-confronts-trump/', 'snippet': 'His clash with Trump threatens to rupture an alliance that has kept Ukraine alive during the war with an estimated $183 billion in financial and ...', 'content': 'Zelensky\'s Clash with Trump Draws Pride and Fear in Ukraine | TIME Sign Up for Our Ideas Newsletter POV Subscribe Subscribe Sections Home U.S. Politics World Health Climate Future of Work by Charter Business Tech Entertainment Ideas Science History Sports Magazine TIME 2030 Next Generation Leaders TIME100 Leadership Series TIME Studios Video TIME100 Talks TIMEPieces The TIME Vault TIME for Health TIME for Kids TIME Edge TIMECO2 Red Bor

{'_id': ObjectId('67c3de26f19c69004f96fd32'),
 'topic': 'Trump and Zelensky clash in the Oval Office',
 'timestamp': '2025-03-01T20:35:28Z',
 'articles': [{'title': "Zelensky's Clash with Trump Draws Pride and Fear in Ukraine | TIME",
   'source': 'time.com',
   'date': '2025-03-01T20:35:28Z',
   'url': 'https://time.com/7263232/kyiv-pride-fear-after-zelensky-confronts-trump/',
   'snippet': 'His clash with Trump threatens to rupture an alliance that has kept Ukraine alive during the war with an estimated $183 billion in financial and ...',
   'content': 'Zelensky\'s Clash with Trump Draws Pride and Fear in Ukraine | TIME Sign Up for Our Ideas Newsletter POV Subscribe Subscribe Sections Home U.S. Politics World Health Climate Future of Work by Charter Business Tech Entertainment Ideas Science History Sports Magazine TIME 2030 Next Generation Leaders TIME100 Leadership Series TIME Studios Video TIME100 Talks TIMEPieces The TIME Vault TIME for Health TIME for Kids TIME Edge TIMECO2 Red B