In [7]:
# This script demonstrates how to connect to a MongoDB database,
# fetch data from two different collections, update a document,
# and retrieve the updated data using the pymongo library.

import os
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, OperationFailure
from bson.objectid import ObjectId

# --- CONFIGURATION ---
# The connection string is read from the MONGO_URI environment variable.
# The fallback below is only a placeholder template (<user>, <password>,
# <cluster-url>), not a usable connection string, so set MONGO_URI before
# running. Avoid pasting real credentials into the notebook.
# Expected shape: "mongodb+srv://<user>:<password>@<cluster-url>/test?retryWrites=true&w=majority"
uri = os.environ.get("MONGO_URI", "mongodb+srv://<user>:<password>@<cluster-url>/test?retryWrites=true&w=majority")

DB_NAME = 'test'  # Database that holds both collections below
MESSAGES_COLLECTION_NAME = 'messages'  # Collection whose first document gets its 'content' updated
FILES_COLLECTION_NAME = 'files'  # Collection read for a 'cloudinaryUrl' field
# --- END OF CONFIGURATION ---

# Module-level client shared by main(); main() closes it in its `finally` block.
client = MongoClient(uri)

def main():
    """
    Run the MongoDB demo end to end.

    Steps:
      1. Verify the server is reachable.
      2. Read one document from the messages collection and one from the
         files collection.
      3. Keep the message 'content' and the file 'cloudinaryUrl' in locals.
      4. Overwrite the message 'content' with a fixed string via ``$set``.
      5. Re-read the message and print its new content.

    Returns:
        None. All results are printed. The function returns early when the
        messages collection is empty or when the message cannot be found
        again after the update.

    Side effects:
        Modifies one document in the messages collection and closes the
        module-level ``client`` in the ``finally`` block, whether or not an
        error occurred.
    """
    try:
        # 1. CONNECT TO THE DATABASE
        # ---------------------------
        # 'ping' is the cheap connectivity check used in the PyMongo docs; it
        # replaces the legacy 'ismaster' command.
        client.admin.command('ping')
        print("Successfully connected to MongoDB Atlas!")

        # Get a reference to the database and its two collections
        db = client[DB_NAME]
        messages_collection = db[MESSAGES_COLLECTION_NAME]
        files_collection = db[FILES_COLLECTION_NAME]

        # 2. FETCH DATA FROM COLLECTIONS
        # ------------------------------
        print("\n--- Fetching Initial Data ---")

        # find_one() with no filter returns an arbitrary first document.
        # In a real application, use a specific query,
        # for example: {'_id': ObjectId("some_specific_id")}
        message_to_process = messages_collection.find_one()
        if message_to_process is None:
            print("No messages found in the collection. Please add a message to proceed.")
            return  # Nothing to update
        print(f"Found a message: {message_to_process}")

        # A missing file is not fatal: the URL is simply reported as unavailable.
        file_to_process = files_collection.find_one()
        if file_to_process is None:
            print("Warning: No files found in the collection. Cannot retrieve a cloudinaryUrl.")
        else:
            print(f"Found a file: {file_to_process}")

        # 3. STORE THE RETRIEVED CONTENT
        # --------------------------------
        # .get() returns None instead of raising when a key is absent.
        original_message_content = message_to_process.get('content')
        cloudinary_url = file_to_process.get('cloudinaryUrl') if file_to_process else None

        print(f'\nStored Message Content: "{original_message_content}"')
        print(f'Stored Cloudinary URL: "{cloudinary_url or "Not Available"}"')

        # 4. UPDATE THE MESSAGE CONTENT
        # -----------------------------
        print("\n--- Updating Message ---")

        new_content = "This is the new, updated content from the Python script!"

        # Target exactly the document fetched above and overwrite only 'content'.
        filter_doc = {'_id': message_to_process.get('_id')}
        update_doc = {"$set": {"content": new_content}}

        update_result = messages_collection.update_one(filter_doc, update_doc)
        # modified_count stays 0 when the stored content already equals
        # new_content, so report matched_count too to tell "nothing matched"
        # apart from "matched but unchanged".
        print(f"{update_result.matched_count} document(s) matched the filter.")
        print(f"{update_result.modified_count} document(s) was/were updated.")

        # 5. RETURN AND STORE THE NEW CONTENT
        # -----------------------------------
        print("\n--- Fetching Updated Data ---")

        # Re-read by _id to confirm the change. The document may have been
        # removed since the first read, so guard against None.
        updated_message = messages_collection.find_one(filter_doc)
        if updated_message is None:
            print("The message could not be found after the update; it may have been deleted.")
            return

        updated_message_content = updated_message.get('content')

        print("Successfully retrieved the updated message.")
        print(f'New Stored Content: "{updated_message_content}"')

    except ConnectionFailure as e:
        print(f"Could not connect to MongoDB: {e}")
    except OperationFailure as e:
        print(f"An operation failed: {e}")
    except Exception as e:
        # Last-resort handler so the demo always reaches the cleanup below.
        print(f"An unexpected error occurred: {e}")
    finally:
        # Ensures that the client is closed on success and on error.
        # NOTE(review): `client` is module-level, so calling main() again in
        # the same kernel reuses a closed client; re-run the cell that
        # creates it first.
        client.close()
        print("\nConnection to MongoDB closed.")

if __name__ == "__main__":
    main()

Successfully connected to MongoDB Atlas!

--- Fetching Initial Data ---
Found a message: {'_id': ObjectId('68956fb365add8b9bc75f117'), 'content': 'This is the new, updated content from the Python script!', 'sender': 'user', 'createdAt': datetime.datetime(2025, 8, 8, 3, 32, 3, 594000), '__v': 0}
Found a file: {'_id': ObjectId('689578e065add8b9bc75f14a'), 'name': 'Workpilot Dashboard.pdf', 'type': 'application/pdf', 'size': 94960, 'cloudinaryUrl': 'https://res.cloudinary.com/dm9j97lv3/raw/upload/v1754626272/pdfs/1754626259628', 'uploadedAt': datetime.datetime(2025, 8, 8, 4, 11, 12, 625000), '__v': 0}

Stored Message Content: "This is the new, updated content from the Python script!"
Stored Cloudinary URL: "https://res.cloudinary.com/dm9j97lv3/raw/upload/v1754626272/pdfs/1754626259628"

--- Updating Message ---
0 document(s) was/were updated.

--- Fetching Updated Data ---
Successfully retrieved the updated message.
New Stored Content: "This is the new, updated content from the Python scr

In [None]:
# This script demonstrates how to connect to a MongoDB database,
# fetch data from two different collections, update a document,
# and retrieve the updated data using the pymongo library.

import os
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, OperationFailure
from bson.objectid import ObjectId

# --- CONFIGURATION ---
# The connection string is read from the MONGO_URI environment variable.
# The fallback below is only a placeholder template (<user>, <password>,
# <cluster-url>), not a usable connection string, so set MONGO_URI before
# running. Avoid pasting real credentials into the notebook.
# Expected shape: "mongodb+srv://<user>:<password>@<cluster-url>/test?retryWrites=true&w=majority"
uri = os.environ.get("MONGO_URI", "mongodb+srv://<user>:<password>@<cluster-url>/test?retryWrites=true&w=majority")

DB_NAME = 'test'  # Database that holds both collections below
MESSAGES_COLLECTION_NAME = 'messages'  # Collection whose first document gets its 'content' updated
FILES_COLLECTION_NAME = 'files'  # Collection read for a 'cloudinaryUrl' field
# --- END OF CONFIGURATION ---

# Client used by the statements that follow in this cell.
client = MongoClient(uri)


# Fail fast if the server is unreachable. 'ping' is the cheap connectivity
# check used in the PyMongo docs; it replaces the legacy 'ismaster' command.
client.admin.command('ping')

db = client[DB_NAME]
messages_collection = db[MESSAGES_COLLECTION_NAME]
files_collection = db[FILES_COLLECTION_NAME]

# find_one() with no filter returns an arbitrary first document, or None
# when the collection is empty. Stop with a clear message in that case
# rather than failing later with an AttributeError on None.
message_to_process = messages_collection.find_one()
if message_to_process is None:
    raise RuntimeError(
        f"No documents found in the '{MESSAGES_COLLECTION_NAME}' collection; "
        "add a message before running this cell."
    )

# A missing file document is tolerated: cloudinary_url is then None.
file_to_process = files_collection.find_one()

original_message_content = message_to_process.get('content')
cloudinary_url = file_to_process.get('cloudinaryUrl') if file_to_process else None

message_id_to_update = message_to_process.get('_id')
new_content = "This is the new, updated content from the Python script!"

# Target exactly the fetched document and overwrite only its 'content' field.
filter_doc = {'_id': message_id_to_update}
update_doc = {
    "$set": {
        "content": new_content
    }
}

update_result = messages_collection.update_one(filter_doc, update_doc)

print("original_message_content " + str(original_message_content))
print("cloudinary_url " + str(cloudinary_url))




import requests
import fitz  # PyMuPDF

# Your Cloudinary URL
# cloudinary_url = "https://res.cloudinary.com/demo/raw/upload/v1690000000/sample.pdf"

# cloudinary_url is None when no file document (or no 'cloudinaryUrl' field)
# was found above; stop here instead of handing None to requests.
if not cloudinary_url:
    raise ValueError("cloudinary_url is not set; cannot download the PDF.")

# Step 1: Download the PDF
# requests has no default timeout, so set one to avoid hanging the kernel,
# and raise on HTTP errors so an error page is never parsed as a PDF.
response = requests.get(cloudinary_url, timeout=30)
response.raise_for_status()
pdf_bytes = response.content

# Step 2: Load directly from bytes using PyMuPDF
doc = fitz.open(stream=pdf_bytes, filetype="pdf")


def chunk_data(docs, chunk_size=800, chunk_overlap=50):
    """Split documents into overlapping chunks.

    Args:
        docs: The documents handed to the splitter's ``split_documents``.
            NOTE(review): presumably a list of LangChain ``Document``
            objects; confirm against the caller.
        chunk_size: Maximum chunk size passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            splitter.

    Returns:
        The chunked documents produced by the splitter. The previous
        version computed them but returned the unsplit ``docs`` by mistake.
    """
    # NOTE(review): RecursiveCharacterTextSplitter is not imported in the
    # visible cells; make sure it is imported before this function is called.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)

# NOTE(review): `doc` is the PyMuPDF document opened above, while chunk_data()
# forwards its argument to the splitter's split_documents(). Presumably that
# expects LangChain Document objects; confirm and convert the pages first if so.
documents = chunk_data(docs=doc)

embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])

# Read the Pinecone key from the environment, like the OpenAI key above.
# SECURITY: a literal key was previously committed here; treat it as leaked
# and rotate it.
pinecone = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
# NOTE(review): this handle is replaced by the vector store assigned to
# `index` below; drop it if the "quickstart" index is not needed.
index = pinecone.Index("quickstart")


index_name = "ragqa"
# Index the chunked `documents` (the original passed the unchunked `doc`).
# NOTE(review): `Pinecone` is used both as a client constructor above and as
# a class exposing from_documents() here; confirm which import provides it.
index = Pinecone.from_documents(documents, embeddings, index_name=index_name)
def retrieve_query(query, k=2):
    """Look up `query` in the module-level `index`.

    Args:
        query: The value forwarded to ``index.similarity_search``.
        k: Passed through as the ``k`` keyword (defaults to 2).

    Returns:
        Whatever ``index.similarity_search(query, k=k)`` returns.
    """
    return index.similarity_search(query, k=k)



original_message_content This is the new, updated content from the Python script!
cloudinary_url https://res.cloudinary.com/dm9j97lv3/raw/upload/v1754626272/pdfs/1754626259628


In [None]:


import requests
import fitz  # PyMuPDF

# Your Cloudinary URL
# cloudinary_url = "https://res.cloudinary.com/demo/raw/upload/v1690000000/sample.pdf"

# cloudinary_url comes from an earlier cell and may be None; stop here
# instead of handing None to requests.
if not cloudinary_url:
    raise ValueError("cloudinary_url is not set; cannot download the PDF.")

# Step 1: Download the PDF
# requests has no default timeout, so set one to avoid hanging the kernel,
# and raise on HTTP errors so an error page is never parsed as a PDF.
response = requests.get(cloudinary_url, timeout=30)
response.raise_for_status()
pdf_bytes = response.content

# Step 2: Load directly from bytes using PyMuPDF
pdf_doc = fitz.open(stream=pdf_bytes, filetype="pdf")

# Step 3: Read content (prints the text of every page, numbered from 1)
for page_num, page in enumerate(pdf_doc, start=1):
    print(f"\n--- Page {page_num} ---")
    print(page.get_text())



--- Page 1 ---
WorkPilot 
Roles 
• Admin  
• Team Lead (Project Lead) 
• Member 
Common Components 
1. Projects Section 
Each project card displays: 
• Project Title 
• Description 
• Domain 
• Status Badge (Active / Completed / Disabled) 
• Team Lead Name, Assistant Lead Name 
• Member Count 
• “Open Project” button (visible only after approval for Members otherwise 
“Requested”) 
Access per Role: 
Role 
Permissions 
Admin 
Approves project proposals, creates final project cards 
Team Lead 
Manages projects they lead, adds/removes members, toggles open/closed 
status, Approve project proposals 
Member 
Views all projects, applies to open ones, views joined projects 
2. Announcements Page 
Each announcement includes: 
• Sender’s Profile and Name 
• Content 
• Date and Timestamp 
• Grouped by Month 
 
 


--- Page 2 ---
Access per Role: 
Role 
Permissions 
Admin 
Create, edit, pin, delete announcements 
Team Lead 
Create, edit, delete, pin 
Member 
View only 
3. Members Section (In a s

In [24]:
# Optional: persist the downloaded PDF bytes to "downloaded_file.pdf"
# in the current working directory.

with open("downloaded_file.pdf", mode="wb") as pdf_file:
    pdf_file.write(pdf_bytes)