In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders.mongodb import MongodbLoader
import nest_asyncio
nest_asyncio.apply()
load_dotenv()

True

In [5]:
from pymongo import MongoClient

class MongodbLoader:
    def __init__(self, connection_string, db_name, collection_name, filter_criteria=None, field_names=None):
        self.client = MongoClient(connection_string)
        self.db = self.client[db_name]
        self.collection = self.db[collection_name]
        self.filter_criteria = filter_criteria or {}
        self.field_names = {field: 1 for field in field_names} if field_names else {}

    def load_data(self):
        documents = self.collection.find(self.filter_criteria, self.field_names)
        return list(documents)

In [6]:
# Usage
loader = MongodbLoader(
    connection_string="mongodb://localhost:27017/",
    db_name="data1",
    collection_name="mcqs",
    filter_criteria={},  # Add any specific criteria if needed, e.g., {"question": {"$regex": "Question 1"}}
    field_names=["question", "options", "correct_answer"]  # Specify the fields you want
)

In [10]:
docs = loader.load_data()

len(docs)

26

In [11]:
docs[0]

{'_id': ObjectId('6724b017b8f4d57b980c3b6d'),
 'question': 'Question 1: What happens when a chemical change occurs?',
 'options': ['A) A physical change takes place',
  'B) A chemical reaction has taken place',
  'C) No change occurs',
  'D) A biological reaction has taken place'],
 'correct_answer': ''}

In [13]:
from budserve.models.langchain import BudServeClient
from langchain_core.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
import os

In [14]:
api_key=os.getenv('llama_api_key')

In [None]:
from budserve.models.langchain import BudServeClient
api_key=os.getenv('llama_api_key')
llm = BudServeClient(base_url="https://rag-llm-api.accubits.cloud/v1",
                 model_name="meta-llama/Meta-Llama-3-8B-Instruct",
                 api_key=api_key,
                 max_tokens=500)

In [None]:
import os
from qdrant_client import QdrantClient
from pymongo import MongoClient
import numpy as np
import uuid

# MongoDB Credentials
mongo_host = "mongodb://localhost:27017/"
mongo_db_name = "data1"  # Replace with your database name
mongo_collection_name = "mcqs"  # Replace with your collection name

# Qdrant Credentials
QDRANT_URL = os.getenv('QDRANT_URL')  # Qdrant endpoint URL
QDRANT_API_KEY = os.getenv('QDRANT_API_KEY')  # Qdrant API key

# Initialize the MongoDB client
mongo_client = MongoClient(mongo_host)

# Set the MongoDB database and collection
db = mongo_client[mongo_db_name]
collection = db[mongo_collection_name]

# Connect to Qdrant instance
qdrant_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# Check if the collection exists in Qdrant
collection_name = "mcqs_collection"
your_vector_size = 128  # Set this to the size of your embeddings

if not qdrant_client.collection_exists(collection_name):
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config={
            "size": your_vector_size,
            "distance": "Cosine"  # or another distance metric
        }
    )
    print(f"Collection '{collection_name}' created successfully.")
else:
    print(f"Collection '{collection_name}' already exists.")

# Retrieve the MCQs data from MongoDB
mcqs_data = list(collection.find())

# Prepare documents for upload
points = []
batch_size = 100  # Define the batch size for uploads

for i, doc in enumerate(mcqs_data):
    # Prepare the point with necessary data
    # Replace this with your actual embedding generation logic
    embedding = np.random.rand(your_vector_size).tolist()  # Dummy embedding; replace with actual embeddings

    point = {
        "id": str(uuid.uuid4()),  # Generate a valid UUID for the point ID
        "vector": embedding,
        "payload": {
            "question": doc.get("question", ""),
            "options": doc.get("options", [])
        }
    }
    points.append(point)

    # Upload in batches to avoid serialization issues
    if (i + 1) % batch_size == 0 or i == len(mcqs_data) - 1:
        try:
            qdrant_client.upsert(collection_name=collection_name, points=points)
            print(f"Uploaded batch of {len(points)} points to Qdrant.")
            points = []  # Reset points after upload
        except Exception as e:
            print(f"Error uploading to Qdrant: {e}")

print("Data upload process completed.")


Collection 'mcqs_collection' created successfully.
Uploaded batch of 52 points to Qdrant.
Data upload process completed.


In [48]:
import logging
from pymongo import MongoClient
#from pymongo.errors import ConnectionError  # Re-enabled this import
from sentence_transformers import SentenceTransformer
import numpy as np

# Configure logging
logging.basicConfig(level=logging.INFO)

# Connect to MongoDB
try:
    client = MongoClient('mongodb://localhost:27017/')
    db = client['data1']  # Replace with your database name
    collection = db['mcqs']  # Replace with your collection name
except ConnectionError as e:  # Adjusted to handle only ConnectionError
    logging.error(f"Error connecting to MongoDB: {e}")
    raise

# Fetch documents from MongoDB
def fetch_documents():
    try:
        documents = list(collection.find())  # Convert cursor to list
        logging.info(f"Fetched {len(documents)} documents from MongoDB.")  # Use len() for counting
        return documents
    except Exception as e:
        logging.error(f"Error fetching documents: {e}")
        return []

# Initialize vector store
def initialize_vector_store(model):
    try:
        # Sample initialization code for a vector store
        vectors = []
        for doc in fetch_documents():
            if 'question' in doc and 'options' in doc:
                question = doc['question']
                options = doc['options']
                vector = model.encode(question)  # Use a sentence transformer model to encode questions
                vectors.append((vector, options))
            else:
                logging.warning(f"Skipping document due to missing or invalid fields: {doc['_id']}")
        return vectors
    except Exception as e:
        logging.error(f"Error initializing vector store: {e}")
        return None

# Load the Sentence Transformer model
try:
    model = SentenceTransformer('all-MiniLM-L6-v2')  # You can choose a different model as needed
    vector_store = initialize_vector_store(model)
    if vector_store is None:
        logging.error("Vector store is not initialized.")
except Exception as e:
    logging.error(f"Error loading the model: {e}")

# Example usage
if __name__ == "__main__":
    # Fetch and process documents
    documents = fetch_documents()
    if documents:
        for doc in documents:
            logging.info(doc)


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:root:Fetched 52 documents from MongoDB.
Batches: 100%|██████████| 1/1 [00:00<00:00, 45.42it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 71.42it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 71.59it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 67.26it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.95it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 99.96it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 90.79it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 117.49it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 83.25it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 92.18it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 83.33it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 86.88it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.91it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 71.23it/s]
Batches

In [51]:
import logging
from pymongo import MongoClient
#from pymongo.errors import ConnectionError
from sentence_transformers import SentenceTransformer
import numpy as np

# Configure logging
logging.basicConfig(level=logging.INFO)

# Connect to MongoDB
try:
    client = MongoClient('mongodb://localhost:27017/')
    db = client['data1']  # Replace with your database name
    collection = db['mcqs']  # Replace with your collection name
except ConnectionError as e:
    logging.error(f"Error connecting to MongoDB: {e}")
    raise

# Fetch documents from MongoDB
def fetch_documents():
    try:
        documents = list(collection.find())
        logging.info(f"Fetched {len(documents)} documents from MongoDB.")
        return documents
    except Exception as e:
        logging.error(f"Error fetching documents: {e}")
        return []

# Initialize vector store
def initialize_vector_store(model):
    try:
        vectors = []
        for doc in fetch_documents():
            if 'question' in doc and 'options' in doc:
                question = doc['question']
                options = doc['options']
                vector = model.encode(question)
                vectors.append((vector, options, doc))  # Store the entire doc
            else:
                logging.warning(f"Skipping document due to missing or invalid fields: {doc['_id']}")
        return vectors
    except Exception as e:
        logging.error(f"Error initializing vector store: {e}")
        return None

# Load the Sentence Transformer model
try:
    model = SentenceTransformer('all-MiniLM-L6-v2')
    vector_store = initialize_vector_store(model)
    if vector_store is None:
        logging.error("Vector store is not initialized.")
except Exception as e:
    logging.error(f"Error loading the model: {e}")

# Function to query the vector store
def query_vector_store(user_query):
    try:
        query_vector = model.encode(user_query)
        similarities = []
        
        for vector, options, doc in vector_store:
            # Compute cosine similarity
            similarity = np.dot(query_vector, vector) / (np.linalg.norm(query_vector) * np.linalg.norm(vector))
            similarities.append((similarity, options, doc))
        
        # Sort by similarity and get the top match
        similarities.sort(key=lambda x: x[0], reverse=True)
        return similarities[0] if similarities else None  # Return the top result
    except Exception as e:
        logging.error(f"Error querying vector store: {e}")
        return None

# Example usage
if __name__ == "__main__":
    # Example user query
    user_query = "What happens when a chemical change occurs?"
    result = query_vector_store(user_query)
    
    if result:
        similarity, options, doc = result
        logging.info(f"Best match found with similarity {similarity:.4f}: {doc['question']}")
        logging.info(f"Options: {options}")
    else:
        logging.info("No matching document found.")


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


INFO:root:Fetched 52 documents from MongoDB.
Batches: 100%|██████████| 1/1 [00:00<00:00, 25.63it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 100.01it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 83.34it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 64.97it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 79.92it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.87it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 79.67it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.92it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 83.36it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 64.50it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.84it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 66.47it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 71.42it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 66.64it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 124.87it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 76.86it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 71.44it/s]
Batches: 100%|████

In [53]:
user_query = "What is a chemical reaction?"
result = query_vector_store(user_query)
    
if result:
    similarity, options, doc = result
    #logging.info(f"Best match found with similarity {similarity:.4f}: {doc['question']}")
    logging.info(f"Options: {options}")
else:
    logging.info("No matching document found.")

Batches: 100%|██████████| 1/1 [00:00<00:00, 66.66it/s]
INFO:root:Options: ['A) A change in shape', 'B) A change in color', 'C) A change in which a substance combines with another substance', 'D) A change in which a substance breaks down']
