In [41]:
import time
from pinecone import Pinecone, ServerlessSpec
from langchain_groq import ChatGroq


In [42]:
# Initialize Pinecone
def initialize_pinecone(api_key, environment):
    return Pinecone(api_key=api_key, environment=environment)


In [43]:
# Initialize Pinecone Index
pinecone_api_key = "pcsk_3CYnJi_TZbGr8CeCcVxAsz4Li7J5n5hNBRqM7PA7k6xGKx7ftNXUYMYUJLJcb3PZrTneH4"
pinecone_environment = "us-west1-gcp"
pc = initialize_pinecone(api_key=pinecone_api_key, environment=pinecone_environment)

### Code To retrieve all Subjects Topics

In [44]:
def retrieve_topics_from_namespaces(index, namespaces):
    """
    Retrieve topics from multiple namespaces and store them in separate lists.

    :param index: Pinecone Index object.
    :param namespaces: List of namespaces to query.
    :return: A tuple containing two lists: one for topics in the first namespace and one for topics in the second namespace.
    """
    topics_by_namespace = {}

    for namespace in namespaces:
        topics = set()  # Using set to avoid duplicate topics
        try:
            # Query with an empty vector to fetch all items in the namespace
            response = index.query(vector=[0] * 768, namespace=namespace, top_k=1000, include_metadata=True)
            # Extract topics from the metadata
            for match in response["matches"]:
                metadata = match.get("metadata", {})
                if "topic" in metadata:
                    topics.add(metadata["topic"])
        except Exception as e:
            print(f"Error retrieving metadata from namespace '{namespace}': {e}")

        # Store the topics for each namespace
        topics_by_namespace[namespace] = list(topics)  # Convert set to list

    return topics_by_namespace

# Example usage:
index_name = "mcq-index"
index = pc.Index(index_name)

# Specify the namespaces
namespaces = ["computer_organization", "operating_system"]

# Retrieve topics from both namespaces
topics_by_namespace = retrieve_topics_from_namespaces(index, namespaces)
print(topics_by_namespace)
# Print topics for each namespace
for namespace, topics in topics_by_namespace.items():
    print(f"Topics in namespace '{namespace}':")
    for topic in topics:
        print(f"- {topic}")


{'computer_organization': ['Computer Organization Architecture', 'Number Representation', 'Microprocessor', 'Digital Logic Number Representation'], 'operating_system': ['Process Management', 'Unix', 'Cpu Scheduling', 'Memory Management', 'Input Output Systems', 'Dead Lock']}
Topics in namespace 'computer_organization':
- Computer Organization Architecture
- Number Representation
- Microprocessor
- Digital Logic Number Representation
Topics in namespace 'operating_system':
- Process Management
- Unix
- Cpu Scheduling
- Memory Management
- Input Output Systems
- Dead Lock


In [45]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


# Initialize the embedding model
embedding_model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

# Example list of topics from metadata (this would be the extracted topics for both namespaces)
extracted_topics_computer_organization = ['Microprocessor', 'Computer Organization Architecture', 'Digital Logic Number Representation',
                                          'Number Representation', 'Dead Lock', 'Cpu Scheduling']
extracted_topics_operating_system = ['Unix', 'Process Management', 'Memory Management', 'Input Output Systems']

# Function to extract the most relevant topic based on semantic similarity
def extract_topic_from_query(query, topics, model):
    # Encode the query and topics using the embedding model
    query_embedding = model.encode([query])[0]
    topic_embeddings = model.encode(topics)

    # Compute cosine similarity between the query and each topic
    similarities = cosine_similarity([query_embedding], topic_embeddings)[0]

    # Get the index of the most similar topic
    most_similar_index = np.argmax(similarities)

    # Return the most similar topic
    return topics[most_similar_index]

# Function to retrieve MCQs based on the topic and question range from Pinecone metadata
def get_mcqs_by_topic(index, topic, namespaces, question_range=None):
    try:
        # Prepare filters based on query restrictions
        filter_conditions = {"topic": topic}
        # Query Pinecone to get all records for the given topic with the additional conditions
        query_response = index.query(
            vector=[0] * 768,  # Querying with a zero vector (this part is a placeholder for the search vector)
            filter=filter_conditions,  # Apply the filter based on topic and additional conditions
            top_k=100,  # Number of MCQs to retrieve (adjust as needed)
            namespace=namespaces,  # Search across both namespaces
            include_metadata=True
        )

        # Extract and return the MCQs for the given topic
        mcqs = []
        for match in query_response['matches']:
            mcqs.append(match['metadata'])

        return mcqs

    except Exception as e:
        print(f"Error retrieving MCQs for topic '{topic}': {e}")
        return []

# Function to handle queries and get MCQs
def handle_query(query, topics_computer_organization, topics_operating_system, index, namespaces, model, question_range=None):
    # Combine topics from both namespaces
    topics = topics_computer_organization + topics_operating_system

    # Extract the most relevant topic from the query based on the combined list
    topic = extract_topic_from_query(query, topics, model)

    if topic:
        print(f"Query identified the topic: {topic}")

        # Get MCQs associated with the extracted topic and additional restrictions (if any)
        mcqs_for_topic = get_mcqs_by_topic(index, topic, namespaces, question_range)

        if mcqs_for_topic:
            # Print the MCQs
            print(f"MCQs for topic '{topic}':")
            for mcq in mcqs_for_topic:
                print(f"Question No: {mcq['question_no']}")
                print(f"Question: {mcq['question_text']}")
                print(f"Options: {', '.join(mcq['options'])}")
                print(f"Correct Option: {mcq['correct_option']}")
                print(f"Image Link: {mcq['question_img_link']}")
                print("-" * 50)
        else:
            print(f"No MCQs found for topic '{topic}'.")
    else:
        print(f"No relevant topic found in the query.")

# Example usage
query = "Tell me about the Process Management topic, but I want questions 1 to 10"
namespaces = ["computer_organization", "operating_system"]  # Search across both namespaces
index = pc.Index("mcq-index")  # Assuming `pc` is initialized and connected

# Define query restrictions (if any)
question_range = (1, 10)  # Example: Retrieve questions from 1 to 10

# Handle the query and get MCQs for the identified topic with restrictions
handle_query(query, extracted_topics_computer_organization, extracted_topics_operating_system, index, namespaces, embedding_model, question_range)


Query identified the topic: Process Management
Error retrieving MCQs for topic 'Process Management': (400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Date': 'Tue, 31 Dec 2024 20:58:44 GMT', 'Content-Type': 'text/plain', 'Content-Length': '50', 'Connection': 'keep-alive', 'server': 'envoy'})
HTTP response body: : Proto field is not repeating, cannot start list.

No MCQs found for topic 'Process Management'.
