In [1]:
!pip install pinecone-client

Defaulting to user installation because normal site-packages is not writeable


In [2]:
pip install sentence-transformers

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


#### This Function Give Topics Name Of both Subjects

In [4]:
def retrieve_topics_from_namespaces(index, namespaces):
    """
    Retrieve topics from multiple namespaces and store them in separate lists.

    :param index: Pinecone Index object.
    :param namespaces: List of namespaces to query.
    :return: A tuple containing two lists: one for topics in the first namespace and one for topics in the second namespace.
    """
    topics_by_namespace = {}

    for namespace in namespaces:
        topics = set()  # Using set to avoid duplicate topics
        try:
            # Query with an empty vector to fetch all items in the namespace
            response = index.query(vector=[0] * 768, namespace=namespace, top_k=1000, include_metadata=True)
            # Extract topics from the metadata
            for match in response["matches"]:
                metadata = match.get("metadata", {})
                if "topic" in metadata:
                    topics.add(metadata["topic"])
        except Exception as e:
            print(f"Error retrieving metadata from namespace '{namespace}': {e}")

        # Store the topics for each namespace
        topics_by_namespace[namespace] = list(topics)  # Convert set to list

    return topics_by_namespace

# Example usage:
index_name = "mcq-index"
index = pc.Index(index_name)

# Specify the namespaces
namespaces = ["computer_organization", "operating_system"]

# Retrieve topics from both namespaces
topics_by_namespace = retrieve_topics_from_namespaces(index, namespaces)
print(topics_by_namespace)
# Print topics for each namespace
for namespace, topics in topics_by_namespace.items():
    print(f"Topics in namespace '{namespace}':")
    for topic in topics:
        print(f"- {topic}")


{'computer_organization': ['Digital Logic Number Representation', 'Number Representation', 'Computer Organization Architecture', 'Microprocessor'], 'operating_system': ['Memory Management', 'Dead Lock', 'Unix', 'Cpu Scheduling', 'Process Management', 'Input Output Systems']}
Topics in namespace 'computer_organization':
- Digital Logic Number Representation
- Number Representation
- Computer Organization Architecture
- Microprocessor
Topics in namespace 'operating_system':
- Memory Management
- Dead Lock
- Unix
- Cpu Scheduling
- Process Management
- Input Output Systems


### Below Function Extract all Mcqs from Both Subjects that best Fit the User Query

In [1]:
# prompt: above is code is only giving result from one namespace i need best match of 10 

from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
def search_mcqs_by_query(index, query, namespaces, top_k=30):
    """
    Searches for MCQs across multiple namespaces and returns the best matches.
    """
    model = SentenceTransformer('all-mpnet-base-v2')
    query_embedding = model.encode(query)
    all_results = []

    for namespace in namespaces:
        try:
            response = index.query(vector=query_embedding.tolist(), namespace=namespace, top_k=top_k, include_metadata=True)
            for match in response["matches"]:
                all_results.append({
                    "text": match["id"],
                    "metadata": match["metadata"],
                    "score": match["score"],
                    "namespace": namespace
                })
        except Exception as e:
            print(f"Error searching namespace '{namespace}': {e}")

    # Sort all results by score in descending order
    all_results.sort(key=lambda x: x["score"], reverse=True)
    return all_results[:top_k]  # Return top_k best matches across all namespaces

def main():
    # Example usage
    pc = Pinecone(api_key="pcsk_3CYnJi_TZbGr8CeCcVxAsz4Li7J5n5hNBRqM7PA7k6xGKx7ftNXUYMYUJLJcb3PZrTneH4", environment="us-west1-gcp")
    index_name = "mcq-index"
    index = pc.Index(index_name)

    namespaces = ["computer_organization", "operating_system"]

    search_query = input("Enter your search query: ")
    top_k = int(input("How many mcqs you want to generate: "))
    mcq_results = search_mcqs_by_query(index, search_query, namespaces,top_k)


    if mcq_results:
        print(f"Top {len(mcq_results)} MCQs for '{search_query}' across namespaces:")
        for result in mcq_results:
            print(f"Text: {result['text']}")
            print(f"Metadata: {result['metadata']}")
            print(f"Score: {result['score']}")
            print(f"Namespace: {result['namespace']}")
            print("---")
    else:
        print(f"No MCQs found for '{search_query}'.")
if __name__ == "__main__":
    main()


Top 22 MCQs for 'cpu' across namespaces:
Text: Computer Organization Architecture-Question150
Metadata: {'correct_option': 'Sun-way TaihuLight', 'options': ['Sun-way TaihuLight', 'Titan', 'Piz Daint', 'Sequoia'], 'question_img_link': '', 'question_no': 'Question150', 'question_text': 'Which of the following Super Computers is the fastest Super Computer ?', 'topic': 'Computer Organization Architecture'}
Score: 0.431302458
Namespace: computer_organization
---
Text: Cpu Scheduling-Question50
Metadata: {'correct_option': '70%', 'options': ['70%', '30%', '60%', '64%'], 'question_img_link': '', 'question_no': 'Question50', 'question_text': 'In a system using single processor, a new process arrives at the rate of six processes per minute and each such process requires seven seconds of service time. What is the CPU utilization?', 'topic': 'Cpu Scheduling'}
Score: 0.413965434
Namespace: operating_system
---
Text: Computer Organization Architecture-Question51
Metadata: {'correct_option': '1.4 se

### Below Function extract all Mcqs of User provided Topic Through Query

In [2]:
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

pc = Pinecone(api_key="pcsk_3CYnJi_TZbGr8CeCcVxAsz4Li7J5n5hNBRqM7PA7k6xGKx7ftNXUYMYUJLJcb3PZrTneH4") # Replace with your actual API key
index = pc.Index("mcq-index")

# Initialize embedding model
embedding_model = SentenceTransformer("multi-qa-mpnet-base-dot-v1")

# Topics grouped by namespace
topics_by_namespace = {
    'computer_organization': ['Digital Logic Number Representation', 'Number Representation', 'Computer Organization Architecture', 'Microprocessor'],
    'operating_system': ['Memory Management', 'Dead Lock', 'Unix', 'Cpu Scheduling', 'Process Management', 'Input Output Systems']
}

# Flatten all topics for semantic matching
all_topics = []
for namespace, topics in topics_by_namespace.items():
    all_topics.extend(topics)

# Function to find the best-matched topic using semantic similarity
def get_best_matched_topic(query, topics, model):
    query_embedding = model.encode([query])[0]
    topic_embeddings = model.encode(topics)
    similarities = cosine_similarity([query_embedding], topic_embeddings)[0]
    best_match_index = int(np.argmax(similarities))
    return topics[best_match_index]

# Function to retrieve all MCQs for the matched topic across namespaces
def get_mcqs_by_topic(index, topic, topics_by_namespace,top_k):
    all_mcqs = []
    for namespace, topics in topics_by_namespace.items():
        if topic in topics:
            try:
                response = index.query(
                    vector=[0] * 768,  # Placeholder vector
                    filter={"topic": topic},
                    namespace=namespace,
                    top_k=top_k,
                    include_metadata=True
                )
                for match in response.get('matches', []):
                    all_mcqs.append({
                        "question_no": match["id"],
                        "question_text": match["metadata"].get("question_text", "N/A"),
                        "options": match["metadata"].get("options", "N/A"),
                        "correct_option": match["metadata"].get("correct_option", "N/A"),
                        "question_img_link": match["metadata"].get("question_img_link", "N/A"),
                        "namespace": namespace
                    })
            except Exception as e:
                print(f"Error retrieving MCQs for topic '{topic}' in namespace '{namespace}': {e}")
    return all_mcqs

# Main script
def main():
    query = input("Enter your query: ")
    top_k = int(input("How many mcqs you want to generate: "))
    best_topic = get_best_matched_topic(query, all_topics, embedding_model)
    print(f"Best matched topic: {best_topic}")
    
    mcqs = get_mcqs_by_topic(index, best_topic, topics_by_namespace,top_k)
    
    if mcqs:
        print(f"\nMCQs for the topic '{best_topic}':\n")
        for mcq in mcqs:
            print(f"Question No: {mcq['question_no']}")
            print(f"Question: {mcq['question_text']}")
            print(f"Options: {mcq['options']}")
            print(f"Correct Option: {mcq['correct_option']}")
            print(f"Image Link: {mcq['question_img_link']}")
            print(f"Namespace: {mcq['namespace']}")
            print("---")
    else:
        print(f"No MCQs found for the topic '{best_topic}'.")
    
if __name__ == "__main__":
    main()

Best matched topic: Cpu Scheduling

MCQs for the topic 'Cpu Scheduling':

Question No: Cpu Scheduling-Question3
Question: Consider three CPU-intensive processes, which require 10, 20 and 30 time units and arrive at times 0, 2 and 6, respectively. How many context switches are needed if the operating system implements a shortest remaining time first scheduling algorithm? Do not count the context switches at time zero and at the end.
Options: ['1', '2', '3', '4']
Correct Option: 2
Image Link: 
Namespace: operating_system
---
Question No: Cpu Scheduling-Question4
Question: Which of the following process scheduling algorithm may lead to starvation
Options: ['FIFO', 'Round Robin', 'Shortest Job Next', 'None of the above']
Correct Option: Shortest Job Next
Image Link: 
Namespace: operating_system
---
Question No: Cpu Scheduling-Question5
Question: If the quantum time of round robin algorithm is very large, then it is equivalent to:
Options: ['First in first out', 'Shortest Job Next', 'Lotter

### Below function allow user to select subject and extract mcqs from that

In [3]:
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
def search_mcqs_by_namespace(index, query, namespace, top_k=50):
    """
    Searches for MCQs within a specific namespace and returns the best matches.
    
    :param index: Pinecone Index object.
    :param query: User's query as a string.
    :param namespace: The namespace to restrict the search to.
    :param top_k: Number of top matches to return.
    :return: A list of the most relevant MCQs ranked by similarity.
    """
    model = SentenceTransformer('all-mpnet-base-v2')
    query_embedding = model.encode(query)
    mcqs = []

    try:
        # Query the specific namespace
        response = index.query(
            vector=query_embedding.tolist(),
            namespace=namespace,
            top_k=top_k,
            include_metadata=True
        )
        for match in response["matches"]:
            mcqs.append({
                "text": match["id"],
                "metadata": match["metadata"],
                "score": match["score"],
                "namespace": namespace
            })
    except Exception as e:
        print(f"Error searching namespace '{namespace}': {e}")

    # Sort results by score in descending order
    mcqs.sort(key=lambda x: x["score"], reverse=True)
    return mcqs[:int(top_k)]
def main():
    # Initialize Pinecone and define namespaces
    pc = Pinecone(api_key="pcsk_3CYnJi_TZbGr8CeCcVxAsz4Li7J5n5hNBRqM7PA7k6xGKx7ftNXUYMYUJLJcb3PZrTneH4", environment="us-west1-gcp")
    index_name = "mcq-index"
    index = pc.Index(index_name)

    namespaces = ["computer_organization", "operating_system"]

    # Display available namespaces for user selection
    print("Available namespaces:")
    for i, ns in enumerate(namespaces, 1):
        print(f"{i}. {ns}")

    # Get user selection
    namespace_choice = int(input("Select a namespace (enter the number): "))
    if 1 <= namespace_choice <= len(namespaces):
        selected_namespace = namespaces[namespace_choice - 1]
        print(f"You selected namespace: {selected_namespace}")
    else:
        print("Invalid namespace selection.")
        exit()

    # Get search query and top_k from user
    search_query = input("Enter your search query: ")
    top_k = int(input("Enter how many MCQs you want to retrieve: "))

    # Perform search in the selected namespace
    mcq_results = search_mcqs_by_namespace(index, search_query, selected_namespace, top_k)

    # Display results
    if mcq_results:
        print(f"Top {len(mcq_results)} MCQs for '{search_query}' in namespace '{selected_namespace}':")
        for result in mcq_results:
            print(f"Text: {result['text']}")
            print(f"Metadata: {result['metadata']}")
            print(f"Score: {result['score']}")
            print("---")
    else:
        print(f"No MCQs found for '{search_query}' in namespace '{selected_namespace}'.")
if __name__ == "__main__":
    main()


Available namespaces:
1. computer_organization
2. operating_system


You selected namespace: computer_organization
Top 22 MCQs for 'cpu' in namespace 'computer_organization':
Text: Computer Organization Architecture-Question150
Metadata: {'correct_option': 'Sun-way TaihuLight', 'options': ['Sun-way TaihuLight', 'Titan', 'Piz Daint', 'Sequoia'], 'question_img_link': '', 'question_no': 'Question150', 'question_text': 'Which of the following Super Computers is the fastest Super Computer ?', 'topic': 'Computer Organization Architecture'}
Score: 0.431302458
---
Text: Computer Organization Architecture-Question51
Metadata: {'correct_option': '1.4 seconds', 'options': ['1.0 second', '1.2 seconds', '1.4 seconds', '1.6 seconds'], 'question_img_link': '', 'question_no': 'Question51', 'question_text': 'A CPU has a five-stage pipeline and runs at 1 GHz frequency. Instruction fetch happens in the first stage of the pipeline. A conditional branch instruction\r\ncomputes the target address and evaluates the condition in the third stage of the pipeline. The processor s