In [None]:
# Scratch cell: evaluates a trivial expression (the saved output below is 3).
1+2

3

In [None]:
# pip install (if you haven't already)
# pip install langchain langchain-community chromadb sentence-transformers groq  # adjust package names as needed

# This cell is an MCQ quiz generator: it builds questions from a PDF using a Groq LLM and LangChain.

import os
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# --- Configuration: the values a reader is most likely to tune ---------------
PDF_PATH = "DataStructuresNotes1.pdf"
CHUNK_SIZE = 1000          # characters per chunk handed to the embedder
CHUNK_OVERLAP = 200        # characters shared between neighbouring chunks
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL_NAME = "llama-3.3-70b-versatile"
NUM_QUESTIONS = 5

# 1) Groq credentials (ensure GROQ_API_KEY is set in your env).
# Checked first so a missing key fails immediately, before the PDF is parsed
# and the embedding model / vector store are built.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise EnvironmentError("Set GROQ_API_KEY in environment before running")

# 2) load PDF
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()  # returns list[Document]

# 3) split into overlapping chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = splitter.split_documents(documents)
if not docs:
    # e.g. a scanned / image-only PDF; stop here with a readable message rather
    # than letting the vector store fail on an empty input.
    raise ValueError(f"No text chunks could be extracted from {PDF_PATH!r}")

# 4) embeddings
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# 5) build chroma vectorstore (use `embedding=` keyword)
# Optionally set persist_directory="chroma_db" to persist on disk
db = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory=None)

# 6) retriever
retriever = db.as_retriever()

# 7) Groq LLM; temperature=0 is passed to keep the output as repeatable as the model allows
llm = ChatGroq(api_key=groq_api_key, model=GROQ_MODEL_NAME, temperature=0)

# 8) prompt template for MCQs
# `{context}` is filled by the stuff-documents chain with the retrieved chunks;
# `{num_questions}` comes from the dict passed to `invoke` below.
mcq_prompt = ChatPromptTemplate.from_template(
    """
You are an expert quiz creator. Using the following context from documents, generate {num_questions} high-quality multiple-choice questions.

Context:
{context}

Instructions:
1. Each question must have 1 correct answer and 3 incorrect but plausible options.
2. Highlight the correct answer clearly.
3. Format the output as:

Q1. <question>
A) option1
B) option2
C) option3
D) option4
Correct Answer: <letter>

Now generate the MCQs.
"""
)

# 9) create document-combiner chain and retrieval chain
mcq_doc_chain = create_stuff_documents_chain(llm=llm, prompt=mcq_prompt)
retrieval_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=mcq_doc_chain)

# 10) invoke the chain
# NOTE(review): `input` is what the retriever searches with, so the questions are
# presumably drawn only from the chunks closest to this sentence - confirm that
# this is the intended coverage of the PDF.
result = retrieval_chain.invoke(
    {"input": "Generate MCQs from the document", "num_questions": NUM_QUESTIONS}
)

# result is a dict containing at least 'answer' and 'context' (see docs);
# subscripting makes a missing answer an error instead of printing "None".
print("=== Generated MCQs ===\n")
print(result["answer"])

  from .autonotebook import tqdm as notebook_tqdm
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


=== Generated MCQs ===

Here are 5 high-quality multiple-choice questions based on the provided context:

Q1. What is the purpose of the `displayQ()` function in the given code?
A) To insert an element into the queue
B) To delete an element from the queue
C) To display the elements in the queue
D) To create a new node

Correct Answer: **C**

Q2. Which function is used to create 'n' number of nodes in a circular doubly linked list?
A) `cdll_insert_beg()`
B) `cdll_createlist()`
C) `cdll_display_left_right()`
D) `getnode()`

Correct Answer: **B**

Q3. What is the action performed by the `deleteQ()` function in the given code?
A) It inserts a new element at the beginning of the queue
B) It deletes an element from the queue and prints its value
C) It displays the elements in the queue
D) It creates a new node

Correct Answer: **B**

Q4. How is a new node inserted at the beginning of a circular doubly linked list?
A) By setting `newnode->left` to `start` and `newnode->right` to `start->left`

In [None]:
# The cells below find the top 5 links for a topic and extract their important content for use with LLMs.

In [None]:
from ddgs import DDGS

def get_top5_english_edu_links(query, max_links=5, allowed_tlds=(".com", ".edu", ".org", ".net")):
    """
    Returns the top search-result links whose host ends in an allowed TLD.

    The TLD test is only a rough heuristic for "English / educational" pages:
    it is applied to the URL's hostname (not to the path or query string), and
    a matching TLD does not guarantee the page language.

    Args:
        query (str): Search query.
        max_links (int): Maximum number of URLs to return. Defaults to 5.
        allowed_tlds (Iterable[str]): Hostname suffixes to accept, each
            including the leading dot.

    Returns:
        list: Up to `max_links` URLs, in the order the search returned them.
    """
    # Function-scope import keeps this cell runnable on its own.
    from urllib.parse import urlparse

    links = []
    if max_links <= 0:
        return links

    if isinstance(allowed_tlds, str):
        allowed_tlds = (allowed_tlds,)
    suffixes = tuple(tld.lower() for tld in allowed_tlds)

    with DDGS() as ddgs:
        results = ddgs.text(
            query,
            region='wt-wt',       # DuckDuckGo's "no region" code; does not restrict language
            safesearch='on',      # documented values are the strings on / moderate / off
            max_results=max(10, 2 * max_links),  # over-fetch so filtering can still fill the list
        )
        for r in results or []:
            url = r.get('href')   # tolerate results that carry no link
            if not url:
                continue
            try:
                host = (urlparse(url).hostname or "").lower().rstrip(".")
            except ValueError:
                # Malformed URL (e.g. broken IPv6 literal): skip it.
                continue
            # Compare against the hostname only, so ".net" or ".com" appearing
            # in a path such as "/asp.net-intro" no longer counts as a match.
            if host.endswith(suffixes):
                links.append(url)
            if len(links) >= max_links:
                break
    return links

# Example usage
if __name__ == "__main__":
    # Demo run. `query` and `links` are deliberately module-level names:
    # later cells read `links`.
    query = "Python programming tutorial for students"
    links = get_top5_english_edu_links(query)

    print("Top 5 English educational links:")
    numbered_entries = (f"{rank}. {url}" for rank, url in enumerate(links, start=1))
    for entry in numbered_entries:
        print(entry)


Top 5 English educational links:
1. https://stackoverflow.com/questions/26000198/what-does-colon-equal-in-python-mean
2. https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3
3. https://stackoverflow.com/questions/25981703/pip-install-fails-with-connection-error-ssl-certificate-verify-failed-certi
4. https://stackoverflow.com/questions/6392739/what-does-the-at-symbol-do-in-python
5. https://stackoverflow.com/questions/3294889/iterating-over-dictionaries-using-for-loops


In [None]:
# Display the `links` list assigned by the example-usage block in the previous cell.
links

['https://stackoverflow.com/questions/26000198/what-does-colon-equal-in-python-mean',
 'https://stackoverflow.com/questions/35569042/ssl-certificate-verify-failed-with-python3',
 'https://stackoverflow.com/questions/25981703/pip-install-fails-with-connection-error-ssl-certificate-verify-failed-certi',
 'https://stackoverflow.com/questions/6392739/what-does-the-at-symbol-do-in-python',
 'https://stackoverflow.com/questions/3294889/iterating-over-dictionaries-using-for-loops']

In [None]:
def load_content_webbaseloader(links):
    """
    Load the page content of each URL with LangChain's WebBaseLoader.

    Loading is best-effort: a URL that fails is reported on stdout and skipped,
    and the remaining URLs are still processed.

    Args:
        links (Iterable[str] | None): URLs to fetch. `None` or an empty
            iterable yields an empty list; blank entries are skipped.

    Returns:
        list: The loaded documents, each with `metadata["source"]` set to the
        URL it was requested from.

    Raises:
        ImportError: If `langchain_community` is not installed.
    """
    # Imported here, outside the try block, because no visible cell imports it:
    # on a fresh kernel the resulting NameError would otherwise be swallowed by
    # the broad `except` below and every URL would just be reported as failed.
    from langchain_community.document_loaders import WebBaseLoader

    docs = []
    for url in links or []:
        if not url:
            continue
        try:
            loader = WebBaseLoader(url)
            loaded_docs = loader.load()
            # Add source metadata
            for doc in loaded_docs:
                doc.metadata["source"] = url
            docs.extend(loaded_docs)
        except Exception as e:
            # Deliberately broad: one unreachable page must not abort the batch.
            print(f"❌ Failed to load {url}: {e}")
    return docs


In [None]:
# Fetch the page content for every URL in `links`.
# NOTE: this rebinds `docs`, the name the first cell used for the split PDF chunks.
docs = load_content_webbaseloader(links)

In [None]:
def format_docs(docs, preview_chars=1500):
    """
    Render each document as a numbered, source-labelled text preview.

    Args:
        docs: Iterable of objects exposing `page_content` (str) and
            `metadata` (dict).
        preview_chars (int): Maximum number of content characters to keep;
            longer content is cut and marked as truncated.

    Returns:
        list: One formatted string per document, in input order.
    """

    def _render(number, document):
        # Strip outer whitespace first, then clip to the preview budget.
        body = document.page_content.strip()
        if len(body) > preview_chars:
            body = body[:preview_chars] + "...\n[Content truncated]"
        origin = document.metadata.get('source', 'Unknown')
        return f"""
{number}. Source: {origin}
----------------------------------------------------
{body}
"""

    return [_render(number, document) for number, document in enumerate(docs, start=1)]

In [None]:
# Build one preview string per loaded page, keeping at most 1500 characters of content each.
formatted_docs = format_docs(docs, preview_chars=1500)

In [None]:
# Display the full list of formatted previews (one string per loaded page).
formatted_docs

['\n1. Source: https://stackoverflow.com/questions/26000198/what-does-colon-equal-in-python-mean\n----------------------------------------------------\nWhat does colon equal (:=) in Python mean? - Stack Overflow\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\nSkip to main content\n\n\n\n\n\n\nStack Overflow\n\n\n\n\nAbout\n\n\n\n\nProducts\n\n\n\n\nFor Teams\n\n\n\n\n\n\n\n\nStack Overflow for Teams\nWhere developers & technologists share private knowledge with coworkers\n\n\n\n\nAdvertising\nReach devs & technologists worldwide about your product, service or employer brand\n\n\n\n\nKnowledge Solutions\nData licensing offering for businesses to build and improve AI tools and models\n\n\n\n\nLabs\nThe future of collective knowledge sharing\n\n\n\nAbout the company\nVisit the blog\n\n\n\n\n\n\n\n\n\n\n\n\nLoading…\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\ncurrent community\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n            Stack Overflow\n        \n\n\n\nhelp\nchat\n\n\n\