In [None]:
# Install required packages
# !pip install langchain langgraph faiss-cpu wikipedia langsmith sentence-transformers

# Imports
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langgraph.graph import StateGraph
from langchain_core.tracers.langchain import LangChainTracer

import wikipedia
import wikipediaapi
import requests
from bs4 import BeautifulSoup 
import os
import ast
import re
import time
import pandas as pd 

In [None]:
# LangSmith setup
# Keep a key that is already exported in the environment instead of clobbering
# it with an empty placeholder; the real key should never be pasted into the
# notebook itself.
os.environ.setdefault("LANGCHAIN_API_KEY", "")
os.environ["LANGCHAIN_PROJECT"] = "mcq_solver"

# Tracer object passed to chains through `callbacks=[tracer]`.
tracer = LangChainTracer(project_name="mcq_solver")

In [None]:
def aval_llm():
    """Build a fresh ChatOpenAI client with the notebook's model settings."""
    return ChatOpenAI(
        model="meta.llama3-1-8b-instruct-v1:0",
        api_key="",
        base_url=""
    )


# One independent client per pipeline role.
(
    llm_first_guess,
    llm_rephraser,
    llm_comparator,
    llm_expander,
    llm_decomposer,
) = (aval_llm() for _ in range(5))

# Wikipedia tool
wiki_api_wrapper = WikipediaAPIWrapper()
wiki_tool = WikipediaQueryRun(api_wrapper=wiki_api_wrapper)

# Embeddings
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


In [None]:
# wikipediaapi client for English Wikipedia, used by the page-fetch helpers.
# NOTE(review): user_agent is blank here — presumably redacted; confirm a real
# value is supplied before running.
wiki_wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent=''
)

# Second embeddings object built from the same model name as `embedding_model`.
# In the visible cells it is only referenced from commented-out code.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

def _get_full_text_for_title(title: str) -> str:
    """Return the article text for `title`, or "" when nothing could be fetched.

    The `wiki_wiki` (wikipediaapi) client is tried first. If it yields no
    text, the article's HTML page is downloaded and its non-empty ``<p>``
    paragraphs are joined with newlines.

    Both steps are best effort: any exception is swallowed on purpose so that
    one bad title cannot abort a multi-title search.

    Args:
        title: Wikipedia page title (spaces allowed).

    Returns:
        The page text, or an empty string if both strategies fail.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    try:
        page = wiki_wiki.page(title)
        if page.exists() and page.text and page.text.strip():
            return page.text
    except Exception:
        # continue to scraping fallback
        pass

    # Fallback: scrape the article paragraphs.
    # Percent-encode the title so characters such as "?", "#" or "%" stay part
    # of the URL path instead of being read as a query string or fragment.
    url = f"https://en.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"
    try:
        # NOTE(review): the User-Agent header is blank — presumably redacted;
        # confirm a real value is supplied before running.
        r = requests.get(url, headers={'User-Agent': ''}, timeout=8)
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, "html.parser")
            paras = [p.get_text().strip() for p in soup.select("p") if p.get_text().strip()]
            if paras:
                return "\n".join(paras)
    except Exception:
        pass

    return ""


def get_wikipedia_page(title):
    """Look up `title` through the `wiki_wiki` client.

    Returns the page's text, or "" when the page does not exist.
    """
    article = wiki_wiki.page(title)
    return article.text if article.exists() else ""


def scrape_wikipedia(title):
    """Scrape the paragraphs of a Wikipedia article with BeautifulSoup.

    Args:
        title: Wikipedia page title (spaces allowed).

    Returns:
        The text of every ``<p>`` element joined with newlines, or "" when the
        HTTP status is not 200.

    Raises:
        Whatever `requests.get` raises (connection errors, timeouts); callers
        that want best-effort behaviour must catch these themselves.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the title so characters such as "?", "#" or "%" stay part
    # of the URL path instead of being read as a query string or fragment.
    url = f"https://en.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"
    # A timeout keeps a stalled connection from blocking the notebook forever.
    r = requests.get(url, timeout=8)
    if r.status_code != 200:
        return ""
    soup = BeautifulSoup(r.text, "html.parser")
    paragraphs = [p.text for p in soup.select("p")]
    return "\n".join(paragraphs)


def search_wikipedia(query, max_pages=3):
    """Search Wikipedia and return the text of the top results.

    Args:
        query: Search string passed to `wikipedia.search`.
        max_pages: Maximum number of result titles to fetch.

    Returns:
        The page texts joined with newlines. Each title is fetched with
        `get_wikipedia_page`, falling back to `scrape_wikipedia` when that
        returns nothing. Returns "" if the search or any fetch raises.
    """
    try:
        titles = wikipedia.search(query, results=max_pages)
        texts = []
        for t in titles:
            # API first; scrape only when the API produced no text.
            text = get_wikipedia_page(t) or scrape_wikipedia(t)
            texts.append(text)
        return "\n".join(texts)
    except Exception:
        # Best effort, but no longer a bare `except:` — KeyboardInterrupt and
        # SystemExit must still be able to stop a long-running notebook cell.
        return ""

In [None]:
def multi_wiki_search(queries, max_titles_per_query: int = 3) -> str:
    """
    Collect Wikipedia article text for several search queries.

    Blank or empty queries are skipped. For every other query, the top
    `max_titles_per_query` titles are looked up with `wikipedia.search`
    (a failing search counts as "no titles"). Each title is fetched at most
    once through `_get_full_text_for_title`, and only non-blank texts are kept.

    If nothing was collected and a global `wiki_tool` exists, each query is
    retried through `wiki_tool.run`, ignoring queries that raise.

    Returns ONE combined string: the collected texts joined by blank lines
    ("" when nothing was found).
    """
    collected = []
    visited = set()

    for query in queries:
        if not (query and query.strip()):
            continue

        # Candidate titles from the python-wikipedia search
        try:
            candidates = wikipedia.search(query, results=max_titles_per_query)
        except Exception:
            candidates = []

        for title in candidates:
            if title in visited:
                continue
            visited.add(title)
            page_text = _get_full_text_for_title(title)
            if page_text and page_text.strip():
                collected.append(page_text)

    # Last resort: the LangChain Wikipedia tool, when it has been defined.
    if not collected and 'wiki_tool' in globals():
        for query in queries:
            try:
                summary = wiki_tool.run({"query": query.strip()})
                if summary and summary.strip():
                    collected.append(summary)
            except Exception:
                continue

    return "\n\n".join(collected)


def build_hybrid_retriever(text):
    """Build a BM25 + FAISS ensemble retriever over `text`.

    The text is split with a SentenceTransformersTokenTextSplitter and the
    resulting pieces are indexed twice: by BM25 and by a FAISS store embedded
    with `embedding_model`. Both retrievers get equal weight.
    """
    splitter = SentenceTransformersTokenTextSplitter(chunk_size=1, chunk_overlap=0)
    pieces = splitter.split_text(text)

    keyword_retriever = BM25Retriever.from_texts(pieces)
    vector_retriever = FAISS.from_texts(pieces, embedding_model).as_retriever()

    return EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],
        weights=[0.5, 0.5],
    )

def retrieve_top_chunks(text, query, k=3):
    """Index `text` with the hybrid retriever and return the content of the first `k` hits for `query`."""
    hits = build_hybrid_retriever(text).get_relevant_documents(query)
    top_contents = []
    for hit in hits[:k]:
        top_contents.append(hit.page_content)
    return top_contents


In [None]:
# def multi_wiki_search(queries, question, k=5):
#     """Build a FAISS retriever over Wikipedia search results and return top-k chunks"""
#     combined_text = ""
#     for q in queries:
#         combined_text += search_wikipedia(q) + "\n"

#     # Split into chunks
#     chunks = splitter.split_text(combined_text)

#     if not chunks:
#         return ""

#     # Build FAISS index
#     db = FAISS.from_texts(chunks, embedder)
#     retriever = db.as_retriever(search_kwargs={"k": k})

#     # Retrieve most relevant chunks for the actual question
#     docs = retriever.get_relevant_documents(question)
#     return "\n".join([d.page_content for d in docs])


In [None]:
# First guess and rephrase

# Chain that asks the model for an initial answer before any retrieval:
# prompt -> llm_first_guess -> plain string output.
# Prompt variables: `question` and `choices`.
# No tags, run name or tracer callback are configured on this chain.
# NOTE(review): if the caller passes `choices` as a Python list, it is
# presumably rendered as the list's repr without A-E labels — confirm the model
# can still map a letter to a position.
first_guess_chain = (
    PromptTemplate.from_template("""
                                 Answer the following multiple choice question:
                                 
                                 Question:
                                 {question}
                                 
                                 Choices:
                                 {choices}
                                 
                                 only return the letter of the correct answer since it might cause conflict.
                                 Answer:
                                 """)
    | llm_first_guess
    | StrOutputParser()
)

# Chain that turns the question into one Wikipedia search query:
# prompt -> llm_rephraser -> plain string output.
# Prompt variable: `question`.
# Runs are tagged "rephrase", named "rephrase_chain" and reported to `tracer`.
rephrase_chain = (
    PromptTemplate.from_template("""
                                 Rephrase the following question into a single, concise Wikipedia search query. Avoid explanations. Just return the query.
                                 if the question is already a search query, just return it.
                                 
                                 Question:
                                {question}
                                
                                rephrased question:
                                """)
    | llm_rephraser
    | StrOutputParser()
).with_config(tags=["rephrase"], run_name="rephrase_chain", callbacks=[tracer])

# Expand
# Chain that proposes extra Wikipedia search queries:
# prompt -> llm_expander -> plain string output.
# Prompt variables: `question` and `answer` (a candidate answer to analyse).
# The prompt asks for a Python list literal of zero to two strings, or "[]";
# the output is a raw string, so the caller has to parse the list itself.
# Runs are tagged "expand", named "expand_chain" and reported to `tracer`.
expand_chain = (
    PromptTemplate.from_template("""
                                 Extract the keywords containing technical terms and synonyms from the question and return zero to two search queries that are easy to search for on Wikipedia in order to find the answer.
                                 Try your best not to give too similar querries.
                                 
                                 You will also be given the potential answer to the question. analyze the answer, if you find the answer incorrect ignore it, otherwise write a query that can support your opinion by searching that query on Wikipedia.
                                 
                                 only return the queries in the format of a python list of strings.
                                 do not return anything else since it might cause conflict.
                                 important: if you cannot write any querry that improves the given query, just return "[]"
                                 do not return the exact given question.
                                 example:
                                 ["search query 1", "search query 2"]
                                 example:
                                 []
                                 
                                 Question:
                                 {question}
                                 
                                 Potential answer:
                                 {answer}
                                 
                                 queries:
                                """)
    | llm_expander
    | StrOutputParser()
).with_config(tags=["expand"], run_name="expand_chain", callbacks=[tracer])

# Decompose
# decompose_chain = (
#     PromptTemplate.from_template("Break this question into 2–3 simpler subquestions:\n\n{question}")
#     | llm_decomposer
#     | StrOutputParser()
# ).with_config(tags=["decompose"], run_name="decompose_chain", callbacks=[tracer])

In [None]:
#  Hybrid Retrieval can also be used but the current flow seems to work better!


# def multi_wiki_search(queries):
#     results = []
#     for q in queries:
#         try:
#             results.append(wiki_tool.run({"query": q.strip()}))
#         except:
#             continue
#     return "\n\n".join(results)

# # Hybrid retrieval
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# def build_hybrid_retriever(text):
#     chunks = text_splitter.split_text(text)
#     bm25 = BM25Retriever.from_texts(chunks)
#     faiss_store = FAISS.from_texts(chunks, embedding_model)
#     faiss = faiss_store.as_retriever()
#     return EnsembleRetriever(retrievers=[bm25, faiss], weights=[0.5, 0.5])

# def retrieve_top_chunks(text, query, k=3):
#     retriever = build_hybrid_retriever(text)
#     docs = retriever.get_relevant_documents(query)
#     return [doc.page_content for doc in docs[:k]]

In [None]:
# Chain that picks the option best supported by the retrieved context:
# input mapping -> prompt -> llm_comparator -> plain string output.
# Expected input keys: "question", "snippets" (iterable of strings) and
# "choices" (indexed 0-4, so at least five entries are required).
# Runs are tagged "compare", named "compare_chain" and reported to `tracer`.
compare_chain = (
    RunnableMap({
        "question": lambda x: x["question"],
        # Snippets are merged into one context block separated by blank lines.
        "context": lambda x: "\n\n".join(x["snippets"]),
        # Choices are mapped positionally onto the letters A-E.
        "A": lambda x: x["choices"][0],
        "B": lambda x: x["choices"][1],
        "C": lambda x: x["choices"][2],
        "D": lambda x: x["choices"][3],
        "E": lambda x: x["choices"][4],
    })
    | PromptTemplate.from_template(
        """You are a multiple-choice question solver.

Question:
{question}

Choices:
A. {A}
B. {B}
C. {C}
D. {D}
E. {E}

Wikipedia context:
{context}

Which choice is best supported? Respond with one letter: A, B, C, D, or E.
Return only the letter, no other text."""
    )
    | llm_comparator
    | StrOutputParser()
).with_config(tags=["compare"], run_name="compare_chain", callbacks=[tracer])

In [None]:
# def retrieve_evidence_per_option(question, option, retriever, k=3):
#     query = f"{question} {option}"
#     docs = retriever.get_relevant_documents(query)
#     return "\n".join([d.page_content for d in docs])


In [None]:
def retrieve_evidence_for_option(question, option, retriever, k=3):
    """Fetch evidence for one answer option.

    Queries `retriever` with the question and the option joined by a space and
    returns the content of the first `k` documents, one per line.
    """
    hits = retriever.get_relevant_documents(f"{question} {option}")
    evidence_lines = []
    for hit in hits[:k]:
        evidence_lines.append(hit.page_content)
    return "\n".join(evidence_lines)

In [None]:
def judge_option(llm, question, option, evidence):
    """Ask the LLM whether the evidence supports one answer option.

    Args:
        llm: Object whose `invoke(prompt)` returns a message with a string
            `.content` attribute.
        question: The question text.
        option: The answer option being judged.
        evidence: Evidence text shown to the model.

    Returns:
        "YES", "NO" or "UNKNOWN": the first of these that appears as a whole
        word in the reply, or "UNKNOWN" when none does.
    """
    prompt = f"""
Question: {question}

Option: {option}

Evidence from Wikipedia:
{evidence}

Does the evidence support that this option is correct?
Answer strictly with one of: YES, NO, or UNKNOWN.
"""
    result = llm.invoke(prompt).content.strip().upper()
    # Match whole words only: a plain substring test finds "NO" inside
    # "UNKNOWN" and "CANNOT", which turned every UNKNOWN reply into "NO".
    verdict = re.search(r"\b(YES|NO|UNKNOWN)\b", result)
    return verdict.group(1) if verdict else "UNKNOWN"


In [None]:
letter_to_number = {"A":0, "B":1, "C":2, "D":3, "E":4}


def _parse_query_list(raw):
    """Return the string items of the first ``[...]`` literal in `raw`, or [] if there is none."""
    start = raw.find('[')
    end = raw.find(']', start + 1) if start != -1 else -1
    if start == -1 or end == -1:
        return []
    try:
        parsed = ast.literal_eval(raw[start:end + 1])
    except (ValueError, SyntaxError):
        # The model produced something that is not a valid Python literal.
        return []
    if not isinstance(parsed, (list, tuple)):
        return []
    return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]


def answer_mcq_graph(question, choices):
    """
    Multiple Choice QA with Wikipedia + Hybrid Retrieval.

    Steps:
    1. Ask for a first guess, a rephrased search query and extra search
       queries (first_guess / rephrase / expand chains).
    2. Retrieve Wikipedia text for all queries (multi_wiki_search).
    3. Split + hybrid index, then retrieve top chunks for the rephrased query.
    4. Ask the comparator chain which option the retrieved chunks support.
    5. Return the best option + per-option scores.

    Args:
        question: The question text.
        choices: Sequence of five answer options, in A-E order.

    Returns:
        A tuple ``(best_choice, scores)``:
        - ``best_choice`` is the text of the selected option, or None when no
          Wikipedia text was found or the comparator reply did not start with
          a letter A-E.
        - ``scores`` maps each option to "YES"/"NO", or to "NO EVIDENCE" /
          "UNKNOWN" in the two failure cases above.
    """

    print(f"question: {question}, \n chouces: {choices} \n ----------------", )

    answer = first_guess_chain.invoke({"question": question, "choices": choices})
    print(f"answer: {answer} \n ---------------")

    rephrased = rephrase_chain.invoke({"question": question})
    print(f"rephrased: {rephrased} \n ---------------")

    # The first guess may contain no standalone letter; fall back to the raw
    # reply instead of crashing on `None.group()`.
    guess = re.search(r'\b[A-E]\b', answer)
    guess_index = letter_to_number[guess.group()] if guess else None
    if guess_index is not None and guess_index < len(choices):
        potential_answer = choices[guess_index]
    else:
        potential_answer = answer.strip()

    expanded = expand_chain.invoke({"answer": potential_answer, "question": question})
    print(f"expanded: {expanded} \n ---------------")
    # decomposed = decompose_chain.invoke({"question": question})
    # print(f"decomposed: {decomposed} \n ---------------")

    # The expander may return "[]", prose, or a malformed list; all of those
    # simply mean "no extra queries".
    extra_queries = _parse_query_list(expanded)
    print(f"expanded: {extra_queries} \n ---------------")

    # Rephrased query first, then the extras; blanks and duplicates dropped.
    all_queries = []
    for candidate in [rephrased.replace('\n', '').strip()] + extra_queries:
        if candidate and candidate not in all_queries:
            all_queries.append(candidate)
    print(f"all queries: {all_queries} \n -------------------------")

    # 1. Gather Wikipedia text from all queries
    wiki_text = multi_wiki_search(all_queries)
    if not wiki_text.strip():
        return None, {opt: "NO EVIDENCE" for opt in choices}

    # 2. Retrieve top chunks using hybrid retriever
    top_chunks = retrieve_top_chunks(wiki_text, rephrased, k=5)

    for chunk in top_chunks:
        print(f"chunk: {chunk}")
    print("--------------------------------")
    # 3. Ask comparator LLM which option the evidence supports
    result = compare_chain.invoke({
        "question": question,
        "choices": choices,
        "snippets": top_chunks
    })

    print(f"result: {result} \n -------------------------")
    # 4. Parse answer
    best_answer = result.strip().upper()
    # Remove punctuation and extra words
    best_answer = best_answer.replace(".", "").replace("ANSWER:", "").strip()
    # Keep only first valid letter
    if best_answer and best_answer[0] in "ABCDE":
        best_answer = best_answer[0]
    else:
        return None, {c: "UNKNOWN" for c in choices}

    # 5. Map back to choices
    best_index = "ABCDE".index(best_answer)
    scores = {c: ("YES" if i == best_index else "NO")
              for i, c in enumerate(choices)}

    return choices[best_index], scores

In [None]:
# Smoke test: run the pipeline on one hand-written question with five options.
question = "What is the main sequence in astronomy?"
choices = [
    "A group of galaxies",
    "A type of telescope",
    "A phase in stellar evolution",
    "A planetary orbit",
    "A black hole classification"
]

# `answer` holds the full return value of answer_mcq_graph, i.e. the
# (best choice, scores) pair, so the print below shows both.
answer = answer_mcq_graph(question, choices)
print("Predicted answer:", answer)

In [None]:
# Load the evaluation questions; the path is relative to the notebook's
# working directory. Later cells read the columns prompt, A-E and answer.
train_file = pd.read_csv("data/train_data.csv")

In [None]:
# Flatten the frame into three parallel lists, one entry per row:
# the five options (A-E order), the question text and the expected answer.
_rows = [record for _, record in train_file.iterrows()]

Choices = [[record[column] for column in "ABCDE"] for record in _rows]
Questions = [record['prompt'] for record in _rows]
Answers = [record['answer'] for record in _rows]

In [None]:
# Accumulator for the evaluation loop; re-running this cell resets progress.
predicted = []

In [None]:
# Resumable evaluation: starts at the first question that has no entry in
# `predicted`, so re-running the cell after an interruption continues from there.
for i in range(len(predicted), len(Questions)):
    # answer_mcq_graph returns (best_choice_text, scores), not a letter;
    # best_choice_text is None when the pipeline could not decide.
    predict = answer_mcq_graph(Questions[i], Choices[i])
    best_choice = predict[0]
    # Map the chosen option text back to its letter by position (A-E).
    predicted_letter = (
        "ABCDE"[Choices[i].index(best_choice)] if best_choice is not None else None
    )
    # NOTE(review): assumes Answers[i] is a letter A-E — confirm against the CSV.
    if predicted_letter != Answers[i]:
        # Mismatch (or no prediction): record the predicted letter, or None.
        predicted.append(predicted_letter)
        print("FINAL ANSWER:")
        print(predicted_letter, i)
    else:
        # A prediction that matches the expected answer is recorded as "_".
        predicted.append("_")
    # Pause between questions (60 s).
    time.sleep(60)

In [None]:
# Attach the results as a new column and write the frame to disk.
# The assignment needs one entry in `predicted` per row of `train_file`,
# i.e. the evaluation loop must have run to completion.
train_file['predicted'] = predicted
train_file.to_csv("Predicted_by_AgenticAI.csv")