In [None]:
! pip install -U langchain_community langchain datasets faiss-cpu gradio langchain_experimental rank_bm25 sentence_transformers evaluate nltk rouge-score

In [None]:
import torch
print("GPU available:", torch.cuda.is_available())
print("Using device:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))
torch.cuda.empty_cache()

In [13]:
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from datasets import load_dataset
from langchain_core.runnables import RunnableLambda
from langchain_core.documents import Document
from langchain.prompts import ChatPromptTemplate
from langchain.load import dumps, loads
from operator import itemgetter
from langchain_experimental.text_splitter import SemanticChunker
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from sentence_transformers import CrossEncoder
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from evaluate import load
import nltk
nltk.download("punkt")

# Create LLM and Embeddings
def initialize_llm_and_embeddings():
    llm = HuggingFacePipeline(
        pipeline=pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_new_tokens=256,
        )
    )
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return llm, embeddings



def load_documents():
    ds = load_dataset("rag-datasets/rag-mini-wikipedia", "text-corpus")
    docs = [Document(page_content=entry["passage"]) for entry in ds["passages"]]
    return docs


def split_documents(docs, embeddings):
    splitter = SemanticChunker(
        embeddings=embeddings,
        breakpoint_threshold_type="percentile",
        breakpoint_threshold_amount=95
    )
    return splitter.split_documents(docs)


def create_retrievers(splits, embeddings):
    vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k":50})
    bm25_retriever = BM25Retriever.from_documents(splits)
    ensemble_retriever = EnsembleRetriever(
        retrievers=[retriever, bm25_retriever],
        weights=[0.5, 0.5]
    )
    return ensemble_retriever


def create_query_generator(llm):
    template = """You are an AI language model assistant. Your task is to generate ten
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
    prompt = ChatPromptTemplate.from_template(template)
    generate_queries = (
        prompt
        | llm
        | StrOutputParser()
        | (lambda x: x.split("\n"))
    )
    return generate_queries


def get_unique_union(documents: list[list]):
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]

def create_reranking_retriever(base_retriever, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2", top_k = 5):
    model = HuggingFaceCrossEncoder(model_name=model_name)
    reranker = CrossEncoderReranker(model=model, top_n = top_k)
    compression_retriever = ContextualCompressionRetriever(base_compressor=reranker, base_retriever=base_retriever)
    return compression_retriever


def print_context(inputs):
        context = inputs["context"]
        print("\n--- Retrieved Context ---\n")
        print(context)
        print("\n-------------------------\n")
        return inputs


def build_rag_chain(llm, retrieval_chain):
    prompt_template = """
You are an AI assistant. Given an incomplete sentence and some background context, complete the sentence in two to three grammatically correct and fluent English sentences.

Use the context provided below to make the continuation logically consistent, factually informative, and relevant.

Context:
{context}

Incomplete Sentence:
{sentence}

Completion:
"""
    prompt = ChatPromptTemplate.from_template(prompt_template)
    rag_chain = (
        {"context": retrieval_chain, "sentence": itemgetter("sentence") | RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain

def compute_metrics(predicted, expected):
  exact_match_metric = load("exact_match")
  rouge_metric = load("rouge")
  meteor_metric = load("meteor")

  predicted = list(map(lambda x: str(x) if x is not None else "", predicted))
  expected = list(map(lambda x: str(x) if x is not None else "", expected))

  exact_match_metric.add_batch(predictions=predicted, references=expected)
  rouge_metric.add_batch(predictions=predicted, references=expected)
  meteor_metric.add_batch(predictions=predicted, references=expected)

  em = exact_match_metric.compute()["exact_match"]
  rouge = rouge_metric.compute()["rougeL"]
  meteor = meteor_metric.compute()["meteor"]


  return em,rouge,meteor

def evaluate(rag_chain, test_data):
  all_predicted = []
  all_expected = []
  results = []
  for item in test_data:
    sentence = item["sentence"]
    expected = item["completion"]
    predicted = rag_chain.invoke({"sentence":sentence})

    print(f"sentence {sentence} \n expected {expected} \n predicted {predicted}")

    all_predicted.append(str(predicted) if predicted is not None else "")
    all_expected.append(str(expected) if expected is not None else "")
    results.append({
        "sentence":sentence,
        "expected_completion":expected,
        "predicted_completion":predicted
    })

  em, rouge, meteor = compute_metrics(all_predicted, all_expected)
  print(f"\nAverage Exact Match: {em:.2f}")
  print(f"\nAverage Rouge: {rouge:.2f}")
  print(f"\nAverage Meteor: {meteor:.2f}")

  return results



def main():
    test_data = [
    {"sentence": "Abraham Lincoln is the sixteenth", "completion": " President of the United States"},
    {"sentence": "The Uruguayan constitution allows citizens to challenge laws", "completion": " approved by Parliament by use of a Referendum, or to propose changes to the Constitution by the use of a Plebiscite."},
    {"sentence": "John Adams, Jr. (October 30,1735 July 4, 1826) was the second", "completion": "President of the United States (1797 1801). He also served as America's first Vice President (1789 1797)."}
]

    llm, embeddings = initialize_llm_and_embeddings()
    docs = load_documents()
    splits = split_documents(docs, embeddings)
    ensemble_retriever = create_retrievers(splits, embeddings)
    reranking_retriever = create_reranking_retriever(ensemble_retriever)
    generate_queries = create_query_generator(llm)


    retrieval_chain = (
      RunnableLambda(lambda x: reranking_retriever.get_relevant_documents(x["sentence"]))
      | RunnableLambda(lambda docs: {"context": get_unique_union([docs])})
    )
    rag_chain = build_rag_chain(llm, retrieval_chain)


    test_results = evaluate(rag_chain= rag_chain, test_data=test_data)




if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Device set to use cuda:0
Token indices sequence length is longer than the specified maximum sequence length for this model (823 > 512). Running this sequence through the model will result in indexing errors


sentence Abraham Lincoln is the sixteenth 
 expected  President of the United States 
 predicted Abraham Lincoln (February 12, 1809 âx80x93 April 15, 1865) was the sixteenth President of the United States, serving from March 4, 1861 until his assassination.
sentence The Uruguayan constitution allows citizens to challenge laws 
 expected  approved by Parliament by use of a Referendum, or to propose changes to the Constitution by the use of a Plebiscite. 
 predicted The Uruguayan constitution allows citizens to challenge laws approved by Parliament by use of a Referendum, or to propose changes to the Constitution by the use of a Plebiscite. During the last 15 years the method has been used several times; to confirm an amnesty to members of the military who violated human rights during the military regime (1973-1985), to stop privatization of public utilities companies (See Economy: Public Sector), to defend pensioners' incomes, and to protect water resources."
sentence John Adams, Jr. (O

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
import gradio as gr

def answer_question(question):
  answer = rag_chain.invoke({"question": question})
  return answer

llm, embeddings = initialize_llm_and_embeddings()
docs = load_documents()
splits = split_documents(docs, embeddings)
ensemble_retriever = create_retrievers(splits, embeddings)
reranking_retriever = create_reranking_retriever(ensemble_retriever)
generate_queries = create_query_generator(llm)

retrieval_chain = generate_queries | reranking_retriever.map() | get_unique_union
rag_chain = build_rag_chain(llm, retrieval_chain)


iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here"),
    outputs="text",
    title="Demo"
)

iface.launch()