In [1]:
from pinecone import Pinecone
import os
import numpy as np
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel, Field
from langchain_pinecone import PineconeVectorStore
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import PromptTemplate
from typing import List
from langgraph.graph import END, StateGraph, START
from typing_extensions import TypedDict
from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod, NodeStyles
from IPython.display import Image, display
from pprint import pprint
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from huggingface_hub import login

**Set Up**

In [4]:
os.environ['PINECONE_API_KEY'] = 'xxx'
os.environ['OPENAI_API_KEY'] = 'xxx'

In [6]:
#Pinecone setup
index_name = 'news-embedding-stitching'
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
retriever = vectorstore.as_retriever()
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [8]:
#Load Finetuned model
device = "cuda" if torch.cuda.is_available() else "cpu"
login(token='xxx')
tokenizer2 = AutoTokenizer.from_pretrained("Jiangying9/SmolLM2-1.7B-Instruct-FineTuned6")
tokenizer2.add_eos_token = True
tokenizer2.pad_token_id = 0
tokenizer2.padding_side = "left"
fine_tuned_model = AutoModelForCausalLM.from_pretrained("Jiangying9/SmolLM2-1.7B-Instruct-FineTuned6")
fine_tuned_model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(49152, 2048, padding_idx=2)
    (layers): ModuleList(
      (0-23): 24 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=4, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=4, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear(
            (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
          

**Create Agents**

Agent 1: Is the document relevant to the question

In [12]:
# Define the data model for grading
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# Initialize LLM and structured output grader
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments, method="function_calling")

# Define the grading system prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# Set up the prompt template
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

In [14]:
def retrieve_and_grade(question: str):
    # Retrieve relevant documents using the retriever
    retrieved_docs = retriever.get_relevant_documents(question)

    # Iterate over retrieved documents and grade each one
    grades = []
    for doc in retrieved_docs:
        document_text = doc.page_content  
        
        print("Retrieved Document:\n", document_text)
        
        # Get the grade for the document
        grade_response = structured_llm_grader.invoke(grade_prompt.format_messages(document=document_text, question=question))
        
        # Access the binary_score directly from the GradeDocuments object
        grade = grade_response.binary_score
        grades.append(grade)
        
        print(f"Grade: {grade}\n")
    
    return grades


In [16]:
question = "What are the main policy issues for the upcoming election?"
graded_docs = retrieve_and_grade(question)
print(graded_docs)

  retrieved_docs = retriever.get_relevant_documents(question)


Retrieved Document:
 https://www.huffpost.com/entry/vulnerable-house-dems-see-abortion-as-winning-campaign-theme_n_62e6847ee4b006483a9e59ea Vulnerable House Dems See Abortion As Winning Campaign Theme POLITICS The Supreme Court decision on abortion has scrambled the political dynamics heading into the November elections, when control of Congress is at stake. Thomas Beaumont, AP 2022-07-31
Grade: yes

Retrieved Document:
 https://www.huffpost.com/entry/election-2022-midterms-ohio-indiana_n_6270f5b2e4b029505df61205 2022 Midterms: What To Know About Tuesday's Primaries In Ohio And Indiana POLITICS The races, particularly in Ohio, could provide a fresh window into former President Trump's sway among the party faithful. Julie Carr Smyth and Tom Davies, AP 2022-05-03
Grade: no

Retrieved Document:
 https://www.huffpost.com/entry/tim-scott-senate-republicans-november-election_n_631ef3a4e4b027aa405d4f5e Sen. Tim Scott Downplays Electability Concerns Over Struggling Senate GOP Candidates POLITI

**Basic Rag**

In [19]:
basic_prompt = PromptTemplate.from_template("Using the following context, answer the question: \nContext: {context}\nQuestion: {query}")
basic_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
basic_rag_chain = (
    {"context": basic_retriever, "query": RunnablePassthrough()} 
    | basic_prompt 
    | llm.with_config({"temperature": 0.7})
    | StrOutputParser()
)

In [21]:
generation= basic_rag_chain.invoke(question)

In [23]:
generation

"The main policy issues for the upcoming election include abortion, the Supreme Court decision on abortion, control of Congress, former President Trump's sway among party faithful, and electability concerns over struggling Senate GOP candidates."

Agent 2: Did RAG Hallucinate?

In [26]:
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""
    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeHallucinations, method="function_calling")

# Prompt for hallucination grading
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

# Combine the prompt with the hallucination grader
hallucination_grader = hallucination_prompt | structured_llm_grader

In [28]:
def evaluate_rag_generation_with_hallucination(question: str):
    #Retrieve the relevant context using the RAG pipeline
    retrieved_docs = basic_retriever.get_relevant_documents(question)
    context = [doc.page_content for doc in retrieved_docs]  # List of documents' text
    
    #Generate the answer from the RAG chain
    generation = basic_rag_chain.invoke(question)
    
    #Use the hallucination grader to check if the answer is grounded in the context
    grade_response = hallucination_grader.invoke({"documents": context, "generation": generation})
    
    #Return the hallucination grade
    return grade_response.binary_score

In [30]:
question = "What are the main policy issues for the upcoming election?"
hallucination_score = evaluate_rag_generation_with_hallucination(question)
print(hallucination_score)

yes


Agent 3: Did the RAG answer the Question

In [33]:
# Data model for grading answers
class GradeAnswer(BaseModel):
    """Binary score to assess if the answer addresses the question."""
    
    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

# LLM with function call
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeAnswer, method="function_calling")

# Prompt template to evaluate if the answer addresses the question
system = """You are a grader assessing whether an answer addresses / resolves the question. \n
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Grader that uses the structured LLM and the answer prompt
answer_grader = answer_prompt | structured_llm_grader

In [35]:
# Function to evaluate a RAG generation using the answer grader
def evaluate_rag_generation_with_answer(question: str):
    #Retrieve the relevant context using the RAG pipeline
    retrieved_docs = basic_retriever.get_relevant_documents(question)
    context = [doc.page_content for doc in retrieved_docs]
    
    #Generate the answer from the RAG chain
    generation = basic_rag_chain.invoke(question)
    
    #Use the answer grader to check if the answer resolves the question
    grade_response = answer_grader.invoke({"question": question, "generation": generation})
    
    #Return the answer grade
    return grade_response.binary_score

In [37]:
question = "What are the main policy issues for the upcoming election?"
answer_score = evaluate_rag_generation_with_answer(question)
print(answer_score)

yes


Agent: Rewrite Question

In [40]:
### Question Re-writer
# LLM
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

question_rewriter = re_write_prompt | llm | StrOutputParser()

In [42]:
# Function to rewrite a question for vectorstore retrieval
def rewrite_question_for_retrieval(question: str):    
    #Use the question rewriter pipeline to optimize the question
    improved_question = question_rewriter.invoke({"question": question})
    
    return improved_question

In [44]:
question = "What are the key policies being discussed in the upcoming election?"
improved_question = rewrite_question_for_retrieval(question)

In [46]:
print(f"Original Question: {question}")

Original Question: What are the key policies being discussed in the upcoming election?


In [48]:
print(f"Improved Question: {improved_question}")

Improved Question: What are the main policy topics up for debate in the upcoming election?


In [50]:
# Define generator parameters
generator_params = {
    "max_new_tokens": 250, 
    "temperature": 0.7,  
    "top_p": 0.9,  
    "do_sample": True, 
    "pad_token_id": tokenizer2.eos_token_id,  
    "eos_token_id": tokenizer2.eos_token_id 
}

def process_trump_style(input_dict: dict) -> dict:
    """
    Converts the input text into a Trump-like style using the fine-tuned model.

    Args:
        input_text (str): The text to be translated into Trump-like style.

    Returns:
        str: The refined output after processing in Trump-like style.
    """
    # Define the question template for the model
    question_template = '''You will be asked to rewrite the following text in about 50 words in the style and tone of Donald Trump. Place four asterisks (****) before the response. Do not add any additional text after the answer.

Text:
{description_text}

Response:'''

    # Extract only the 'generation' part from the input dictionary
    input_text = input_dict.get('generation', '')

    # Format the input text into the model's prompt structure
    prompt = question_template.format(description_text=input_text)

    # Encode the formatted prompt
    inputs = tokenizer2.encode(prompt, return_tensors="pt").to(fine_tuned_model.device)

    # Generate the output from the model
    with torch.no_grad():
        outputs = fine_tuned_model.generate(inputs, **generator_params)

    # Decode and return the generated response
    refined_generation = tokenizer2.decode(outputs[0], skip_special_tokens=True)

    # Return the final Trump-like styled response
    return {"generation": refined_generation}

**LangGraph**

In [53]:
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        rewritten_question: optimized question
        generation: LLM generation
        documents: list of documents
        hallucination_score: score indicating whether generation is grounded in facts
        answer_grade: score indicating whether the answer addresses the question
    """
    
    question: str
    rewritten_question: str  
    generation: str
    documents: List[str]
    hallucination_score: str 
    answer_grade: str  

In [55]:
def retrieve(state: GraphState) -> GraphState:
    """
    Retrieve documents

    Args:
        state (GraphState): The current graph state

    Returns:
        GraphState: Updated state with the new key "documents" containing retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Retrieval
    documents = retriever.invoke(question)
    return {**state, "documents": documents}


def generate(state: GraphState) -> GraphState:
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]

    # RAG generation using basic_rag_chain
    generation = basic_rag_chain.invoke(question)
    return {"documents": documents, "question": question, "generation": generation}


def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    documents = state["documents"]

    # Score each document using the structured_llm_grader
    filtered_docs = []
    for d in documents:
        document_text = d.page_content  

        # Use structured_llm_grader to grade relevance
        grade_response = structured_llm_grader.invoke(grade_prompt.format_messages(document=document_text, question=question))
        
        # Access the binary score from the response
        grade = grade_response.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue

    return {"documents": filtered_docs, "question": question}


def transform_query(state: GraphState) -> GraphState:
    """
    Transform the query to produce a better question.

    Args:
        state (GraphState): The current graph state

    Returns:
        GraphState: Updated state with a rephrased question
    """
    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]

    # Re-write question
    better_question = question_rewriter.invoke({"question": question})
    return {**state, "question": better_question}


def evaluate_hallucination(state: GraphState) -> GraphState:
    """
    Grade the hallucination score of the generation.

    Args:
        state (GraphState): The current graph state

    Returns:
        GraphState: Updated state with the hallucination score
    """
    print("---EVALUATE HALLUCINATION---")
    documents = state["documents"]
    generation = state["generation"]

    # Grade hallucination
    hallucination_score = hallucination_grader.invoke({
        "documents": documents, 
        "generation": generation
    })
    
    return {**state, "hallucination_score": hallucination_score.binary_score}


def evaluate_answer(state: GraphState) -> GraphState:
    """
    Grade whether the generated answer addresses the question.

    Args:
        state (GraphState): The current graph state

    Returns:
        GraphState: Updated state with the answer grade
    """
    print("---EVALUATE ANSWER---")
    question = state["question"]
    generation = state["generation"]

    # Grade answer
    answer_grade = answer_grader.invoke({
        "question": question, 
        "generation": generation
    })
    
    return {**state, "answer_grade": answer_grade.binary_score}
    
def process_final_answer(state: GraphState) -> GraphState:
    """
    Process the generated answer through the fine-tuned model before finalizing the answer.

    Args:
        state (GraphState): The current graph state

    Returns:
        GraphState: Updated state with the refined answer
    """
    print("---PROCESSING THROUGH FINE-TUNED TRUMP-STYLE MODEL---")
    generation = state.get("generation")
    
    # Refine the generation with the fine-tuned model
    refined_generation = process_trump_style(generation)
    
    return {**state, "final_generation": refined_generation}

In [57]:
def decide_to_generate(state: GraphState) -> str:
    """
    Determines whether to generate an answer or regenerate a question.

    Args:
        state (GraphState): The current graph state

    Returns:
        str: Decision for the next node to call ("not_relevant" or "relevant")
    """

    print("---ASSESS GRADED DOCUMENTS---")
    filtered_documents = state["documents"]

    if not filtered_documents:
        # If no relevant documents, we need to regenerate a new query
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
        return "not_relevant"
    else:
        # If we have relevant documents, proceed to generate the answer
        print("---DECISION: GENERATE---")
        return "relevant"


def grade_generation_v_documents_and_question(state: GraphState) -> str:
    """
    Determines whether the generation is grounded in the document and answers the question.

    Args:
        state (GraphState): The current graph state

    Returns:
        str: Decision for the next node to call ("useful", "not useful", or "not supported")
    """

    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    # Evaluate hallucination score
    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    hallucination_grade = score.binary_score

    # Check if the generation is grounded in the documents
    if hallucination_grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
        
        # Evaluate whether the generation addresses the question
        print("---GRADE GENERATION vs QUESTION---")
        score = answer_grader.invoke({"question": question, "generation": generation})
        answer_grade = score.binary_score
        
        if answer_grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
    else:
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"


In [59]:
# Initialize the workflow with the GraphState
workflow = StateGraph(GraphState)

# Define the nodes (functions)
workflow.add_node("retrieve", retrieve)  # retrieve documents
workflow.add_node("grade_documents", grade_documents)  # grade documents for relevance
workflow.add_node("generate", generate)  # generate the answer based on context
workflow.add_node("transform_query", transform_query)  # transform the query to make it more precise

# Build the graph with edges
workflow.add_edge(START, "retrieve")  # Start with retrieving documents
workflow.add_edge("retrieve", "grade_documents")  # After retrieval, grade documents for relevance

# Conditional edges for document relevance
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,  # Function that decides if we need to generate or transform query
    {
        "not_relevant": "transform_query",  # If documents are not relevant, rephrase query
        "relevant": "generate",  # If documents are relevant, generate an answer
    },
)

workflow.add_edge("transform_query", "retrieve")  # If query is transformed, go back to retrieval

# Define conditional edges for the 'generate' node based on the decision made
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,  # Function to check if generation answers the question
    {
        "not supported": "generate",  # If generation is not supported, retry generation
        "not useful": "transform_query",  # If generation doesn't address the question, transform the query
        "useful": "process_trump_style",  # Only if the decision is to generate, proceed to processing
    },
)

# Add the new step in the workflow
workflow.add_node("process_trump_style", process_trump_style)

# Connect the 'generate' step to 'process_final_answer' (only when generation occurs)
workflow.add_edge("process_trump_style", END) 

# Compile the workflow into a runnable application
app = workflow.compile()

**CLI**

In [67]:
# Command-line interface to run the multi-agent system with multiple questions
def main():
    print("Welcome to the Multi-Agent Question Answering System!\n")
    
    while True:
        question = input("Please enter your question: ")
        print(f"\nProcessing the question: {question}\n")

        # Initiate the workflow 
        state = {"question": question}
        state = retrieve(state)
        state = grade_documents(state)
        state = generate(state)

        # Proceed through the various steps of the workflow
        state = evaluate_hallucination(state)
        state = evaluate_answer(state)
        state = process_trump_style(state)

        # Final decision
        print(f"Final Answer: {state['generation']}\n")

        # Ask the user if they want to ask another question
        another_question = input("Would you like to ask another question? (yes/no): ").strip().lower()
        if another_question != "yes":
            print("Thank you for using the system. Goodbye!")
            break

if __name__ == "__main__":
    main()

Welcome to the Multi-Agent Question Answering System!



Please enter your question:  What are the main policy issues for the upcoming election?



Processing the question: What are the main policy issues for the upcoming election?

---RETRIEVE---
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GENERATE---
---EVALUATE HALLUCINATION---
---EVALUATE ANSWER---
Final Answer: You will be asked to rewrite the following text in about 50 words in the style and tone of Donald Trump. Place four asterisks (****) before the response. Do not add any additional text after the answer.

Text:
The main policy issues for the upcoming election include abortion, the Supreme Court decision on abortion, control of Congress, former President Trump's sway among party faithful, and electability concerns over struggling Senate GOP candidates.

Response:
Folks, let me tell you, nobody knows more about big league politics than I do. And believe me, the issues on this election are huge. We're talking about abortion, the Supreme

Would you like to ask another question? (yes/no):  yes
Please enter your question:  What happened to Queen Elizabeth II?



Processing the question: What happened to Queen Elizabeth II?

---RETRIEVE---
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GENERATE---
---EVALUATE HALLUCINATION---
---EVALUATE ANSWER---
Final Answer: You will be asked to rewrite the following text in about 50 words in the style and tone of Donald Trump. Place four asterisks (****) before the response. Do not add any additional text after the answer.

Text:
Queen Elizabeth II sprained her back and missed the Remembrance Sunday service.

Response:
Folks, let me tell you, Queen Elizabeth is a total loser. She can't even make it to a big event like Remembrance Sunday. Sad!



Would you like to ask another question? (yes/no):  no


Thank you for using the system. Goodbye!
