In [32]:
!pip3 install dspy-ai weaviate-client python-dotenv jinja2  > /dev/null

In [19]:
import dspy
from dspy.retrieve.weaviate_rm import WeaviateRM
import weaviate
import os
from dotenv import load_dotenv

load_dotenv(".env")
headers = {"X-Openai-Api-Key":os.getenv("OPENAI_API_KEY")}

client = weaviate.Client("http://localhost:8080",additional_headers=headers)

GEN_LM = dspy.OpenAI(model="gpt-4",api_key=os.getenv("OPENAI_API_KEY"))
EVAL_LM = dspy.OpenAI(model="gpt-3.5-turbo",api_key=os.getenv("OPENAI_API_KEY"))
RETRIVER_MODEL = WeaviateRM("Paul_Graham",weaviate_client=client)


            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


In [4]:
dspy.settings.configure(lm=GEN_LM,rm=RETRIVER_MODEL)

In [34]:
# TODO: Maybe question from user input

_questions = ["Does graham went to MIT??",
             "What was the first program graham tried??",
             "What did he say about AI??",
             "Did he like paintings??",
             "Why did graham hated Physics?"]
questions = [dspy.Example(question=question).with_inputs("question") for question in _questions]

In [39]:
class GenerateAnswer(dspy.Signature):
    """You will be given relevant context. Generate answers grounded with that context"""
    question = dspy.InputField()
    context = dspy.InputField(desc="may contain relevant context to the question")
    answer = dspy.OutputField()


class RAG(dspy.Module):
    def __init__(self):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=2)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
    
    def forward(self,question):
        _context = self.retrieve(question).passages
        context = ".".join(_context)
        _answer = self.generate_answer(context=context,question=question)
        return dspy.Prediction(answer = _answer.answer)    # need to be returned as Prediction for evaluator
    


In [40]:
class EvalSignature(dspy.Signature):
    Context = dspy.InputField(desc="The context for answering the question")
    Question = dspy.InputField(desc="The question to be answered")
    Answer = dspy.InputField(desc="Generated answer to the question")
    Evaluation = dspy.OutputField(desc="Rating between 1 and 5.")

def eval_answer(gold,pred):
    question = gold.question
    answer  = pred.answer
    print(f"Requested Question: {question}")
    print(f"Generated Answer: {answer}")

    # Parameters for evaluation
    detail = "Is the Answer detail?"
    faithful = "Is the generated answer grounded in the context? Say no if it includes significant facts not in the context."
    overall =   f"Please rate how well this answer answers the question, {question} based on on the context.\n"

    with dspy.context(lm=EVAL_LM):
        context = ".".join(dspy.Retrieve(k=2)(question).passages)
        detail = dspy.ChainOfThought(EvalSignature)(Context="N/A",Question=detail,Answer=answer)
        faithful = dspy.ChainOfThought(EvalSignature)(Context=context,Question=faithful,Answer=answer)
        overall = dspy.ChainOfThought(EvalSignature)(Context=context,Question=overall,Answer=answer)

    total = float(detail.Evaluation) + float(faithful.Evaluation)*2 + float(overall.Evaluation)

    print(f"Detail   Score: {detail.Evaluation}")
    print(f"Faithful Score: {faithful.Evaluation}")
    print(f"Overall  Score: {overall.Evaluation}")

    return total/5.0

In [41]:
from dspy.evaluate.evaluate import Evaluate

evaluate = Evaluate(devset=questions,display_progress=True,display_table=5)
evaluate(RAG(),metric=eval_answer)



Requested Question: Does graham went to MIT??
Generated Answer: No, Graham did not go to MIT. He went to Harvard.




Detail   Score: 5
Faithful Score: 5
Overall  Score: 5
Requested Question: What was the first program graham tried??
Generated Answer: The first program Graham tried was on the IBM 1401.




Detail   Score: 2
Faithful Score: 5
Overall  Score: 5
Requested Question: What did he say about AI??
Generated Answer: He said that the AI being practiced at the time was a hoax. He described this AI as a program that translates a statement into a formal representation and adds it to its knowledge base.




Detail   Score: 3
Faithful Score: 5
Overall  Score: 5
Requested Question: Did he like paintings??
Generated Answer: Yes, he liked paintings.




Detail   Score: 2
Faithful Score: 1
Overall  Score: 5
Requested Question: Why did graham hated Physics?
Generated Answer: The context does not provide information on why Graham hated Physics.


Average Metric: 15.0 / 5  (300.0): 100%|██████████| 5/5 [00:05<00:00,  1.15s/it]

Detail   Score: 2
Faithful Score: 2
Overall  Score: 5
Average Metric: 15.0 / 5  (300.0%)



  df = df.applymap(truncate_cell)
  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,answer,eval_answer
0,Does graham went to MIT??,"No, Graham did not go to MIT. He went to Harvard.",4.0
1,What was the first program graham tried??,The first program Graham tried was on the IBM 1401.,3.4
2,What did he say about AI??,He said that the AI being practiced at the time was a hoax. He described this AI as a program that translates a statement into...,3.6
3,Did he like paintings??,"Yes, he liked paintings.",1.8
4,Why did graham hated Physics?,The context does not provide information on why Graham hated Physics.,2.2


300.0