### IMPORTS

In [1]:
import dspy
import openai
from dsp.utils import deduplicate
import os

  from .autonotebook import tqdm as notebook_tqdm


### OpenAI API key

In [4]:
# Take the OpenAI key from the environment instead of hard-coding it in the notebook.
# A missing OPENAI_API_KEY variable surfaces here as a KeyError.
openai.api_key = os.environ['OPENAI_API_KEY']

### Language model (LM) and retrieval model (RM)

In [3]:
# TODO: replace the LM with the fine-tuned model.
# TODO: replace the RM with a retrieval model specialised for the medical domain.

# Language model (LM) client.
turbo = dspy.OpenAI(model='gpt-3.5-turbo')
# Retrieval model (RM): a remote ColBERTv2 endpoint addressed by a fixed IP.
# NOTE(review): the URL path suggests a wiki17 abstracts index -- confirm the server is still reachable.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# Register the LM and RM in dspy's global settings.
dspy.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts)

### Pipeline Tools

In [4]:
# Signature for closed-book QA: used when we do not want any retrieval model (RM) involved.
# NOTE: the class docstring is not only documentation -- it appears verbatim as the
# instruction line of the prompt (see the inspect_history output later in this notebook),
# so editing it changes what the LM is asked to do.

class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question = dspy.InputField()  # the question text passed in by the caller
    answer = dspy.OutputField(desc="often between 1 and 5 words")  # desc is rendered into the prompt's format section

In [None]:
# Signature for answering the final question from retrieved context.
# Both RAG and SimplifiedBaleen wrap it in dspy.ChainOfThought.
# NOTE(review): the docstring presumably doubles as the prompt instruction, as it does
# for BasicQA -- treat it as runtime text, not free-form documentation.

class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")  # callers pass the list of retrieved passages
    question = dspy.InputField()  # the original user question
    answer = dspy.OutputField(desc="often between 1 and 5 words")  # read back by callers as `.answer`

In [5]:
# Signature for writing a search query that is then sent to the retrieval model (RM).
# SimplifiedBaleen (below) wraps it in dspy.ChainOfThought, one predictor per hop.
# NOTE(review): the docstring presumably doubles as the prompt instruction, as it does
# for BasicQA -- treat it as runtime text, not free-form documentation.

class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    context = dspy.InputField(desc="may contain relevant facts")  # passages gathered so far (empty list on the first hop)
    question = dspy.InputField()  # the original user question
    query = dspy.OutputField()  # read back by SimplifiedBaleen as `.query`

In [None]:
# Single-hop RAG pipeline: retrieve passages once, then generate the answer

class RAG(dspy.Module):
    """Single-hop retrieve-then-answer pipeline.

    The question itself is used as the retrieval query; the retrieved
    passages are then handed to a chain-of-thought GenerateAnswer predictor.
    """

    def __init__(self, num_passages=3):
        """Create the retriever (``k=num_passages``) and the answer predictor."""
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Return a dspy.Prediction carrying the retrieved ``context`` and the ``answer``."""
        retrieved = self.retrieve(question)
        passages = retrieved.passages
        answered = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=answered.answer)

In [6]:
# Multi-hop pipeline: generate a search query and retrieve at every hop, then answer

class SimplifiedBaleen(dspy.Module):
    """Multi-hop pipeline: every hop writes a search query, retrieves, and extends the context."""

    def __init__(self, passages_per_hop=3, max_hops=2):
        """Create one query predictor per hop, a retriever, and the answer predictor."""
        super().__init__()

        # One independent chain-of-thought query writer for each hop.
        hop_predictors = []
        for _ in range(max_hops):
            hop_predictors.append(dspy.ChainOfThought(GenerateSearchQuery))
        self.generate_query = hop_predictors
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
        self.max_hops = max_hops

    def forward(self, question):
        """Run ``max_hops`` rounds of query+retrieve, then answer from the accumulated passages."""
        gathered = []

        for hop_index in range(self.max_hops):
            write_query = self.generate_query[hop_index]
            hop_query = write_query(context=gathered, question=question).query
            new_passages = self.retrieve(hop_query).passages
            # Merge this hop's passages into the running context, dropping repeats.
            gathered = deduplicate(gathered + new_passages)

        final = self.generate_answer(context=gathered, question=question)
        return dspy.Prediction(context=gathered, answer=final.answer)

In [None]:
# Basic multi-hop pipeline built from inline string signatures

class BasicMH(dspy.Module):
    """Basic multi-hop QA pipeline defined with inline string signatures.

    Each hop asks a chain-of-thought predictor for a search query, retrieves
    passages for it, and merges them into the running context. The final
    answer is generated from the accumulated context.
    """

    def __init__(self, passages_per_hop=3, max_hops=2):
        """Create the retriever and the per-hop predictors.

        Args:
            passages_per_hop: passed to dspy.Retrieve as ``k``.
            max_hops: number of query/retrieve rounds. Defaults to 2, the
                value that was previously hard-coded in both ``__init__``
                and ``forward``.
        """
        super().__init__()

        self.max_hops = max_hops
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_query = [dspy.ChainOfThought("context, question -> search_query") for _ in range(max_hops)]
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer ``question`` and return the prediction with the gathered ``context`` attached."""
        context = []

        # Iterate over the predictors themselves so the hop count can never
        # disagree with the number of predictors built in __init__.
        for generate_query in self.generate_query:
            search_query = generate_query(context=context, question=question).search_query
            passages = self.retrieve(search_query).passages
            context = deduplicate(context + passages)

        return self.generate_answer(context=context, question=question).copy(context=context)

### Adding Assertions to Models for Better Performance

### Using Pipeline to run a few questions

In [11]:
# Closed-book predictors built from the BasicQA signature (no retrieval involved).
generate_answer = dspy.Predict(BasicQA)
# Chain-of-thought variant of the same signature; defined here but not called in this cell.
generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)

# Keep the question in one place so the call and the printout cannot drift apart.
demo_question = "who is the creator of pensilin"
pred = generate_answer(question=demo_question)

# Print the input and the prediction.
print(f"Question: {demo_question}")
print(f"Predicted Answer: {pred.answer}")

Question: who is the creator of pensilin
Predicted Answer: Alexander Fleming


In [12]:
# Display the most recent prompt sent to the LM together with its completion,
# to check what the BasicQA signature actually rendered to.
turbo.inspect_history(n=1)





Answer questions with short factoid answers.

---

Follow the following format.

Question: ${question}
Answer: often between 1 and 5 words

---

Question: who is the creator of pensilin
Answer:[32m Alexander Fleming[0m





### Compiling the DSPy Pipeline/Model