In [1]:
!pip install -U dspy-ai openai jinja2 langchain langchain-community langchain-openai langchain-core

Collecting dspy-ai
  Downloading dspy_ai-2.3.1-py3-none-any.whl (164 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dspy-ai
  Attempting uninstall: dspy-ai
    Found existing installation: dspy-ai 2.2.0
    Uninstalling dspy-ai-2.2.0:
      Successfully uninstalled dspy-ai-2.2.0
Successfully installed dspy-ai-2.3.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Setup

In [2]:
import getpass
import os

# Prompt for the key only when it is not already set, so re-running this cell
# (or running with the variable exported) neither blocks on input nor
# overwrites a working key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [3]:
import dspy
# Address of the ColBERTv2 retrieval endpoint (path names the wiki17_abstracts index).
COLBERT_URL = "http://20.102.90.50:2017/wiki17_abstracts"
colbertv2 = dspy.ColBERTv2(url=COLBERT_URL)

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache
from langchain_openai import OpenAI

# Register a SQLite-backed LLM cache stored in the local file cache.db.
llm_cache = SQLiteCache(database_path="cache.db")
set_llm_cache(llm_cache)

# Completion model used by the chains below; temperature is fixed at 0.
llm = OpenAI(
    model_name="gpt-3.5-turbo-instruct",
    temperature=0,
)


In [23]:
def retrieve(inputs):
    """Return the passage texts ColBERTv2 finds for ``inputs["question"]``.

    The retriever is queried with ``k=5`` and only the ``"text"`` value of
    each returned record is kept, in the order the retriever returned them.
    """
    records = colbertv2(inputs["question"], k=5)
    texts = []
    for record in records:
        texts.append(record["text"])
    return texts

In [24]:
# Sanity-check the retriever: display the raw result records for a sample query.
colbertv2("cycling")


[{'text': 'Cycling | Cycling, also called bicycling or biking, is the use of bicycles for transport, recreation, exercise or sport. Persons engaged in cycling are referred to as "cyclists", "bikers", or less commonly, as "bicyclists". Apart from two-wheeled bicycles, "cycling" also includes the riding of unicycles, tricycles, quadracycles, recumbent and similar human-powered vehicles (HPVs).',
  'pid': 2201868,
  'rank': 1,
  'score': 27.078739166259766,
  'prob': 0.3544841299722533,
  'long_text': 'Cycling | Cycling, also called bicycling or biking, is the use of bicycles for transport, recreation, exercise or sport. Persons engaged in cycling are referred to as "cyclists", "bikers", or less commonly, as "bicyclists". Apart from two-wheeled bicycles, "cycling" also includes the riding of unicycles, tricycles, quadracycles, recumbent and similar human-powered vehicles (HPVs).'},
 {'text': 'Cycling (ice hockey) | In ice hockey, cycling is an offensive strategy that moves the puck along 

In [25]:
# A second sample query, this time a multi-word phrase, to inspect the ranked records.
colbertv2("false vacuum decay")

[{'text': 'False vacuum | In quantum field theory, a false vacuum is a vacuum which exists at a "local" minimum of energy and is therefore not truly stable. This is in contrast to a "true vacuum", which exists at a "global" minimum and is stable. A false vacuum may be very long-lived, or "metastable".',
  'pid': 4931026,
  'rank': 1,
  'score': 23.21883773803711,
  'prob': 0.9678558463523195,
  'long_text': 'False vacuum | In quantum field theory, a false vacuum is a vacuum which exists at a "local" minimum of energy and is therefore not truly stable. This is in contrast to a "true vacuum", which exists at a "global" minimum and is stable. A false vacuum may be very long-lived, or "metastable".'},
 {'text': 'Stereum ostrea | Stereum ostrea, also called false turkey-tail and golden curtain crust, is a basidiomycete fungus in the genus "Stereum". It is a plant pathogen and a wood decay fungus. The name "ostrea", from the word \'oyster\', describes its shape.',
  'pid': 481790,
  'rank': 

## Normal LCEL

In [26]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough


In [27]:
# Prompt with two slots: the retrieved passages and the user's question.
tweet_template = "Given {context}, answer the question '{question}' as a tweet."
prompt = PromptTemplate.from_template(tweet_template)

# Plain LCEL pipeline: add `context` via `retrieve`, fill the prompt,
# call the LLM, then parse the completion into a string.
vanilla_chain = (
    RunnablePassthrough.assign(context=retrieve)
    | prompt
    | llm
    | StrOutputParser()
)

In [28]:
# Run the plain LCEL chain end to end on a sample question.
vanilla_chain.invoke({"question":"meaning of life??"})

'\n\n"The meaning of life is a complex and subjective concept, with many different interpretations and beliefs. Some see it as a purpose to be fulfilled, while others view it as a journey to be experienced. Regardless, it is a question that has fascinated humanity for centuries. #meaningoflife #philosophy"'

## LCEL \<> DSPy

In order to use LangChain with DSPy, you need to make two minor modifications:

<b>LangChainPredict</b>

You need to change from doing `prompt | llm` to using `LangChainPredict(prompt, llm)` from `dspy`.

This is a wrapper which will bind your prompt and LLM together so that DSPy can optimize them.

<b>LangChainModule</b>

This is a wrapper which wraps your final LCEL chain so that DSPy can optimize the whole thing.

In [29]:
# From DSPy import the modules that know how to interact with LangChain LCEL
from dspy.predict.langchain import LangChainModule, LangChainPredict

In [30]:
# Same pipeline as the vanilla chain, except that `prompt | llm` is replaced by
# a single LangChainPredict step so that it behaves like a DSPy program.
retrieval_step = RunnablePassthrough.assign(context=retrieve)
predict_step = LangChainPredict(prompt, llm)
zeroshot_chain = retrieval_step | predict_step | StrOutputParser()

AttributeError: 'FieldInfo' object has no attribute 'finalize'

In [31]:
# Wrap the LCEL chain in a LangChainModule so DSPy can optimize it as a whole.
# This cell rebinds `zeroshot_chain` to its own wrapped value, so guard against
# wrapping an already-wrapped chain when the cell is re-run.
if not isinstance(zeroshot_chain, LangChainModule):
    zeroshot_chain = LangChainModule(zeroshot_chain)


NameError: name 'zeroshot_chain' is not defined

In [32]:
# Smoke-test the wrapped chain with a sample question.
zeroshot_chain.invoke({'question': 'what should I say??'})


NameError: name 'zeroshot_chain' is not defined

## Trying the Module

In [33]:
# A factual question to try against the zero-shot chain.
question = "In what region was Eddy Mazzoleni born?"

zeroshot_chain.invoke({"question": question})


NameError: name 'zeroshot_chain' is not defined

## Load Data

In order to compile our chain, we need a dataset to work with. This dataset just needs to be raw inputs and outputs. For our purposes, we will use the HotPotQA dataset.

In [34]:
import dspy
from dspy.datasets import HotPotQA

# Load HotPotQA with fixed seeds: 200 training and 200 dev examples, no test split.
dataset = HotPotQA(
    train_seed=1, train_size=200, eval_seed=2023, dev_size=200, test_size=0, keep_details=True
)


def _question_as_input(example):
    """Remove the `id` and `type` fields and mark `question` as the input field.

    Any other fields are labels and/or metadata.
    """
    return example.without("id", "type").with_inputs("question")


trainset = list(map(_question_as_input, dataset.train))
dev_examples = list(map(_question_as_input, dataset.dev))

# The first 50 dev examples become the validation set; the rest stay as the dev set.
valset = dev_examples[:50]
devset = dev_examples[50:]

Downloading data:  10%|▉         | 56.4M/566M [01:10<10:36, 801kB/s] 


KeyboardInterrupt: 

## Define a metric

In [None]:
# Define the signature for automatic assessments.
class Assess(dspy.Signature):
    """Assess the quality of a tweet along the specified dimension."""

    # Supporting passages; the metric passes "N/A" for checks that do not use context.
    context = dspy.InputField(desc="ignore if N/A")
    # The text being judged (the metric passes the generated tweet).
    assessed_text = dspy.InputField()
    # The yes/no question put to the judging LM.
    assessment_question = dspy.InputField()
    # The verdict; the metric only looks at its first word.
    assessment_answer = dspy.OutputField(desc="Yes or No")


# LM that `metric` activates (via dspy.context) to judge the generated tweets.
gpt4T = dspy.OpenAI(model="gpt-4-1106-preview", max_tokens=1000, model_type="chat")
# Optional selector read by `metric`: set to "correct", "engaging" or "faithful" to
# make it return just that check; None keeps the combined score.
METRIC = None


def _is_yes(answer):
    """Return True when a judge reply starts with the word "yes".

    Tolerates an empty or missing reply (treated as "no") and punctuation or
    markdown markers around the first word, e.g. "Yes." or "**Yes**".
    """
    words = str(answer or "").split()
    if not words:
        return False
    return words[0].strip(".,;:!?\"'`*").lower() == "yes"


def metric(gold, pred, trace=None):
    """Score a generated tweet with three LM-judged yes/no checks.

    The judge LM (`gpt4T`) is asked whether the tweet is faithful to the
    passages retrieved for the question, whether it contains the gold answer,
    and whether it is engaging.

    Args:
        gold: example exposing ``question`` and ``answer``.
        pred: prediction exposing the tweet text as ``output``.
        trace: when not None, a boolean pass/fail is returned instead of a
            fractional score.

    Returns:
        - the single boolean check named by the module-level ``METRIC`` when
          it is "correct", "engaging" or "faithful";
        - otherwise ``score >= 3`` when ``trace`` is not None;
        - otherwise ``score / 3.0``.
        ``score`` is the number of passed checks, forced to 0 when the tweet
        is not judged correct or is longer than 280 characters.
    """
    question, answer, tweet = gold.question, gold.answer, pred.output
    context = colbertv2(question, k=5)

    engaging_q = "Does the assessed text make for a self-contained, engaging tweet?"
    faithful_q = "Is the assessed text grounded in the context? Say no if it includes significant facts not in the context."
    correct_q = (
        f"The text above is should answer `{question}`. The gold answer is `{answer}`."
        " Does the assessed text above contain the gold answer?"
    )

    with dspy.context(lm=gpt4T):
        faithful_reply = dspy.Predict(Assess)(
            context=context, assessed_text=tweet, assessment_question=faithful_q
        )
        correct_reply = dspy.Predict(Assess)(
            context="N/A", assessed_text=tweet, assessment_question=correct_q
        )
        engaging_reply = dspy.Predict(Assess)(
            context="N/A", assessed_text=tweet, assessment_question=engaging_q
        )

    # Parse defensively: an empty reply would otherwise raise IndexError and
    # "Yes." would otherwise be counted as a "no".
    correct = _is_yes(correct_reply.assessment_answer)
    engaging = _is_yes(engaging_reply.assessment_answer)
    faithful = _is_yes(faithful_reply.assessment_answer)

    # A tweet that misses the gold answer or exceeds 280 characters scores zero.
    score = (correct + engaging + faithful) if correct and (len(tweet) <= 280) else 0

    if METRIC == "correct":
        return correct
    if METRIC == "engaging":
        return engaging
    if METRIC == "faithful":
        return faithful

    if trace is not None:
        return score >= 3
    return score / 3.0

## Evaluate Baseline

In [None]:
from dspy.evaluate.evaluate import Evaluate

In [None]:
# Build an evaluator over `devset` that scores with `metric`, then run it on the
# zero-shot chain to get the baseline.
evaluate = Evaluate(
    metric=metric, devset=devset, num_threads=8, display_progress=True, display_table=5
)
evaluate(zeroshot_chain)

## Optimize

In [None]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

In [None]:
# Deliberately small search budget for this example: random search over ~3
# candidate programs, each bootstrapping at most 3 traces.
optimizer = BootstrapFewShotWithRandomSearch(
    metric=metric,
    max_bootstrapped_demos=3,
    num_candidate_programs=3,
)

# *Compile* the chain with the optimizer. Expect roughly 5-10 minutes unless
# the calls are already cached.
optimized_chain = optimizer.compile(
    zeroshot_chain,
    trainset=trainset,
    valset=valset,
)

## Evaluating the optimized chain

In [None]:
# Re-run the same evaluator on the compiled chain for comparison with the baseline.
evaluate(optimized_chain)

## Inspect the optimized chain

### Look at the prompt used

In [None]:
# Unpack the most recent entry of dspy.settings.langchain_history into its
# prompt and output, then show the prompt text.
prompt_used, output = dspy.settings.langchain_history[-1]
print(prompt_used)

### Look at the demos

In [None]:
# Keep only the demos of the chain's first module that carry a truthy
# `augmented` attribute.
first_module = optimized_chain.modules[0]
demos = [eg for eg in first_module.demos if getattr(eg, "augmented", False)]

In [None]:
# Display the filtered demos.
demos
