In [1]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI, ChatAnthropic
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.runnables.openai_functions import OpenAIFunctionsRouter

from permchain.connection_inmemory import InMemoryPubSubConnection
from permchain.pubsub import PubSub
from permchain.topic import Topic

## Content Fetcher

First, we are going to define our content fetcher. This is responsible for taking a search query an getting relevant web pages.

In [2]:
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer

In [3]:
from duckduckgo_search import DDGS

ddgs = DDGS()

In [4]:
def retrieve_documents(query):
    query = query.strip().strip('"')
    search_results = ddgs.text(query)
    urls_to_look = []
    for res in search_results:
        if res.get("href", None):
            urls_to_look.append(res["href"])
        if len(urls_to_look) >= 4:
            break

    # Relevant urls
    # Load, split, and add new urls to vectorstore
    if urls_to_look:
        loader = AsyncHtmlLoader(urls_to_look)
        html2text = Html2TextTransformer()
        docs = loader.load()
        docs = list(html2text.transform_documents(docs))
    else:
        docs = []
    return docs

In [5]:
import nest_asyncio

nest_asyncio.apply()

In [6]:
# docs = retrieve_documents("langchain")

## Summarizer
We will now come up with an actor to summarize the results given a query and some search results

In [24]:
prompt = ChatPromptTemplate.from_template(
    "Answer the user's question given the search results\n\n<question>{question}</question><search_results>{search_results}</search_results>"
)

In [25]:
summarizer_chain = (
    prompt
    | ChatOpenAI(max_retries=0).with_fallbacks(
        [ChatOpenAI(model="gpt-3.5-turbo-16k"), ChatAnthropic(model="claude-2")]
    )
    | StrOutputParser()
)

## All together now!

In [26]:
summarizer_inbox = Topic("summarizer")

In [27]:
search_actor = (
    Topic.IN.subscribe()
    | {
        "search_results": retrieve_documents,
        "question": Topic.IN.current(),
    }
    | summarizer_inbox.publish()
)

In [43]:
summ_actor = (
    summarizer_inbox.subscribe() | {"answer": summarizer_chain} | Topic.OUT.publish()
)

In [44]:
web_researcher = PubSub(
    processes=(search_actor, summ_actor),
    connection=InMemoryPubSubConnection(),
)

In [45]:
web_researcher.invoke("What is langsmith?")

Fetching pages: 100%|#############################################################################################################################################################################################| 4/4 [00:00<00:00,  6.44it/s]


[{'answer': 'LangSmith is a platform for building production-grade language model applications. It helps trace and evaluate language model applications and intelligent agents, making it easier to move from prototype to production. LangSmith is developed by LangChain, the company behind the open source LangChain framework. More information can be found in the LangSmith documentation.'}]

In [47]:
web_researcher.batch(["what is langsmith", "what is llama"])

Fetching pages:   0%|                                                                                                                                                                                                     | 0/4 [00:00<?, ?it/s]
Fetching pages: 100%|#############################################################################################################################################################################################| 4/4 [00:00<00:00,  6.93it/s][A
Fetching pages: 100%|#############################################################################################################################################################################################| 4/4 [00:00<00:00,  4.79it/s]


TypeError: unsupported operand type(s) for +=: 'dict' and 'dict'

## Trying to use it as a sub component

In [31]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

In [32]:
template = """Write between 2 and 5 sub questions that serve as google search queries to search online that form an objective opinion from the following: {question}"""
functions = [
    {
        "name": "sub_questions",
        "description": "List of sub questions",
        "parameters": {
            "type": "object",
            "properties": {
                "questions": {
                    "type": "array",
                    "description": "List of sub questions to ask.",
                    "items": {"type": "string"},
                },
            },
        },
    },
]
prompt = ChatPromptTemplate.from_template(template)
question_chain = (
    prompt
    | ChatOpenAI(temperature=0).bind(
        functions=functions, function_call={"name": "sub_questions"}
    )
    | JsonKeyOutputFunctionsParser(key_name="questions")
)

In [33]:
question_chain.invoke({"question": "what is langsmith?"})

['What is the purpose of Langsmith?',
 'Who developed Langsmith?',
 'What are the features of Langsmith?',
 'How does Langsmith work?',
 'Are there any alternatives to Langsmith?']

In [34]:
template = """You are tasked with writing a research report to answer the following question:

<question>
{question}
</question>

In order to do that, you first came up with several sub questions and researched those. please find those below:

<research>
{research}
</research>

Now, write your final report answering the original question!"""
prompt = ChatPromptTemplate.from_template(template)
report_chain = prompt | ChatOpenAI() | StrOutputParser()

In [35]:
research_inbox = Topic("research")
writer_inbox = Topic("writer_inbox")

In [36]:
subquestion_actor = (
    # Listed in inputs
    Topic.IN.subscribe()
    | question_chain
    # The draft always goes to the editors inbox
    | research_inbox.publish()
)
research_actor = (
    research_inbox.subscribe()
    | {
        "research": lambda x: web_researcher.batch(x),
        # "research": lambda x: [web_researcher.invoke({"question": i}) for i in x],
        "question": Topic.IN.current() | itemgetter("question"),
    }
    | writer_inbox.publish()
)
write_actor = writer_inbox.subscribe() | report_chain | Topic.OUT.publish()

In [37]:
longer_researcher = PubSub(
    processes=(subquestion_actor, research_actor, write_actor),
    connection=InMemoryPubSubConnection(),
)

In [38]:
longer_researcher.invoke({"question": "what is langsmith?"})

Fetching pages:   0%|                                                                                                                                                                                               | 0/4 [00:00<?, ?it/s]
Fetching pages:   0%|                                                                                                                                                                                               | 0/4 [00:00<?, ?it/s][A

Fetching pages:   0%|                                                                                                                                                                                               | 0/4 [00:00<?, ?it/s][A[A


Fetching pages:   0%|                                                                                                                                                                                               | 0/4 [00:00<?, ?it/s][A[A[A



Fetching pages:   0%|               

['Research Report: Understanding LangSmith\n\nIntroduction:\nThe purpose of this research report is to provide a comprehensive understanding of LangSmith, a developer platform designed to facilitate the development and management of Language Model applications (LLMs). Through an analysis of the gathered research, this report aims to answer the question: "What is LangSmith?"\n\nResearch Findings:\n\n1. LangSmith Overview:\nLangSmith is a unified platform that helps developers trace, evaluate, and monitor LLM applications and intelligent agents. It aims to simplify the process of moving from prototype to production by providing features such as tracing runs, testing prompts or answers, and exporting datasets and runs for further analysis. LangSmith offers comprehensive visibility into the chain sequence of calls, real-time insights, and observability features to monitor LLM applications. It emphasizes best practices and offers useful tools and resources for developers working with LLMs.\