In [None]:
# Optional one-time setup: uncomment the line below to install the notebook's dependencies.
#!pip install -U langchain-openai langchain langchainhub tavily-python

# Placeholder for the OpenAI API key; a later cell copies it into the process environment.
# Fill it in locally and never commit a real key with the notebook.
OPENAI_API_KEY = ""

In [None]:
import os

# Export the key so that the OpenAI clients created later in the notebook can
# pick it up from the environment.
# Only do so when a key was actually filled in: unconditionally assigning the
# empty placeholder would overwrite a valid OPENAI_API_KEY that is already set
# in the kernel's environment.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# Streaming

In [None]:
from langchain_openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0,max_tokens=256)
async for chunk in llm.astream("Écris-moi une chansons sur les oranges."):
    print(chunk, end="", flush=True)



Verse 1:
Dans un verger ensoleillé
Poussent des fruits colorés
Des oranges juteuses et sucrées
Qui nous font saliver

Refrain:
Oh les oranges, si belles et si rondes
On en raffole, elles sont si bonnes
On les croque, on les presse en jus
Les oranges, c'est un vrai délice pour nous

Verse 2:
Leur peau est si douce et lisse
Et leur parfum nous envoûte
On les cueille avec délice
Et on en fait des compotes

Refrain:
Oh les oranges, si belles et si rondes
On en raffole, elles sont si bonnes
On les croque, on les presse en jus
Les oranges, c'est un vrai délice pour nous

Pont:
Et quand vient l'hiver
Les oranges sont là pour nous réchauffer
Dans un bon vin chaud
Ou en marmelade sur du pain chaud

Refrain:
Oh les oranges, si belles et si rondes
On en raffole, elles sont si bonnes
On les croque,

In [None]:
from langchain_openai import OpenAI

# Same completion model as in the asynchronous demo, this time created with
# `streaming=True` and consumed through the synchronous `stream` API.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
    max_tokens=256,
    streaming=True,
)

# Synchronous streaming: print each chunk without a trailing newline so the
# text appears progressively on a single flow.
for chunk in llm.stream("Écris-moi une chansons sur les oranges."):
    print(chunk, end="", flush=True)

## Streaming d'agent

In [None]:
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_functions_agent

# Fetch the agent prompt from the LangChain hub; it can be customised freely.
prompt = hub.pull("hwchase17/openai-tools-agent")

# Override the template of the first message of the prompt
# (presumably the system message; check the value displayed below).
first_message = prompt.messages[0]
first_message.prompt.template = "Tu es un assistant IA serviable."

# Display the resulting message templates as the cell output.
prompt.messages

In [None]:
import random
from langchain.tools import tool

#define tools
@tool
async def where_cat_is_hiding() -> str:
    """Où le chat se cache-t-il?"""
    # The docstring above is kept verbatim on purpose: the `@tool` decorator
    # applied to this function uses it as the tool description given to the model.
    # Pick one of the two known hiding places at random.
    hiding_places = ["sous le lit", "sur l'étagère"]
    return random.choice(hiding_places)


@tool
async def get_items(place: str) -> str:
    """Utilisez cet outil pour rechercher les éléments qui se trouvent à l'endroit donné."""
    # The docstring above is kept verbatim on purpose: the `@tool` decorator
    # applied to this function uses it as the tool description given to the model.
    #
    # Args:
    #     place: free-text description of a location, e.g. "sous le lit".
    # Returns:
    #     A comma-separated, human-readable list of the items found there.
    #
    # `where_cat_is_hiding` answers in French ("sous le lit", "sur l'étagère"),
    # so matching only the English words "bed"/"shelf" made every lookup fall
    # through to the default answer. Both languages are accepted here, and the
    # comparison is case-insensitive.
    normalized_place = place.lower()

    bed_keywords = ("lit", "bed")
    shelf_keywords = ("étagère", "etagere", "shelf")

    if any(keyword in normalized_place for keyword in bed_keywords):
        return "chaussettes, chaussures et poussière"
    if any(keyword in normalized_place for keyword in shelf_keywords):
        return "livres, crayons et photos"
    # The agent asked about a place that is neither the bed nor the shelf.
    return "nourriture pour chat"

In [None]:
from langchain_openai import ChatOpenAI

# Chat model that drives the agent, created with streaming enabled.
chat_model = ChatOpenAI(temperature=0.9, streaming=True)

# Tools the agent is allowed to call.
tools = [get_items, where_cat_is_hiding]

# Tag the model's runs with "agent_llm" before handing it to the agent.
tagged_chat_model = chat_model.with_config({"tags": ["agent_llm"]})

# Build the OpenAI Functions agent, then wrap it in an executor whose runs
# are named "Agent".
agent = create_openai_functions_agent(tagged_chat_model, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config({"run_name": "Agent"})

In [None]:
# Note: We use `pprint` to print only to depth 1, it makes it easier to see the output from a high level, before digging in.
import pprint

chunks = []

async for chunk in agent_executor.astream(
    {"input": "Quels sont les objets qui se trouvent à l'endroit où le chat se cache?"}
):
    chunks.append(chunk)
    print("------")
    pprint.pprint(chunk, depth=1)

------
{'actions': [...], 'messages': [...]}
------
{'messages': [...], 'steps': [...]}
------
{'actions': [...], 'messages': [...]}
------
{'messages': [...], 'steps': [...]}
------
{'messages': [...],
 'output': "Les objets qui se trouvent à l'endroit où le chat se cache sont de "
           'la nourriture pour chat.'}


## Streaming avec du RAG

Voici l'application de questions-réponses avec sources que nous avons construite, dans le guide « Returning sources », à partir de l'article de Lilian Weng sur le blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) :

In [None]:
!pip install chromadb

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# --- Load: fetch the blog post, keeping only the title, header and body nodes.
bs_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs_strainer},
)
docs = loader.load()

# --- Chunk: split into pieces of at most 1000 characters with a 200-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# --- Index: embed the chunks with OpenAI embeddings and store them in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# --- Building blocks of the RAG chain: retriever, hub prompt and chat model.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line (two newlines).
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# Step that replaces the "context" entry (the retrieved documents) with their
# concatenated text, leaving the other input keys untouched.
stringify_context = RunnablePassthrough.assign(
    context=(lambda x: format_docs(x["context"]))
)

# Answer-generation pipeline: formatted context -> prompt -> chat model -> plain string.
rag_chain_from_docs = stringify_context | prompt | llm | StrOutputParser()

# Entry point: fetch the documents and pass the question through in parallel,
# then add an "answer" key computed from both. The result keeps "context" and
# "question" next to "answer", so the sources are returned with the answer.
retrieve_with_question = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
rag_chain_with_source = retrieve_with_question.assign(answer=rag_chain_from_docs)

### Stream la sortie finale

Avec LCEL, c'est plus facile : il suffit de boucler sur le flux renvoyé par `stream`.

In [None]:
# Stream the chain's output and print every chunk as it is produced.
question = "Qu'est-ce que la 'Self-Reflexion?"
for chunk in rag_chain_with_source.stream(question):
    print(chunk)

{'question': "Qu'est-ce que la 'Self-Reflexion?"}
{'context': [Document(page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}), Document(page_content='Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical pl

On peut aussi afficher la sortie au fil de l'eau, token par token, en regroupant les morceaux par clé :

In [None]:
# Accumulates, per key, everything streamed so far.
output = {}
# Key of the previously printed piece; a header is printed when the key changes.
curr_key = None

question = "Qu'est-ce que la 'Self-Reflexion?"
for chunk in rag_chain_with_source.stream(question):
    for key, piece in chunk.items():
        # First piece for this key starts the entry; later pieces are appended.
        if key in output:
            output[key] += piece
        else:
            output[key] = piece

        # Continue on the same line while the key is unchanged; otherwise start
        # a new "key: value" section after a blank line.
        if key == curr_key:
            print(piece, end="", flush=True)
        else:
            print(f"\n\n{key}: {piece}", end="", flush=True)
        curr_key = key

# Show the fully accumulated result as the cell output.
output



question: Qu'est-ce que la 'Self-Reflexion?

context: [Document(page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}), Document(page_content='Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical planner 

{'question': "Qu'est-ce que la 'Self-Reflexion?",
 'context': [Document(page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),
  Document(page_content='Another quite distinct approach, LLM+P (Liu et al. 2023), involves relying on an external classical 