# Feed

In [1]:
!pip install jq
!pip install nest_asyncio
!pip install langchain_chroma langchain_openai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Get Data

In [2]:
import asyncio
from aiohttp import ClientSession
import nest_asyncio

nest_asyncio.apply()

base_url = 'https://pokeapi.co/api/v2/pokemon'

async def fetch_json(session, url):
    async with session.get(url) as response:
        response =  await response.json()
        response['url'] = url
        return response

async def fetch_all_pokemon_details(base_url, limit=10):
    async with ClientSession() as session:
        all_pokemon = []
        next_url = f"{base_url}?offset=0&limit={limit}"

        while next_url:
            data = await fetch_json(session, next_url)
            all_pokemon.extend(data['results'])
            next_url = None
            #next_url = data['next']

        tasks = [fetch_json(session, pokemon['url']) for pokemon in all_pokemon]
        all_pokemon_details = await asyncio.gather(*tasks)
        return all_pokemon_details


async def fetch_all_pokemons(limit):
    base_url = "https://pokeapi.co/api/v2/pokemon"
    all_pokemon_details = asyncio.run(fetch_all_pokemon_details(base_url, limit))
    data = []
    for details in all_pokemon_details:
        data.append({"url": details["url"], "name": details['name'], "ID": details['id'], "types": ', '.join(t['type']['name'] for t in details['types'])})
    return data

pokemons = await fetch_all_pokemons(20)

# Load and embed

In [3]:
from langchain_community.document_loaders import JSONLoader
import tempfile
import os
import json


with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.json') as temp_file:
    json.dump(pokemons, temp_file, indent=4)
    temp_file_path = temp_file.name

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata['source'] = record['url']
    return metadata

loader = JSONLoader(
    jq_schema=".[]",
    file_path=temp_file_path,
    text_content=False,
    metadata_func=metadata_func
)


documents = loader.load()
print(documents)
os.remove(temp_file_path)

[Document(metadata={'source': 'https://pokeapi.co/api/v2/pokemon/1/', 'seq_num': 1}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/1/", "name": "bulbasaur", "ID": 1, "types": "grass, poison"}'), Document(metadata={'source': 'https://pokeapi.co/api/v2/pokemon/2/', 'seq_num': 2}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/2/", "name": "ivysaur", "ID": 2, "types": "grass, poison"}'), Document(metadata={'source': 'https://pokeapi.co/api/v2/pokemon/3/', 'seq_num': 3}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/3/", "name": "venusaur", "ID": 3, "types": "grass, poison"}'), Document(metadata={'source': 'https://pokeapi.co/api/v2/pokemon/4/', 'seq_num': 4}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/4/", "name": "charmander", "ID": 4, "types": "fire"}'), Document(metadata={'source': 'https://pokeapi.co/api/v2/pokemon/5/', 'seq_num': 5}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/5/", "name": "charmeleon", "ID": 5, "types": "fir

# Embed and store

In [4]:

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv

load_dotenv()

embedding = OpenAIEmbeddings()

db = Chroma(persist_directory="../db/chroma/pokemons", embedding_function=embedding, collection_name="pokemon")
db.add_documents(documents=documents)

['3fdf52db-f01f-44a7-ba50-c17ebeba45d9',
 'e89a8064-9aeb-4a5f-9e71-f6a8518f78ed',
 'ebde91f7-d5c3-43b1-a19f-cec2f5fbb6a2',
 'b58c369b-7437-4bf4-b9e5-a85044f98251',
 '54242400-c185-4dc0-b0a4-7453ea72baaa',
 '6e0698a4-9892-4283-bf75-6fe04842dbe6',
 'a18f258f-04d6-4927-b6a9-b039e637b9cb',
 '2c62db82-6b9c-4082-b94a-953999e653be',
 '4836ab89-c12e-4c3b-a846-419fa957d1f3',
 'f13804e7-c453-4a8e-bf3e-649c1d8e7676',
 '325e38de-9ffa-4400-971f-6a30eb827c00',
 '44d43070-f5bf-4c1a-a785-c63da381794a',
 'e50fbb83-cc30-42d1-af31-05df359210a8',
 '968081cc-e7cb-41e7-8900-fd2971402282',
 '08ab4acb-cf5b-4dc3-af15-3f3b2152f767',
 '93d11572-d311-403b-8cb7-a0812e6b9ee8',
 '7b4b6e67-e33c-4832-8a46-13ea0fb89182',
 '20012597-1929-413e-b79d-117c5aed48f1',
 'd95010ab-d4fc-471b-afa1-9065688150d4',
 'fd548006-b538-46ce-91c9-4b45d1b8efc6']

# Retrieve

In [5]:
from langchain.retrievers import SelfQueryRetriever
from langchain_openai import OpenAI
from langchain_community.query_constructors.chroma import ChromaTranslator
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.prompts import PromptTemplate

llm = OpenAI(temperature=0)

metadata_fields_info = [
    {"name": "name", "description": "Pokemon name", "type": "string"},
    {"name": "ID", "description": "Pokemon ID", "type": "string"},
    {"name": "types", "description": "Pokemon types (fire, water, etc...), comma separated", "type": "string"},
]

retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=db,
    document_contents="List of pokemonons",
    metadata_field_info=metadata_fields_info,
    structured_query_translator=ChromaTranslator(),
    enable_limit=True,
)

template = """
You are an helpful assistant, 
Format the response from the following context only, do not use your own knowledge.
If you don't know, say : "I don't know"

Context: ```
{context}
```

user query: ```{input}```
answer: 
"""

prompt = PromptTemplate(template=template, input_variables=['input', 'context'])

context = RunnableParallel(
    context=retriever,
    input=RunnablePassthrough()
)

print(retriever.invoke('what is the id of bulbasaur'))

chain = context | prompt | llm

response = print(chain.invoke('How many pokemons do you know ?'))

[Document(metadata={'seq_num': 1, 'source': 'https://pokeapi.co/api/v2/pokemon/1/'}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/1/", "name": "bulbasaur", "ID": 1, "types": "grass, poison"}'), Document(metadata={'seq_num': 2, 'source': 'https://pokeapi.co/api/v2/pokemon/2/'}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/2/", "name": "ivysaur", "ID": 2, "types": "grass, poison"}'), Document(metadata={'seq_num': 3, 'source': 'https://pokeapi.co/api/v2/pokemon/3/'}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/3/", "name": "venusaur", "ID": 3, "types": "grass, poison"}'), Document(metadata={'seq_num': 9, 'source': 'https://pokeapi.co/api/v2/pokemon/9/'}, page_content='{"url": "https://pokeapi.co/api/v2/pokemon/9/", "name": "blastoise", "ID": 9, "types": "water"}')]
I know 4 pokemons: pidgeotto, pidgeot, pidgey, and beedrill.


In [6]:
# Create stuff retriver
# doc: https://python.langchain.com/v0.2/docs/tutorials/summarization/#:~:text=Map%2Dreduce%2C%20which%20splits%20documents,the%20documents%20in%20a%20sequence.

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain, StuffDocumentsChain, LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain_core.documents import Document

import textwrap
chain = load_summarize_chain(llm, chain_type="map_reduce",verbose = False)
output_summary = chain.run(documents)
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)


# With stuffdocument chain
# Define prompt
prompt_template = """Count and classify the document to get an overall preview of all documents
"{context}"
SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Define LLM chain
llm_chain = prompt | llm

# Define StuffDocumentsChain
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain)


summary  = stuff_chain.invoke(documents)["output_text"]

summary_doc = Document(summary)
summary_db = Chroma(persist_directory="../db/chroma/pokemon_summary", embedding_function=embedding, collection_name="summary")
summary_db.add_documents([summary_doc])


  warn_deprecated(


 This data contains information about various Pokemon, including their names, ID numbers, types, and
URLs for more information. The Pokemon range from grass and poison types like Bulbasaur and
Venusaur, to fire and flying types like Charizard, to bug and poison types like Weedle and Beedrill,
to normal and flying types like Pidgey and Pidgeot, and finally to normal types like Rattata and
Raticate.


  warn_deprecated(


['abe31a4d-2543-4660-8c10-8b69de053474']

In [48]:
# Next step, create a tool to get the summary, a tool to get the retriever and an agent

