In [1]:
!pip install python-dotenv
!pip install langchain
!pip install langchain-openai
!pip install langchain-chroma
!pip install wikipedia
!pip install pypdf
!pip install pinecone
!pip install faiss-cpu
!pip install lark
!pip install rank_bm25
!pip install langchain_community
!pip install langchain_experimental
!pip install langchainhub

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl (990 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.3/990.3 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.23 (from langchain)
  Downloading langchain_core-0.2.23-py3-none-any.whl (374 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m374.2/374.2 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl (25 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.93-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collectin

In [2]:
from google.colab import drive
# Mount Google Drive at /content/drive; the .env file loaded later lives under this mount.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
from dotenv import load_dotenv, find_dotenv

In [4]:
# Load the API credentials from the .env file stored on the mounted Drive.

dotenv_path = "/content/drive/My Drive/ANALITICA_NOVIEMBRE/LANGCHAIN/.env"
load_dotenv(dotenv_path)

# A variable that is not set resolves to None instead of raising.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [5]:
from langchain_openai import ChatOpenAI

In [6]:
# Smoke test: build the chat model with its default settings and send a single prompt.
llm = ChatOpenAI()
output = llm.invoke("Cual es la capital de Argentina") # model = "gpt-3.5-turbo"
# The reply object exposes the generated text through `.content`.
print(output.content)

La capital de Argentina es Buenos Aires.


# 1- Tools

* Son interfaces que un `agente`, una `cadena` o un `LLM` pueden utilizar para interactuar con el mundo.
* Una `tool` contiene 5 elementos:
    - **Nombre de la tool**
    - **Descripción de la tool**
    - **JSON Schema para referirse a los inputs que debe recibir la tool**
    - **Función a la cual llamar**
    - **Si el resultado debe ser retornado directamente al usuario**
* Tanto el `nombre`, `descripción` como el `json` ayudan al LLM a tomar la decisión de usar determinada función, dado que formarán parte del prompt. Luego se decide usar la función dentro de la tool.
* Cuanto más simple sea el `input` de una `tool`, más fácil va a ser para el `LLM` poder utilizarla. La mayoría de los `agentes` trabajan con `tools` que poseen `un solo input`.
* El `nombre`, `descripción` y el esquema `json` se usan en el mensaje. Por lo tanto, es realmente importante que sean `claros` y describan exactamente cómo se debe utilizar la `tool`.

## A- Default

### A.1 Wikipedia

* Wrapper around WikipediaAPI.

* To use, you should have the wikipedia python package installed.

* This wrapper will use the Wikipedia API to conduct searches and fetch page summaries.

* By default, it will return the page summaries of the top-k results.

* It limits the Document content by doc_content_chars_max.

In [7]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [8]:
# Keep only the top search result and cap the returned document content at 100 characters.
api_wrapper = WikipediaAPIWrapper(top_k_results = 1, doc_content_chars_max = 100)
# Default Wikipedia tool built on top of the wrapper configured above.
tool1 = WikipediaQueryRun(api_wrapper = api_wrapper)

In [9]:
tool1.name # Note that the name already comes predefined

'wikipedia'

In [11]:
tool1.description # Note that the description already comes predefined

'A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.'

In [12]:
tool1.args # The tool expects a single argument named 'query', which must be a 'str'

{'query': {'title': 'Query',
  'description': 'query to look up on wikipedia',
  'type': 'string'}}

In [13]:
tool1.return_direct

False

In [14]:
tool1.run({"query": "langchain"})

'Page: LangChain\nSummary: LangChain is a framework designed to simplify the creation of applications '

* También podemos llamar a esta herramienta con una entrada de un solo `str`.
* Podemos hacer esto porque esta herramienta espera solo un `input`.
* Si requiriera múltiples `inputs`, no podríamos hacerlo.

In [15]:
tool1.run("langchain")

'Page: LangChain\nSummary: LangChain is a framework designed to simplify the creation of applications '

### A.2- Pandas

In [16]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI

NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code

In [17]:
import pandas as pd
from langchain_openai import OpenAI

# Load the Titanic sample CSV hosted in the pandas repository into a DataFrame.
df = pd.read_csv(
    "https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv"
)

In [18]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [22]:
llm = ChatOpenAI()

# verbose=True makes the agent print its intermediate thought/action steps.
# allow_dangerous_code=True is an explicit opt-in: this agent executes LLM-generated
# Python code, so it should only be run in a disposable / sandboxed environment.
agent = create_pandas_dataframe_agent(llm, df, verbose = True, allow_dangerous_code=True)

In [23]:
df.shape[0]

891

In [24]:
agent.invoke("how many rows are there?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To find out the number of rows in the dataframe, I should use the `shape` attribute of the dataframe. 
Action: python_repl_ast
Action Input: df.shape[0m[36;1m[1;3m(891, 12)[0m[32;1m[1;3mThe dataframe has 891 rows.
Final Answer: 891 rows[0m

[1m> Finished chain.[0m


{'input': 'how many rows are there?', 'output': '891 rows'}

In [25]:
agent.invoke("¿Cuál es el promedio de edad de las mujeres?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: We need to filter the dataframe to only include rows where `Sex` is "female" and then calculate the average age.

Action: python_repl_ast
Action Input: df[df['Sex'] == 'female']['Age'].mean()[0m[36;1m[1;3m27.915708812260537[0m[32;1m[1;3mI now know the final answer.

Final Answer: The average age of women is 27.92.[0m

[1m> Finished chain.[0m


{'input': '¿Cuál es el promedio de edad de las mujeres?',
 'output': 'The average age of women is 27.92.'}

In [26]:
df[df['Sex'] == 'female']['Age'].mean()

27.915708812260537

## B- Customizando Default Tools

* Se pueden modificar los siguientes elementos:
    - **name**
    - **description**
    - **json schema**

In [51]:
from langchain_core.pydantic_v1 import BaseModel, Field

In [52]:
class WikiInputs(BaseModel):
    """Inputs to the wikipedia tool."""

    # The Field description is surfaced in the tool's args schema (see `tool2.args`),
    # so it acts as guidance on how the `query` argument should be filled in.
    query: str = Field(
        description = "query to look up in Wikipedia, should be 3 or less words"
    )

In [53]:
# Same Wikipedia tool, now with a custom name, description and input schema.
# return_direct=True flags the tool's result to be handed back to the user directly.
tool2 = WikipediaQueryRun(
    api_wrapper=api_wrapper,
    args_schema=WikiInputs,
    name="wiki-tool",
    description="Look up things in wikipedia",
    return_direct=True,
)

In [54]:
print(tool1.name)
print(tool2.name)

wikipedia
wiki-tool


In [55]:
print(tool1.description)
print(tool2.description)

A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.
Look up things in wikipedia


In [56]:
print(tool1.args)
print(tool2.args)

{'query': {'title': 'Query', 'description': 'query to look up on wikipedia', 'type': 'string'}}
{'query': {'title': 'Query', 'description': 'query to look up in Wikipedia, should be 3 or less words', 'type': 'string'}}


In [57]:
print(tool1.return_direct)
print(tool2.return_direct)

False
True


In [58]:
print(f"print de tool1: {tool1.run('langchain')}.")
print()
print(f"print de tool2: {tool2.run('langchain')}.")

print de tool1: Page: LangChain
Summary: LangChain is a framework designed to simplify the creation of applications .

print de tool2: Page: LangChain
Summary: LangChain is a framework designed to simplify the creation of applications .


* Hay una gran cantidad de `tools` que ya están creadas: https://python.langchain.com/docs/integrations/tools/
* También existen las `toolkits` que son un conjunto de `tools`, que ya vienen creadas.
* Por otro lado necesitaremos seguramente en algún momento crear nuestras propias `tools`.

## C- Creando Tools

### C.1- Aclaraciones previas

**Clases**

In [59]:
import pandas as pd
# NOTE(review): this re-downloads the same Titanic CSV that was already read into `df`
# earlier in the notebook; kept so this section can be run on its own.
df = pd.read_csv(
    "https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv"
)

In [60]:
class Dataset:
    # Thin wrapper around a pandas DataFrame loaded from a CSV source.
    # NOTE: the Spanish docstrings are kept verbatim because later cells print them
    # through `__doc__`; the English explanations are given as comments instead.
    def __init__(self, file_path) -> None:
        """
        Carga los datos desde un archivo CSV.
        """
        # `file_path` is handed straight to pd.read_csv (the notebook passes a URL string);
        # the resulting frame is stored on the instance as `self.data`.
        self.data = pd.read_csv(file_path)

    def dataframe(self) -> pd.DataFrame:
        """
        Retorna el dataframe entero.
        """
        # Returns the stored frame itself (no copy is made here).
        return self.data

    def preview(self, rows: int = 5) -> pd.DataFrame:
        """
        Muestra las primeras 'rows' filas del dataset.
        """
        # First `rows` rows of the stored frame (5 by default).
        return self.data.head(rows)

    def describe(self) -> pd.DataFrame:
        """
        Devuelve una descripción estadística del dataset.
        """
        # Summary statistics as produced by DataFrame.describe().
        return self.data.describe()

In [61]:
dataset = Dataset("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv")

In [62]:
print(dataset.preview.__doc__)


        Muestra las primeras 'rows' filas del dataset.
        


In [63]:
dataset.preview()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [64]:
print(dataset.dataframe.__doc__)


        Retorna el dataframe entero.
        


In [65]:
dataset.dataframe()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [66]:
class SimpleDataset(Dataset):
    # Subclass of Dataset that adds a preview helper with a larger default row count.
    def __init__(self, file_path) -> None:
        """
        Initialize the inherited Dataset class with a CSV file.
        """
        # Loading is delegated entirely to Dataset.__init__.
        super().__init__(file_path)

    def preview_extended(self, rows: int = 10):
        """
        Extend the preview method so that it shows more rows by default.
        """
        # Reuses the parent's preview implementation; only the default row count differs.
        return super().preview(rows)

In [67]:
dataset2 = SimpleDataset("https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv")

In [68]:
dataset2.preview()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [69]:
dataset2.preview_extended(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


**Decoradores**

In [70]:
# A decorator is a function that receives another function as its argument and returns
# a new function, which usually modifies or extends the behavior of the original one.

def logger(func):
    """Decorate ``func`` so that every call is announced before and after it runs.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that prints a message naming ``func`` and its arguments, calls
        ``func`` with those same arguments, prints a completion message and
        returns whatever ``func`` returned. The wrapper keeps the ``__name__``
        and ``__doc__`` of ``func``.
    """
    import functools  # local import so this cell stays self-contained

    @functools.wraps(func)  # preserve the wrapped function's metadata
    def wrapper(*args, **kwargs):
        # The message is unchanged for positional-only calls; keyword arguments
        # are appended only when some were actually passed.
        detalle = f"{args}" if not kwargs else f"{args} y {kwargs}"
        print(f"Llamando a la función '{func.__name__}' con argumentos {detalle}")
        result = func(*args, **kwargs)
        print(f"La función '{func.__name__}' ha terminado su ejecución")
        return result
    return wrapper

@logger
def suma(a, b):
    # Prints the sum of both arguments; nothing is returned.
    total = a + b
    print(total)

@logger
def saludo(nombre):
    # Prints a greeting for the given name; nothing is returned.
    mensaje = f"Hola {nombre}!"
    print(mensaje)

In [71]:
suma(5, 3)

Llamando a la función 'suma' con argumentos (5, 3)
8
La función 'suma' ha terminado su ejecución


In [72]:
saludo("Alice")

Llamando a la función 'saludo' con argumentos ('Alice',)
Hola Alice!
La función 'saludo' ha terminado su ejecución


**Ahora sí, ¡sigamos!**

In [None]:
from langchain.tools import Tool

In [79]:
def quien_es_rufino(query):
    # Echo the incoming question and append the fixed answer in a single string.
    return f"La pregunta fue: {query}. La respuesta es: el hijo de Javi."

In [80]:
# Wrap the plain function as a single-input Tool with an explicit name and description.
run_quien_es_rufino_tool = Tool.from_function(
    func=quien_es_rufino,
    name="quien es Rufino",
    description="Responde quién es Rufino.",
    verbose=True,
)

In [81]:
run_quien_es_rufino_tool.name

'quien es Rufino'

In [82]:
run_quien_es_rufino_tool.description

'Responde quién es Rufino.'

In [83]:
run_quien_es_rufino_tool.args

{'tool_input': {'type': 'string'}}

In [84]:
run_quien_es_rufino_tool.run("Quién es Rufino?")

[32;1m[1;3mLa pregunta fue: Quién es Rufino?. La respuesta es: el hijo de Javi.[0m

'La pregunta fue: Quién es Rufino?. La respuesta es: el hijo de Javi.'

### C.2 StructuredTool

In [98]:
from langchain.tools import StructuredTool

In [109]:
def search_function(query: str):
    # Stub search: the query is ignored and a fixed result is always returned.
    result = "LangChain"
    return result


# Build a StructuredTool from the stub; the args schema is inferred from the signature.
search = StructuredTool.from_function(
    name="Search",
    description="useful for when you need to answer questions about current events",
    func=search_function,
)

In [110]:
print(search.name)
print(search.description)
print(search.args)

Search
useful for when you need to answer questions about current events
{'query': {'title': 'Query', 'type': 'string'}}


**Definiendo explícitamente la variable args**

In [116]:
from langchain.pydantic_v1 import BaseModel, Field

# Explicit input schema for the Calculator tool; the Field descriptions are what
# shows up in `calculator.args`.
class CalculatorInput(BaseModel):
    a: int = Field(description="first number")
    b: int = Field(description="second number")


def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    # Plain product of both operands.
    product = a * b
    return product


# Calculator tool with an explicitly declared input schema; return_direct=True
# flags the result to be returned to the user directly.
calculator = StructuredTool.from_function(
    name="Calculator",
    description="multiply numbers",
    func=multiply,
    args_schema=CalculatorInput,
    return_direct=True,
)

In [117]:
print(calculator.name)
print(calculator.description)
print(calculator.args)

Calculator
multiply numbers
{'a': {'title': 'A', 'description': 'first number', 'type': 'integer'}, 'b': {'title': 'B', 'description': 'second number', 'type': 'integer'}}


### C.3 Decorator

In [85]:
from langchain.tools import tool

**Un solo input**

In [86]:
# With the bare @tool decorator the function name becomes the tool name and the
# docstring becomes the tool description, so the docstring is runtime data.
@tool
def search(query: str) -> str:
    """Look up things online."""
    # Stub: the query is not used; a fixed string is returned.
    return "LangChain"

In [87]:
print(search.name)
print(search.description)
print(search.args)

search
Look up things online.
{'query': {'title': 'Query', 'type': 'string'}}


In [88]:
search.run({"query": "langchain"})

'LangChain'

**2 inputs**

In [89]:
# Two-input tool: each annotated parameter becomes an entry in the tool's args schema,
# and the docstring is used as the tool description.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b

In [90]:
print(multiply.name)
print(multiply.description)
print(multiply.args)

multiply
Multiply two numbers.
{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}


In [91]:
multiply.run({"a": 2, "b": 3})

6

**Puedo customizar el nombre**

In [92]:
# Passing a string to @tool overrides the tool name (here "search-tool");
# the description still comes from the docstring.
@tool("search-tool")
def search(query: str) -> str:
    """Look up things online."""
    # Stub: the query is not used; a fixed string is returned.
    return "LangChain"

In [93]:
print(search.name)
print(search.description)
print(search.args)

search-tool
Look up things online.
{'query': {'title': 'Query', 'type': 'string'}}


**Puedo customizar el input**

In [96]:
# Input schema for the search tool; the Field description is exposed through `search.args`.
class SearchInput(BaseModel):
    query: str = Field(description = "should be a search query")


# Custom name plus an explicit args schema; return_direct=True is reflected in
# `search.return_direct`. The docstring remains the tool description.
@tool("search-tool", args_schema=SearchInput, return_direct=True)
def search(query: str) -> str:
    """Look up things online."""
    # Stub: the query is not used; a fixed string is returned.
    return "LangChain"

In [97]:
print(search.name)
print(search.description)
print(search.args)
print(search.return_direct)

search-tool
Look up things online.
{'query': {'title': 'Query', 'description': 'should be a search query', 'type': 'string'}}
True


### C.4 BaseModel

In [None]:
def multiply2(a, b):
    """Return the product of ``a`` and ``b``; no type restriction is applied."""
    product = a * b
    return product

In [None]:
multiply2(2,4)

8

In [None]:
multiply2(2.2,4.4)

9.680000000000001

In [None]:
from langchain.pydantic_v1 import BaseModel, Field

In [None]:
# Input schema for the Calculator tool built from `multiply3(a, b)`.
# The field names must match the wrapped function's parameters: the tool validates
# the input dict (e.g. {"a": 2, "b": 3}) against this schema before calling the
# function, so unrelated fields would make every call fail validation.
class CalculatorInput(BaseModel):
    a: int = Field(description="first number")
    b: int = Field(description="second number")

In [None]:
def multiply3(a: int, b: int) -> int:
    """Multiply two numbers."""
    # The annotations are hints only; the function itself does no type enforcement.
    product = a * b
    return product

In [None]:
# Calculator tool whose inputs are validated against CalculatorInput before
# multiply3 is called; return_direct=True flags the result to be returned directly.
calculator = StructuredTool.from_function(
    name="Calculator",
    description="multiply numbers",
    func=multiply3,
    args_schema=CalculatorInput,
    return_direct=True,
)

NameError: name 'StructuredTool' is not defined

In [None]:
result = calculator.run({"a": 2, "b": 3})
print(result)

6


In [None]:
result = calculator.run({"a": 2.2, "b": 3.3})
print(result)

6


In [None]:
result = calculator.run({"a": 2.2, "b": 3.7})
print(result)

6
