# Conceitos avançados de Models

### Prompt few-shot

In [2]:
from langchain_openai import ChatOpenAI

from dotenv import load_dotenv

# Load environment variables from a local .env file
# (presumably where the OpenAI API key lives — confirm).
load_dotenv()

# Pin the model explicitly instead of relying on the library default, so the
# notebook keeps reproducing the recorded output ('gpt-3.5-turbo-0125') and
# stays consistent with the model used in the caching section.
chat = ChatOpenAI(model='gpt-3.5-turbo-0125')

In [3]:
from langchain_core.messages import HumanMessage, AIMessage

# Few-shot prompt: two solved examples (question -> short answer) followed by
# the question we actually want answered.
exemplos = [
    HumanMessage(content='Quanto é 1 + 1?'),
    AIMessage(content='2'),
    HumanMessage(content='Quanto é 10 * 5?'),
    AIMessage(content='50'),
]
pergunta = HumanMessage(content='Quanto é 10 + 3?')
mensagens = [*exemplos, pergunta]

chat.invoke(mensagens)

AIMessage(content='10 + 3 é igual a 13.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 52, 'total_tokens': 63, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-65500778-67e3-4586-96c2-3feda5609158-0', usage_metadata={'input_tokens': 52, 'output_tokens': 11, 'total_tokens': 63, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Isto é similar à formação de mensagens da API da OpenAI, mas com uma sintaxe diferente:

```python
mensagens = [
    {'role': 'user', 'content': 'Quanto é 1 + 1'},
    {'role': 'assistant', 'content': '2'},
    {'role': 'user', 'content': 'Quanto é 10 * 5'},
    {'role': 'assistant', 'content': '50'},
    {'role': 'user', 'content': 'Quanto é 10 + 3'},
]
```

## Utilizando outros modelos

In [2]:
from langchain_huggingface.chat_models.huggingface import ChatHuggingFace  # Atualizando importação!
from langchain_huggingface.llms.huggingface_endpoint import HuggingFaceEndpoint # Atualizando importação!

In [7]:
from huggingface_hub import login
# Interactive Hugging Face Hub login; renders the widget shown in this cell's output.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
# Hugging Face repo id of the model used in this section.
modelo = 'mistralai/Mixtral-8x7B-Instruct-v0.1'

# Build the endpoint-backed LLM for that repo and wrap it in a chat-model interface.
# NOTE: this rebinds `chat`, which earlier cells bound to a ChatOpenAI instance.
llm = HuggingFaceEndpoint(repo_id=modelo)
chat = ChatHuggingFace(llm=llm)

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [10]:
from langchain_core.messages import HumanMessage, AIMessage

# Same few-shot conversation as in the OpenAI section, rebuilt here so this
# section does not depend on state from earlier cells.
exemplos = [
    HumanMessage(content='Quanto é 1 + 1?'),
    AIMessage(content='2'),
    HumanMessage(content='Quanto é 10 * 5?'),
    AIMessage(content='50'),
]
pergunta = HumanMessage(content='Quanto é 10 + 3?')
mensagens = [*exemplos, pergunta]

chat.invoke(mensagens)

AIMessage(content=" 13. To determine the sum of two numbers, you can follow these steps:\n\n1. Identify the two numbers you want to add (in this case, 10 and 3)\n2. Write the numbers down, one above the other, with the smaller number on top and the larger number on the bottom.\n3. Starting from the right, add the digits in the ones place (in this case, 0 + 3 = 3)\n4. If the sum is greater than 9, carry the extra digit over to the next place value. In this case, the sum is only 3, so there is no need to carry anything over.\n5. Move to the next place value to the left, and add the digits in this place value, including the carried digit (if any) from the previous step. In this case, there are no digits in the tens place, so the sum is simply 10.\n6. The final sum is the total of the sums in each place value. In this case, the sum is 13 (10 in the tens place and 3 in the ones place).\n\nI hope this helps to clarify the process for adding numbers! If you have any more questions, don't hesi

A estrutura de `chat_model` utiliza a estrutura de `llm` como backend.

In [11]:
import langchain

# Legacy way of enabling debug output: flip the module-level flag around the call.
# try/finally guarantees the global flag is reset even when the call raises,
# so a failed request cannot leave debug logging switched on for later cells.
langchain.debug = True
try:
    chat.invoke(mensagens)
finally:
    langchain.debug = False

[32;1m[1;3m[llm/start][0m [1m[llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Quanto é 1 + 1?\nAI: 2\nHuman: Quanto é 10 * 5?\nAI: 50\nHuman: Quanto é 10 + 3?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatHuggingFace] [373ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " 13. To determine the sum of two numbers, you can follow these steps:\n\n1. Identify the two numbers you want to add (in this case, 10 and 3)\n2. Write the numbers down, one above the other, with the smaller number on top and the larger number on the bottom.\n3. Starting from the right, add the digits in the ones place (in this case, 0 + 3 = 3)\n4. If the sum is greater than 9, carry the extra digit over to the next place value. In this case, the sum is only 3, so there is no need to carry anything over.\n5. Move to the next place value to the left, and add the digits in this place value, including the carried digit (if any) from the previou

> Atenção: nas versões mais atuais do LangChain, é recomendado utilizar a função `set_debug` para ativar o modo de debug, da seguinte forma:

In [7]:
from langchain.globals import set_debug

# Enable debug output only for this call.
# try/finally guarantees the global setting is switched back off even when the
# call raises, so a failed request cannot leave debug logging on for later cells.
set_debug(True)
try:
    chat.invoke(mensagens)
finally:
    set_debug(False)

[32;1m[1;3m[llm/start][0m [1m[llm:ChatHuggingFace] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Quanto é 1 + 1?\nAI: 2\nHuman: Quanto é 10 * 5?\nAI: 50\nHuman: Quanto é 10 + 3?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatHuggingFace] [780ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " 13\\*",
        "generation_info": {
          "finish_reason": "stop"
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": " 13\\*",
            "response_metadata": {
              "token_usage": {
                "completion_tokens": 6,
                "prompt_tokens": 58,
                "total_tokens": 64
              },
              "model": "",
              "finish_reason": "stop"
            },
   

Outros modelos disponíveis:

https://python.langchain.com/docs/integrations/chat/

## Caching

### Cache em memória

In [None]:
from langchain_openai.chat_models import ChatOpenAI

from dotenv import load_dotenv

# Load variables from a local .env file before the client is created.
load_dotenv()

# Model id kept in a named constant so it is easy to spot and change.
MODELO_OPENAI = 'gpt-3.5-turbo-0125'
chat = ChatOpenAI(model=MODELO_OPENAI)

In [14]:
from langchain_core.messages import HumanMessage, SystemMessage

# Fixed conversation reused by every timing cell below: a system instruction
# that sets the assistant's tone, then the user's question.
instrucao_sistema = SystemMessage(content='Você é um assistente engraçado.')
pergunta = HumanMessage(content='Quanto é 1 + 1?')
mensagens = [instrucao_sistema, pergunta]

In [16]:
# `langchain.cache` is a deprecated import location; InMemoryCache is provided
# by langchain_core, which this notebook already depends on.
from langchain_core.caches import InMemoryCache
from langchain.globals import set_llm_cache

# Install a global in-memory LLM cache: repeating a call with the same input
# is then answered from memory instead of hitting the API again.
set_llm_cache(InMemoryCache())

Rodando a primeira vez

In [17]:
%%time

# First call after the in-memory cache was installed: its timing is the
# baseline to compare the repeated call against.
chat.invoke(mensagens)

CPU times: user 89 ms, sys: 5.9 ms, total: 94.9 ms
Wall time: 2.53 s


AIMessage(content='Depende, em que planeta estamos fazendo essa operação? Brincadeira, a resposta é 2! Ou será que estou apenas dizendo isso para confundir você? 😉', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 30, 'total_tokens': 72, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c3c778cd-4189-4c89-a9f0-5d7d12880d67-0', usage_metadata={'input_tokens': 30, 'output_tokens': 42, 'total_tokens': 72, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Rodando novamente

In [18]:
%%time

# Same input again: in the recorded run this returned the identical message
# (same run id) in well under a millisecond, i.e. it was served from the cache.
chat.invoke(mensagens)

CPU times: user 473 μs, sys: 70 μs, total: 543 μs
Wall time: 536 μs


AIMessage(content='Depende, em que planeta estamos fazendo essa operação? Brincadeira, a resposta é 2! Ou será que estou apenas dizendo isso para confundir você? 😉', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 42, 'prompt_tokens': 30, 'total_tokens': 72, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-c3c778cd-4189-4c89-a9f0-5d7d12880d67-0', usage_metadata={'input_tokens': 30, 'output_tokens': 42, 'total_tokens': 72, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

### Cache SQLite

In [13]:
from pathlib import Path

# NOTE(review): `langchain.cache` is a deprecated import path; newer LangChain
# documentation imports SQLiteCache from `langchain_community.cache` — confirm
# and migrate once that package is an explicit dependency of this notebook.
from langchain.cache import SQLiteCache
from langchain.globals import set_llm_cache

# NOTE(review): 'lancgchain' looks like a typo for 'langchain'; the file name is
# kept unchanged so an already existing cache file keeps being reused.
caminho_cache = 'arquivos/lancgchain_cache_db.sqlite'

# SQLite does not create missing parent folders; make sure the directory exists
# so this cell also works on a fresh checkout.
Path(caminho_cache).parent.mkdir(parents=True, exist_ok=True)

# Replace the global LLM cache with one persisted in a SQLite file.
set_llm_cache(SQLiteCache(database_path=caminho_cache))

In [14]:
%%time

# First call after switching to the SQLite-backed cache: its timing is the
# baseline to compare the repeated calls against.
chat.invoke(mensagens)

CPU times: user 21.5 ms, sys: 131 μs, total: 21.7 ms
Wall time: 716 ms


AIMessage(content='Depende, você quer a resposta matemática correta ou a resposta que vai te fazer rir?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 30, 'total_tokens': 53, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ed74a411-4d7c-451a-b150-8169996bd3bb-0', usage_metadata={'input_tokens': 30, 'output_tokens': 23, 'total_tokens': 53, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [15]:
%%time

# Same input again: in the recorded run the identical message (same run id)
# came back much faster than the first call, i.e. it was read from the cache.
chat.invoke(mensagens)

CPU times: user 55.7 ms, sys: 11.6 ms, total: 67.4 ms
Wall time: 67.1 ms


AIMessage(content='Depende, você quer a resposta matemática correta ou a resposta que vai te fazer rir?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 30, 'total_tokens': 53, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ed74a411-4d7c-451a-b150-8169996bd3bb-0', usage_metadata={'input_tokens': 30, 'output_tokens': 23, 'total_tokens': 53, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [16]:
%%time

# Third identical call: still the same cached message; in the recorded run it
# was faster again than the second call.
chat.invoke(mensagens)

CPU times: user 3.28 ms, sys: 0 ns, total: 3.28 ms
Wall time: 3.01 ms


AIMessage(content='Depende, você quer a resposta matemática correta ou a resposta que vai te fazer rir?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 30, 'total_tokens': 53, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ed74a411-4d7c-451a-b150-8169996bd3bb-0', usage_metadata={'input_tokens': 30, 'output_tokens': 23, 'total_tokens': 53, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})