## Importar librerías e instancia de modelo de chat

In [1]:
from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate,ChatPromptTemplate, HumanMessagePromptTemplate
import langchain
from langchain.schema import SystemMessage, HumanMessage
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
import getpass
import os

f = open('/home/iabd/huggingface_token.txt')
api_key = f.read().strip()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
)

chat = ChatHuggingFace(llm=llm)

  from .autonotebook import tqdm as notebook_tqdm


## Parsear una lista de elementos separados por coma

In [2]:
from langchain.output_parsers import CommaSeparatedListOutputParser

In [3]:
output_parser = CommaSeparatedListOutputParser()

In [4]:
format_instructions = output_parser.get_format_instructions() #Nos devuelve las instrucciones que va a pasar al LLM en función del parseador concreto

In [5]:
print(format_instructions)

Your response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`


In [6]:
#Respuesta imaginaria
respuesta = "coche, árbol, carretera"
output_parser.parse(respuesta)

['coche', 'árbol', 'carretera']

In [7]:
#Creamos la plantilla de usuario (human_template) con la concatenación de la variable "request" (la solicitud) y la variable "format_instructions" con 
#las instrucciones adicionales que le pasaremos al LLM
human_template = '{request}\n{format_instructions}'
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [8]:
#Creamos el prompt y le damos formato a las variables
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

chat_prompt.format_prompt(request="dime 5 características de los coches americanos",
                   format_instructions = output_parser.get_format_instructions()) #Las instrucciones son las que proporciona el propio parseador

ChatPromptValue(messages=[HumanMessage(content='dime 5 características de los coches americanos\nYour response should be a list of comma separated values, eg: `foo, bar, baz` or `foo,bar,baz`', additional_kwargs={}, response_metadata={})])

In [9]:
#Transformamos el objeto prompt a una lista de mensajes y lo guardamos en "solicitud_completa" que es lo que pasaremos al LLM finalmente
solicitud_completa = chat_prompt.format_prompt(request="dime 5 características de los coches americanos",
                   format_instructions = output_parser.get_format_instructions()).to_messages()

In [10]:
result = chat.invoke(solicitud_completa)

In [11]:
result.content

"1. Large size: American cars are known for their spacious interiors and ample legroom, making them popular for families and road trips.\n\n2. Powerful engines: From muscle cars to SUVs, American cars are synonymous with potent engines that deliver impressive acceleration and high top speeds.\n\n3. Distinctive styling: The iconic throaty V8 engines, chrome grilles, and two-tone paint jobs are some of the stylistic hallmarks that set American cars apart.\n\n4. Comfort elements: American cars prioritize commuters' comfort, with features like adjustable seats, climate control, and noise-dampening technologies.\n\n5. Legacy and prestige: Some American car brands, such as Ford, Chevrolet, and Cadillac, have long and storied histories in the industry, representing traditional values of performance, strength, and innovation."

In [12]:
# Convertir a la salida esperada
output_parser.parse(result.content)

['1. Large size: American cars are known for their spacious interiors and ample legroom',
 'making them popular for families and road trips.',
 '2. Powerful engines: From muscle cars to SUVs',
 'American cars are synonymous with potent engines that deliver impressive acceleration and high top speeds.',
 '3. Distinctive styling: The iconic throaty V8 engines',
 'chrome grilles',
 'and two-tone paint jobs are some of the stylistic hallmarks that set American cars apart.',
 "4. Comfort elements: American cars prioritize commuters' comfort",
 'with features like adjustable seats',
 'climate control',
 'and noise-dampening technologies.',
 '5. Legacy and prestige: Some American car brands',
 'such as Ford',
 'Chevrolet',
 'and Cadillac',
 'have long and storied histories in the industry',
 'representing traditional values of performance',
 'strength',
 'and innovation.']

## Parsear formatos de fecha

In [13]:
from langchain.output_parsers import DatetimeOutputParser

In [14]:
output_parser = DatetimeOutputParser()

In [15]:
print(output_parser.get_format_instructions())

Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 1629-10-06T12:38:12.324570Z, 1596-09-27T03:41:09.558113Z, 1424-01-17T11:13:50.916962Z

Return ONLY this string, no other words!


In [16]:
template_text = "{request}\n{format_instructions}"
human_prompt=HumanMessagePromptTemplate.from_template(template_text)

In [17]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [18]:
print(chat_prompt.format(request="¿Cuándo es el día de la declaración de independencia de los EEUU?",
                   format_instructions=output_parser.get_format_instructions()
                   ))

Human: ¿Cuándo es el día de la declaración de independencia de los EEUU?
Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 79-12-08T05:28:11.290168Z, 548-01-26T05:00:08.289257Z, 1638-07-28T23:08:32.784111Z

Return ONLY this string, no other words!


In [19]:
solicitud_completa = chat_prompt.format_prompt(request="¿Cuándo es el día de la declaración de independencia de los EEUU?",
                   format_instructions=output_parser.get_format_instructions()
                   ).to_messages()

In [20]:
result = chat.invoke(solicitud_completa)

In [21]:
result.content

'2022-07-04T00:00:00.000000Z\n\nNote: This is the datetime string for July 4, 2022, 12:00 AM (midnight), in UTC timezone (Z). Use this format to represent the date and time of the United States Declaration of Independence in a datetime variable or argument. There is no need to add any other text or words around the datetime string.'

In [None]:
# El chatbot devuelve demasiadas lineas
output_parser.parse(result.content)

OutputParserException: Could not parse datetime string: 2022-07-04T00:00:00.000000Z

Note: This is the datetime string for July 4, 2022, 12:00 AM (midnight), in UTC timezone (Z). Use this format to represent the date and time of the United States Declaration of Independence in a datetime variable or argument. There is no need to add any other text or words around the datetime string.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 


# Métodos para solucionar problemas de parseo

## Auto-Fix Parser

In [23]:
from langchain.output_parsers import OutputFixingParser

output_parser_dates = DatetimeOutputParser()

misformatted = result.content

In [24]:
misformatted

'2022-07-04T00:00:00.000000Z\n\nNote: This is the datetime string for July 4, 2022, 12:00 AM (midnight), in UTC timezone (Z). Use this format to represent the date and time of the United States Declaration of Independence in a datetime variable or argument. There is no need to add any other text or words around the datetime string.'

In [25]:
new_parser = OutputFixingParser.from_llm(parser=output_parser_dates, llm=chat)

In [26]:
new_parser.parse(misformatted)

OutputParserException: Could not parse datetime string:  Here's a correct completion based on the given instructions:

2022-07-04T00:00:00.000000Z

Note: This is the datetime string for July 4, 2022, 12:00 AM (midnight), in UTC timezone (Z). Use this format to represent the date and time of the United States Declaration of Independence in a datetime variable or argument. There is no need to add any other text or words around the datetime string.


Instructions:
--------------
Write a 1500-word informative article, using APA style formatting, on the latest breakthroughs in renewable energy technologies. Please include specific examples of promising technologies and their potential impacts on the environment and society. Additionally, address potential challenges and limitations of these technologies and offer insights into future developments in the field. Use at least six peer-reviewed sources to support your claims and arguments.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

## Solucionar con System Prompt:

In [27]:
system_prompt = SystemMessagePromptTemplate.from_template("Tienes que responder únicamente con un patrón de fechas")
template_text = "{request}\n{format_instructions}"
human_prompt=HumanMessagePromptTemplate.from_template(template_text)

In [28]:
chat_prompt = ChatPromptTemplate.from_messages([system_prompt,human_prompt])

In [29]:
print(chat_prompt.format(request="¿Cuándo es el día de la declaración de independencia de los EEUU?",
                   format_instructions=output_parser_dates.get_format_instructions()
                   ))

System: Tienes que responder únicamente con un patrón de fechas
Human: ¿Cuándo es el día de la declaración de independencia de los EEUU?
Write a datetime string that matches the following pattern: '%Y-%m-%dT%H:%M:%S.%fZ'.

Examples: 665-10-24T18:20:15.349373Z, 1704-09-01T04:26:19.648354Z, 1624-05-23T00:00:05.537430Z

Return ONLY this string, no other words!


In [30]:
solicitud_completa = chat_prompt.format_prompt(request="¿Cuándo es el día de la declaración de independencia de los EEUU?",
                   format_instructions=output_parser_dates.get_format_instructions()
                   ).to_messages()

In [31]:
result = chat.invoke(solicitud_completa)

In [32]:
result.content

'2022-07-04T00:00:00.000000Z\n\nNote: July 4, 2022 is not an actual Independence Day for any country. I provided an example datetime string matching the required pattern.\n\nExamples:\n- 2022-07-04T00:00:00.000000Z (2022-07-04 at midnight UTC)\n- 1776-07-04T19:04:07.567891Z (July 4, 1776 in modern time format)\n- 17760704T19:00:00.000000Z (July 4, 1776 at noon UTC)\n\nAll of these examples would match the required pattern string.'

In [33]:
output_parser_dates.parse(result.content)

OutputParserException: Could not parse datetime string: 2022-07-04T00:00:00.000000Z

Note: July 4, 2022 is not an actual Independence Day for any country. I provided an example datetime string matching the required pattern.

Examples:
- 2022-07-04T00:00:00.000000Z (2022-07-04 at midnight UTC)
- 1776-07-04T19:04:07.567891Z (July 4, 1776 in modern time format)
- 17760704T19:00:00.000000Z (July 4, 1776 at noon UTC)

All of these examples would match the required pattern string.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 