## Testig MistralClient

### Chat Minstral (without streaming)

In [20]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
from dotenv import load_dotenv
import os

# Changing the API key file
load_dotenv() # Para cargar .env

  
# Function to intect with the Mistral API and get a response
def chat_mistral(user_content):
  # Initialize the Mistral client with API key
  api_key = os.getenv("MISTRAL_API_KEY")
  model = 'mistral-tiny'
  client = MistralClient(api_key=api_key)

  # Prepare a list of ChatMessage objects with the user's content
  messages = [ChatMessage(role='user', content=user_content)]

  # Get the response from the Mistral API without streaming
  chat_response = client.chat(model=model, messages=messages)

  try:
    # Extract the content from the response
    response_content = chat_response.choices[0].message.content if chat_response.choices else ""
  except AttributeError as e:
    print(f'An error occurred while processing the response: {e}')
    response_content = ""
  
  # Return the response content
  return response_content

CYAN = '\033[96m'
# ANSI escape code for yellow color
YELLOW = '\033[93m'
# ANSI escape code to reset to default color
RESET_COLOR = '\033[0m'

# Example usage
user_content = "Tengo que secar 5 camisas al sol. Luego de 10 horas todas las camisas están secas. \
  Al siguiente día tengo que secar 20 camisas, ¿qué tiempo me tomará?"

response = chat_mistral(user_content)
print(f'{CYAN}{response}{RESET_COLOR}')

[96mBased on the information provided, it takes 10 hours for one batch of 5 shirts to dry in the sun. To find out how long it will take to dry 20 shirts, we first need to determine how long it takes to dry one shirt:

1 shirt = 5 shirts / 5
1 shirt = 1 shirt

Now we know that it takes 1 hour to dry one shirt (since it takes 10 hours to dry 5 shirts):

Time to dry 1 shirt = 10 hours / 5 shirts = 2 hours / 1 shirt

Finally, to find out how long it takes to dry 20 shirts, we can multiply the time it takes to dry one shirt by the number of shirts:

Time to dry 20 shirts = 2 hours / 1 shirt * 20 shirts = 40 hours

So, it will take 40 hours to dry 20 shirts in the sun.[0m


### Embedding Mistral

In [23]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
#import asyncio

from dotenv import load_dotenv
import os
load_dotenv() # Para cargar .env

def embed_mistral(user_content):
    api_key = os.getenv("MISTRAL_API_KEY")
    client = MistralClient(api_key=api_key)
    embedding_response = client.embeddings(
        model ='mistral-embed',
        input = user_content,
    )
    return embedding_response

# async def async_embed_mistral(user_content):
#     api_key = os.getenv("MISTRAL_API_KEY")
#     client = MistralClient(api_key=api_key)
#     embedding_response = await client.embeddings(
#         model ='mistral-embed',
#         input = user_content * 10
#     )

text = "Hello from TechNision"
enbeded_text = embed_mistral(text)
enbeded_text

EmbeddingResponse(id='embd-f93a54785b904ebca5c11626eb3d66f6', object='list', data=[EmbeddingObject(object='embedding', embedding=[-0.0295562744140625, -0.0178985595703125, 0.0177154541015625, 0.02166748046875, 0.0195465087890625, -0.011962890625, 0.05780029296875, 0.00487518310546875, 0.005107879638671875, -0.0190582275390625, -0.047760009765625, 0.0345458984375, -0.0236053466796875, -0.035125732421875, -0.0509033203125, 0.007305145263671875, -0.01313018798828125, -0.0079498291015625, 0.060028076171875, 0.03607177734375, -0.04376220703125, 0.041412353515625, -0.055816650390625, -0.0030574798583984375, -0.034332275390625, -0.00415802001953125, -0.009552001953125, -0.00864410400390625, -0.0509033203125, -0.00647735595703125, 0.0004870891571044922, -0.043609619140625, -0.034210205078125, -0.003719329833984375, 0.049530029296875, -0.0229644775390625, -0.045745849609375, -0.0509033203125, 0.02667236328125, 0.0189361572265625, -0.00623321533203125, -0.00522613525390625, -0.001681327819824218

In [22]:
import asyncio

async def greet(name):
    print(f"¡Hola, {name}!")
    await asyncio.sleep(2)  # Simula una operación que toma 2 segundos
    print(f"¡Adiós, {name}!")

async def main():
    # Ejecutar dos saludos de manera concurrente
    task1 = asyncio.create_task(greet("Alice"))
    task2 = asyncio.create_task(greet("Bob"))

    # Esperar a que ambas tareas se completen
    await task1
    await task2

# Ejecutar el bucle de eventos de asyncio para ejecutar las tareas
asyncio.run(main())


RuntimeError: asyncio.run() cannot be called from a running event loop

## Basic prompt + model + output partser

In [11]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate


load_dotenv() # Para cargar .env
openai_api_key = os.getenv("OPENAI_API_KEY")

# 1. Prompt 
prompt = ChatPromptTemplate.from_template("Tengo que secar 5 camisas al sol. Luego de 10 horas todas las camisas están secas. Al siguiente día tengo que secar {num_camisas} camisas, ¿qué tiempo me tomará?")

# 2a. Chat Model
from langchain_openai import ChatOpenAI
model = ChatOpenAI(
  model='gpt-3.5-turbo-0613',
  api_key=openai_api_key
  )

# 3. Parsert instantion
output_parser = StrOutputParser()

# 4. chaining
chain = prompt | model | output_parser

# 5. Invoke
chain.invoke({'num_camisas': '20'})

'Si todas las camisas se secan al sol en 10 horas, entonces podemos decir que se seca 1 camisa cada 2 horas (10 horas / 5 camisas = 2 horas por camisa).\n\nSi al siguiente día tienes que secar 20 camisas, entonces tomará 2 horas por camisa multiplicado por 20 camisas, lo que nos da un total de 40 horas.\n\nPor lo tanto, te tomará 40 horas secar las 20 camisas al sol al día siguiente.'

In [32]:
# 2a. LLM Model
from langchain_openai.llms import OpenAI
llm = OpenAI(
  model="gpt-3.5-turbo-instruct",
  api_key=openai_api_key)

# 3. Parsert instantion
output_parser = StrOutputParser()

# 4. chaining
chain = prompt | llm | output_parser

# 5. Invoke
message = chain.invoke({'num_camisas': '20'})
message

'\n\nEl tiempo que te tomará dependerá de factores como la intensidad del sol y la humedad del ambiente, pero considerando que se mantienen las mismas condiciones, podrías estimar que te tomará aproximadamente 40 horas, ya que estarías secando cuatro veces más camisas que en el primer día. Sin embargo, es importante tener en cuenta que el proceso de secado puede ser más rápido o más lento según las condiciones mencionadas anteriormente. '

#### With mistralai

In [12]:
# 2a. LLM Model
# Changing the API key file
load_dotenv() # Para cargar .env
mistral_api_key = os.getenv("MISTRAL_API_KEY")

from langchain_mistralai import ChatMistralAI
chat_mistral = ChatMistralAI(
  model="mistral-small",
  api_key=mistral_api_key)

# 3. Parsert instantion
output_parser = StrOutputParser()

# 4. chaining
chain = prompt | chat_mistral | output_parser

# 5. Invoke
message = chain.invoke({'num_camisas': '20'})
message

'Si tienes suficiente espacio para tender 20 camisas al sol y las condiciones climáticas son similares, entonces deberían secarse en el mismo tiempo, es decir, 10 horas. Esto se debe a que el tiempo de secado depende del ambiente y las propiedades de absorción de la humedad de las camisas, no del número de camisas que estás secando.\n\nSin embargo, si el espacio es limitado y debes tender las 20 camisas en lotes, entonces el tiempo de secado completo será mayor a 10 horas. Por ejemplo, si puedes tender solo 5 camisas a la vez, necesitarías 4 lotes para secar las 20 camisas, lo que requeriría 40 horas en total (10 horas * 4 lotes). \n\nEspero que esta información te sea útil. Si tienes alguna otra pregunta, no dudes en preguntarme.'

In [20]:
from langchain_community.embeddings import HuggingFaceEmbeddings


ImportError: cannot import name 'MistralEmbeddings' from 'langchain_community.embeddings' (/Users/Eber/Library/CloudStorage/OneDrive-Personal/Documentos/3. TechNision/Projects/Data_extractor_from_documents/.venv/lib/python3.11/site-packages/langchain_community/embeddings/__init__.py)

## RAG search from documen in memory

In [17]:
from operator import itemgetter

from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_mistralai import ChatMistralAI
from mistralai.client import MistralClient
from dotenv import load_dotenv
import os
load_dotenv() # Para cargar .env

# 1. Enbedding
def embedding_mistral(user_content):
    api_key = os.getenv("MISTRAL_API_KEY")
    client = MistralClient(api_key=api_key)
    embedding_response = client.embedding(
        model ='mistral-embed',
        input = user_content,
    )
    return embedding_response


# 2. Vectostore
vectorstore = FAISS.from_texts(
    ['harrison worked at kensho'], 
    embedding = embedding_mistral
)

# 3. Retrival
retriever = vectorstore.as_retriever()

# 4. prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# 5. Chatmodel
chat_mistral = ChatMistralAI(
  model="mistral-small",
  api_key=mistral_api_key)

# 6. chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | chat_mistral
    | StrOutputParser()     
)

# 7. invoke
result = chain.invoke("Where did harrison work?")

AttributeError: 'function' object has no attribute 'embed_documents'

In [None]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

load_dotenv() # Para cargar .env
openai_api_key = os.getenv("OPENAI_API_KEY")

# 1. Vector store
vectorstore = DocArrayInMemorySearch.from_texts(
  texts = ["harrizon worked at kensho", "bears like to eat honey"],
  metadata = {},
  embedding = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    api_key=openai_api_key
  )
)

# 2. retriever
retriever = vectorstore.as_retriever()

# 3. Prompt
template = """Answers only in spanish the question based only on the following context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# 4. model
model = ChatOpenAI(
  model='gpt-3.5-turbo-0613',
  api_key=openai_api_key
  )

# parser
output_parser = StrOutputParser()

setup_and_retrieval = RunnableParallel(
  {'context': retriever, 'question': RunnablePassthrough()}
)

chain = setup_and_retrieval | prompt | model | output_parser

# output
response = chain.invoke("Where did harrison work?")
response

### Testing pydantic

In [9]:
from pydantic import BaseModel, ValidationError

# Defining model
class User(BaseModel):
    id: int
    username: str
    email: str

# Correct data input
input_data_correct = {
    'id': 1,
    'username': 'john_doe',
    'email': 'john@technision.io'
}

# Incorrect data input
input_data_incorrect = {
    'id': 2,
    'username': 'jane_smith'
}

try:
    # Check data input
    user1 = User(**input_data_correct)
    print('Valid data input: '.format(user1))
except ValidationError as e:
    print('Validation Error: {}'.format(e))

# try:
#     # Trying to valid incorrect data
#     user2 = User(**input_data_incorrect)
#     print('Valid data input: '.format(user2))
# except ValidationError as e:
#     print('Validation error: {}'.format(e))

Valid data input: 


### Testing `DocArrayInMemorySearch`

In [15]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_openai import OpenAIEmbeddings

# Text examples
texts = [
    'El PEI es un documento de planeamiento a nivel pliego constituido por OEI y AEI',
    'El POI es un documento de planeamiento a nivel UE constituido por AO e inversiones',
    'El PESEM es un documento de planeamiento a nivel sectorial constituido por OES y AES',
]

# Start enbedding
embedding = OpenAIEmbeddings()

# Instancia de DocArrayInMemorySearch
db = DocArrayInMemorySearch.from_texts(texts, embedding)

# try search
query = "¿Qué es el POI?"
similar_docs = db.similarity_search(query, metadata={})

for doc in similar_docs:
    print(doc.text)

ValidationError: 2 validation errors for DocArrayDoc
text
  Field required [type=missing, input_value={'embedding': [-0.0090137... -0.007030032780268735]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
metadata
  Field required [type=missing, input_value={'embedding': [-0.0090137... -0.007030032780268735]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing

### Testing agents