# Milvus Agent

### Milvus

In [2]:
import os
from pymilvus import MilvusClient, connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# Load variables from a local .env file (if one exists) before reading them,
# so this cell also works on a freshly restarted kernel. Values that are
# already present in the environment are left untouched by load_dotenv().
from dotenv import load_dotenv

load_dotenv()

# Read the connection settings once so both handles below share them.
milvus_uri = os.getenv("MILVUS_ENDPOINT")   # public endpoint obtained from Zilliz Cloud
milvus_token = os.getenv("MILVUS_API_KEY")  # API key or a colon-separated cluster username and password

# Fail early with a readable message instead of passing uri=None downstream.
if not milvus_uri:
    raise RuntimeError(
        "MILVUS_ENDPOINT is not set; export it or add it to the .env file."
    )

# High-level client (replace uri and token with your own via the environment).
client = MilvusClient(uri=milvus_uri, token=milvus_token)

# Register the 'default' ORM connection, which Collection(...) relies on.
connections.connect(
    alias='default',
    uri=milvus_uri,
    token=milvus_token,
)

In [26]:
from pymilvus import Collection
# Bind a handle to the already existing collection, then load it so that it
# can be searched and queried from the cells below.
news_collection_name = "News_2023"
collection = Collection(name=news_collection_name)
collection.load()

In [37]:
# Embed one sample claim; the resulting vector is shown as the cell output.
sample_claim = "El ministro de Justicia Luis Cordero asume su cargo en enero del 2023"
model.encode(sample_claim)

array([-0.13902153,  0.06297969,  0.12189946, -0.15383331,  0.08737598,
        0.15639697, -0.11988122,  0.01344451,  0.02991877,  0.3828121 ,
        0.02800514, -0.03443082, -0.39853036,  0.25623912,  0.12761955,
        0.1216578 , -0.14694041, -0.31341937, -0.02103888,  0.17736349,
        0.39888558,  0.13457789, -0.02190801, -0.28505543, -0.1663533 ,
       -0.06352693, -0.12871332,  0.0229103 , -0.03431107, -0.17860012,
       -0.08872243, -0.06442707,  0.39241984,  0.04267769,  0.14347081,
       -0.0277954 ,  0.22695011,  0.08359052,  0.22287366,  0.24982116,
        0.03286254,  0.00724386, -0.147435  ,  0.05207216,  0.34002858,
       -0.30910796, -0.02211615,  0.03677581, -0.03282573, -0.08547797,
       -0.11395331,  0.18369544,  0.3546018 ,  0.1734595 , -0.07541618,
       -0.1642773 ,  0.06981933, -0.07859086,  0.09296937,  0.06477956,
       -0.21263257,  0.37822843, -0.31583673,  0.08498256, -0.13836181,
        0.04286056,  0.15013874,  0.0503443 , -0.0175321 , -0.14

In [27]:
from typing import List, Any
from langchain.tools import tool
from langchain.pydantic_v1 import BaseModel, Field
from sentence_transformers import SentenceTransformer
# Load the multilingual Sentence Transformers model used to embed queries.
# NOTE(review): no device is passed here, so CPU/GPU placement is whatever the
# library picks by default -- confirm if CUDA is required.
model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
model = SentenceTransformer(model_name_or_path=model_name)

@tool('milvus_query_results')
def milvus_query_results(query: dict) -> List[Any]:
    """Return the bodies of the news documents most similar to the query text.

    The input must be a dict whose "title" key holds the text to search for,
    for example {"title": "Terremoto"}.
    """
    # NOTE: the docstring above doubles as the tool description handed to the
    # LLM, so it is kept short; implementation notes live in comments instead.

    # Accept the documented {"title": ...} form, and tolerate a {"query": ...}
    # dict or a bare string so a slightly-off tool call gives a clear outcome
    # rather than an opaque KeyError/TypeError.
    if isinstance(query, dict):
        text = query.get("title") or query.get("query")
    else:
        text = query
    if not text:
        raise ValueError(
            'milvus_query_results expects an input like {"title": "<text to search>"}'
        )

    # Embed the text with the same model used elsewhere in this notebook.
    vect = model.encode(text)

    # Vector search: up to 5 hits for the single query vector.
    results = collection.search(
        data=[vect],
        anns_field="vector",
        param={"metric_type": "L2", "params": {"nprobe": 1}},
        limit=5,
        expr=None,
        consistency_level="Strong"
    )

    # One hit list is returned per query vector; only one vector was sent.
    ids = list(results[0].ids) if len(results) > 0 else []
    if not ids:
        # Nothing matched: skip the follow-up query (and its "id in []" filter).
        return []

    # Fetch the "body" field of the matched entities by primary key.
    res = collection.query(
        expr=f"""id in {ids}""",
        output_fields=["body"],
        consistency_level="Strong"
    )
    return res

In [38]:
# Manual check of the vector search outside the agent: embed one claim and ask
# the collection for up to five matches using the same settings as the tool.
search_params = {"metric_type": "L2", "params": {"nprobe": 1}}
vect = model.encode('El ministro de Justicia Luis Cordero asume su cargo en enero del 2023')
results = collection.search(
    data=[vect],
    anns_field="vector",
    param=search_params,
    limit=5,
    expr=None,
    consistency_level="Strong",
)

### LLM and Agent-Chain

In [28]:
# Tools the agent may call; only the Milvus search tool is registered.
tools = [milvus_query_results]

In [29]:
from langchain_together import Together
import os
from dotenv import load_dotenv
# Pull settings from a local .env file into the environment before the API
# key is read below.
load_dotenv()

# Generation settings for the Together-hosted Mixtral model. The API key is
# passed separately so it is not kept around in this settings dict.
llm_settings = {
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "temperature": 0,
    "max_tokens": 1024,
    "top_k": 1,
}
llm = Together(together_api_key=os.getenv("TOGETHER_API_KEY"), **llm_settings)

In [19]:

# Spanish fact-checking system prompt for the structured-chat agent.
# NOTE: the following cell assigns DEFAULT_TEMPLATE again, so this version is
# only in effect if that cell is skipped.
# {tools} and {tool_names} are filled in by the agent; doubled braces are
# literal braces. The worked example names the real tool and is valid JSON,
# and every code fence is closed so the model sees a consistent format.
DEFAULT_TEMPLATE = """Eres un asistente de IA especializado en verificar hechos sobre la realidad chilena en el año 2023. 
Tu deber es verificar la veracidad de las afirmaciones presentadas relacionadas con eventos, situaciones o datos sobre Chile durante el año 2023, y responder si son verdaderas o falsas de manera concisa, sin proporcionar explicaciones adicionales. 
Para revisar si la afirmacion es correcta, harás una consulta a una base de datos con la siguiente herramienta:

Herramientas:
{tools}

Si no tienes suficiente información en tu base de conocimientos para determinar la veracidad de una afirmación sobre la realidad chilena en 2023, debes responder honestamente que no lo sabes. 
Tus respuestas deben ser siempre en español.

Luego de obtener las informacion de la base de datos, debes responder unicamente con 'Verdadero', 'Falso' o 'No lo sé' a la afirmación sobre la realidad chilena en 2023: 

Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).

Valid "action" values: "Final Answer" or {tool_names}

Provide only ONE action per $JSON_BLOB, as shown:

```
{{
    "action": $TOOL_NAME,
    "action_input": $INPUT
}}
```

Follow this format:

Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
    "action": "Final Answer",
    "action_input": "Final response to human"
}}
```

For example, if you want to use a tool to make a query to the vectore store database, your $JSON_BLOB might look like this:

```
{{
    "action": "milvus_query_results",
    "action_input": {{"query": {{"title": "Example query"}}}}
}}
```

Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation
"""

In [30]:
# System prompt for the structured-chat agent (this assignment replaces any
# earlier DEFAULT_TEMPLATE). {tools} and {tool_names} are filled in by the
# agent; doubled braces are literal braces.
# The worked example names the real tool and is valid JSON, and every code
# fence is closed so the model sees a consistent format.
DEFAULT_TEMPLATE = """You are a helpfull AI assistant that helps to makes querys to a vectore store database given a human consult of a specific topic.
The AI gives information and explanation from the results of the query.
The AI is talkative and provides a lot of specific details of its context.
If the AI does not know the answer to the question, it will truthfully sai it does not know.

You have access to the following tools:
{tools}

Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).

Valid "action" values: "Final Answer" or {tool_names}

Provide only ONE action per $JSON_BLOB, as shown:

```
{{
    "action": $TOOL_NAME,
    "action_input": $INPUT
}}
```

Follow this format:

Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
    "action": "Final Answer",
    "action_input": "Final response to human"
}}
```

For example, if you want to use a tool to make a query to the vectore store database, your $JSON_BLOB might look like this:

```
{{
    "action": "milvus_query_results",
    "action_input": {{"query": {{"title": "Example query"}}}}
}}
```

Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation
"""


In [31]:
from langchain import hub
# Pull the reference structured-chat agent prompt from the LangChain Hub; the
# template of its first message is replaced with DEFAULT_TEMPLATE further down.
prompt = hub.pull("hwchase17/structured-chat-agent")


In [32]:
# Swap the template of the prompt's first message for the custom instructions
# held in DEFAULT_TEMPLATE.
prompt[0].prompt.template = DEFAULT_TEMPLATE

In [33]:
from langchain.agents import create_openai_functions_agent
from langchain.agents import AgentExecutor, create_openai_tools_agent, create_structured_chat_agent
# Build a structured-chat agent from the LLM, the tool list and the customised prompt.
agent = create_structured_chat_agent(llm, tools, prompt)

In [34]:
from langchain.agents import AgentExecutor

# Wrap the agent in an executor: verbose tracing is on, and parsing errors in
# the model output are handled by the executor instead of being raised.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

In [35]:
# Run the agent on a full claim; the returned dict is shown as the cell output.
claim = "El ministro de Justicia Luis Cordero asume su cargo en enero del 2023"
agent_executor.invoke({"input": claim})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Action:
```
{
    "action": "milvus_query_results",
    "action_input": {
        "query": {
            "title": "Ministro de Justicia Luis Cordero",
            "type": "object"
        }
    }
}
```
[0m[36;1m[1;3m[][0m[32;1m[1;3m

Action:
```
{
    "action": "Final Answer",
    "action_input": "There is no information about the Minister of Justice Luis Cordero in the database."
}
```[0m

[1m> Finished chain.[0m


{'input': 'El ministro de Justicia Luis Cordero asume su cargo en enero del 2023',
 'output': 'There is no information about the Minister of Justice Luis Cordero in the database.'}

In [36]:
# Run the agent on a single-word topic; the returned dict is shown as the cell output.
topic = "Terremoto"
agent_executor.invoke({"input": topic})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Action:
```
{
    "action": "milvus_query_results",
    "action_input": {
        "query": {
            "title": "Terremoto",
            "type": "object"
        }
    }
}
```
[0m[36;1m[1;3m[][0m[32;1m[1;3m

Action:
```
{
    "action": "Final Answer",
    "action_input": "I'm sorry, I couldn't find any information related to 'Terremoto'. Could you please provide more context or specify your question?"
}
```[0m

[1m> Finished chain.[0m


{'input': 'Terremoto',
 'output': "I'm sorry, I couldn't find any information related to 'Terremoto'. Could you please provide more context or specify your question?"}

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from tqdm.auto import tqdm
import torch
import os
from pymilvus import MilvusClient, connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import json
import re
import argparse
import os.path
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

ruta_archivo = '../Data/2023_cleaned_embeddings.json'

# Open the JSON file explicitly as UTF-8 so decoding does not depend on the
# platform's default encoding.
with open(ruta_archivo, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Read the connection settings once so both handles below share them.
milvus_uri = os.getenv("MILVUS_ENDPOINT")   # public endpoint obtained from Zilliz Cloud
milvus_token = os.getenv("MILVUS_API_KEY")  # API key or a colon-separated cluster username and password

# Fail early with a readable message instead of passing uri=None downstream.
if not milvus_uri:
    raise RuntimeError(
        "MILVUS_ENDPOINT is not set; export it or add it to the .env file."
    )

# High-level client used for the inserts below.
client = MilvusClient(uri=milvus_uri, token=milvus_token)

# Register the 'default' ORM connection to the cluster.
connections.connect(
    alias='default',
    uri=milvus_uri,
    token=milvus_token,
)

# The API key is deliberately NOT printed: cell output is stored with the
# notebook and would leak the secret to anyone who can read the file.

# Number of rows sent per insert request
block_size = 1000

# Insert data['rows'] in consecutive slices of at most block_size rows
for i in range(0, len(data['rows']), block_size):
    res = client.insert(
        collection_name='News_2023',
        data=data['rows'][i:i + block_size]
    )
    print('Insertando bloque ' + str(i))

print("Data insertada correctamente")