In [14]:

import os
import logging

from auth import auth_user


# Notebook-wide logging: root logger at DEBUG (this also surfaces urllib3's
# connection logs), plus a logger named after the current module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(name=__name__)
 

from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from typing import List

from chainlit.input_widget import *
from typing import Optional

import chainlit as cl
import requests
from typing import List, Dict, Any

# Move these to a specific repo for the RAG when we have it
class KnowledgeClient:
    """Thin HTTP client for the knowledge-base service.

    Wraps the ``/search/``, ``/index/`` and ``/info/`` endpoints found under
    ``base_url``. Every request carries the ``accept: application/json``
    header and the API key in ``X-API-KEY``.
    """

    def __init__(self, base_url: str, api_key: Optional[str]):
        """Store the connection settings and build the shared request headers.

        Args:
            base_url: Root URL of the service; endpoint paths are appended to it.
            api_key: Value sent in the ``X-API-KEY`` header.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.headers = {
            "X-API-KEY": self.api_key,
            'accept': 'application/json'
        }

    def _endpoint(self, path: str) -> str:
        """Build the URL for ``path``, tolerating a trailing slash on ``base_url``."""
        return f"{self.base_url.rstrip('/')}/{path}/"

    def search(
        self,
        query: str,
        namespace: str,
        top_k: int = 5,
        similarity_threshold: float = 0.1,
    ) -> List[Dict[str, Any]]:
        """Query the ``/search/`` endpoint and return the decoded JSON body.

        Args:
            query: Free-text query, sent as the ``query`` parameter.
            namespace: Index to search, sent as the ``index_name`` parameter.
            top_k: Sent as the ``top_k`` parameter (defaults to 5).
            similarity_threshold: Sent as the ``similarity_threshold``
                parameter (defaults to 0.1).

        Returns:
            Whatever ``response.json()`` yields for the search response.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        # NOTE(review): a request without top_k / similarity_threshold was
        # observed to come back as 422, so both are always sent explicitly.
        response = requests.get(
            self._endpoint("search"),
            params={
                "query": query,
                "index_name": namespace,
                "top_k": top_k,
                "similarity_threshold": similarity_threshold,
            },
            headers=self.headers,
        )
        response.raise_for_status()
        return response.json()

    def add_items(self, files: List) -> "requests.Response":
        """Upload one or more open files to the ``/index/`` endpoint.

        Each file is sent as a multipart ``file`` part using its ``name``
        attribute as the filename and ``application/pdf`` as the content type.
        The file objects are closed before returning, whether or not the
        upload succeeded.

        Args:
            files: A single open file object, or a list/tuple of them.

        Returns:
            The raw ``requests`` response; the status is NOT checked here, so
            callers must inspect it themselves.
        """
        # TODO: ALIGN WITH CARLOS API

        # Accept a bare file object as well as a sequence of them.
        if not isinstance(files, (list, tuple)):
            files = [files]

        try:
            # 'files' in requests.post() can be a list of tuples for multiple files.
            multipart_files = [('file', (f.name, f, 'application/pdf')) for f in files]
            return requests.post(
                self._endpoint("index"),
                files=multipart_files,
                headers=self.headers,
            )
        finally:
            # Always release the handles, even when the request raises.
            for f in files:
                f.close()

    def index_info(self) -> Dict[str, Any]:
        """Fetch the ``/info/`` endpoint and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        response = requests.get(self._endpoint("info"), headers=self.headers)
        response.raise_for_status()
        return response.json()

class KBRetriever(BaseRetriever):
    """LangChain retriever backed by the knowledge-base ``/search/`` endpoint.

    Results returned by ``KnowledgeClient.search`` are converted into
    ``Document`` objects. Retrieval is best-effort: any failure is logged and
    an empty list is returned instead of raising.
    """

    # Client used to reach the knowledge-base service; set in __init__.
    kb_client: Optional[KnowledgeClient] = None
    # Index passed to every search as the client's `namespace` argument.
    namespace: Optional[str] = None

    def __init__(self, base_url: str, api_key: Optional[str], namespace: Optional[str] = None):
        """Create the retriever and its underlying ``KnowledgeClient``.

        Args:
            base_url: Root URL of the knowledge-base service.
            api_key: API key handed to ``KnowledgeClient``.
            namespace: Index to search. When left as ``None`` no
                ``index_name`` is sent, because ``requests`` drops
                ``None``-valued query parameters; whether the service
                accepts that is not visible here (TODO confirm).
        """
        super().__init__()
        self.kb_client = KnowledgeClient(base_url, api_key)
        self.namespace = namespace

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search the knowledge base and wrap each hit in a ``Document``.

        Args:
            query: Free-text query forwarded to the search endpoint.
            run_manager: Callback manager supplied by LangChain (unused here).

        Returns:
            One ``Document`` per search result, or ``[]`` if anything fails.
        """
        try:
            # search() requires the namespace; omitting it raised a TypeError
            # that the handler below swallowed, so nothing was ever returned.
            search_results = self.kb_client.search(query, self.namespace)
            documents = []
            for result in search_results:
                # The response captured in this notebook exposes the text as
                # 'page_content'; 'text' is kept as a fallback for the older shape.
                if 'page_content' in result:
                    content = result['page_content']
                else:
                    content = result['text']
                # NOTE(review): the metadata key is not visible in the truncated
                # sample response; 'metadata' is assumed, 'attributes' is the
                # key the code used before. Confirm against the API.
                metadata = result.get('metadata') or result.get('attributes') or {}
                documents.append(Document(page_content=content, metadata=metadata))
        except Exception as e:
            documents = []
            # Keep the traceback so swallowed failures stay diagnosable.
            logger.error(f"Error retrieving documents: {e}", exc_info=True)
        return documents


# Notebook-wide retriever pointed at the test knowledge-base deployment (no API key).
retriever = KBRetriever(api_key=None, base_url="https://knowledgebase.test.k8s.mvp.kalavai.net")

In [17]:
import requests

# Call the /search/ endpoint directly (bypassing KnowledgeClient) to inspect
# the raw JSON the service returns for a known query.
url = "https://knowledgebase.test.k8s.mvp.kalavai.net/search/"
params = {
    'query': 'ShoeCreatures was re-acquired',
    'index_name': 'carlosfm',
    'top_k': 5,
    'similarity_threshold': 0.1
}
headers = {
    'accept': 'application/json'
}

response = requests.get(url, params=params, headers=headers)
# Fail loudly on an HTTP error status (as KnowledgeClient.search does) instead
# of decoding and printing an error payload as if it were search results.
response.raise_for_status()
data = response.json()

print(data)



DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): knowledgebase.test.k8s.mvp.kalavai.net:443
DEBUG:urllib3.connectionpool:https://knowledgebase.test.k8s.mvp.kalavai.net:443 "GET /search/?query=ShoeCreatures+was+re-acquired&index_name=carlosfm&top_k=5&similarity_threshold=0.1 HTTP/1.1" 200 4105


[{'page_content': 'increase access to large chain stores. After shifting away focus from running shoes, \r\nShoeCreatures experienced eight years of consecutive unprofitability and losses.\r\nAfter the failures under Antarctica’s ownership, ShoeCreatures was re-acquired by a \r\nmultinational holdings company, Berkwelsh Hemingway. Under Berkwelsh’s ownership, \r\nShoeCreatures was able to make independent strategic moves. ShoeCreatures decided to \r\nstop competing with its larger, established competitors and decided to refocus only into \r\nrunning products, which include shoes, clothing and accessories. As part of this new \r\nstrategic intent, ShoeCreatures concentrated on it’s The Lion shoes while developing\r\ntechnology for alternative running products, resulting in the creation of the The Cheetah, \r\ngeared toward runners with normal feet. ShoeCreatures aimed to be the exclusive running \r\nbrand and expanded its product line to satisfy runners of all ages and styles. Rather th

In [16]:
# Same query as the direct request, this time routed through the client wrapper.
retriever.kb_client.search(
    query=" ShoeCreatures was re-acquired",
    namespace="carlosfm",
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): knowledgebase.test.k8s.mvp.kalavai.net:443
DEBUG:urllib3.connectionpool:https://knowledgebase.test.k8s.mvp.kalavai.net:443 "GET /search/?query=+ShoeCreatures+was+re-acquired&index_name=carlosfm HTTP/1.1" 422 285


HTTPError: 422 Client Error: Unprocessable Entity for url: https://knowledgebase.test.k8s.mvp.kalavai.net/search/?query=+ShoeCreatures+was+re-acquired&index_name=carlosfm

In [None]:
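# Request URL that returned 422 (no top_k / similarity_threshold):
# https://knowledgebase.test.k8s.mvp.kalavai.net/search/?query=+ShoeCreatures+was+re-acquired&index_name=carlosfm
# Same query with top_k and similarity_threshold included, for comparison:
# https://knowledgebase.test.k8s.mvp.kalavai.net/search/?query=%20ShoeCreatures%20was%20re-acquired&index_name=carlosfm&top_k=5&similarity_threshold=0.1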