**LOAD API KEY**

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

import warnings
warnings.filterwarnings("ignore")

**Load API Specifications Clean**

In [2]:
import json
from pathlib import Path
from typing import List, Dict, Any
from openapi_spec_validator import validate


class Endpoint:
    def __init__(self, api_title: str, api_description: str, api_version: str, openapi_version: str, path: str, method: str, details: Dict[str, Any]):
        self.api_title = api_title
        self.api_description = api_description
        self.api_version = api_version
        self.openapi_version = openapi_version
        self.path = path
        self.method = method.upper()
        self.details = details
    
    def generate_description(self) -> Dict[str, Any]:
        """
        Génère une description textuelle pour l'endpoint.

        Returns
        -------
        Dict[str, Any]
            La description textuelle de l'endpoint sous forme de dictionnaire.
        """
        endpoint_description = self.details.get('summary', '-')
        parameters = {param['name']: param for param in self.details.get('parameters', [])}
        tags = self.details.get('tags', [])
        responses = self.details.get('responses', {})

        infos = {
            "API_Title": self.api_title,
            "API_Description": self.api_description,
            "API_Version": self.api_version,
            "OpenAPI_Version": self.openapi_version,
            "Route": self.path,
            "Method": self.method,
            "Endpoint_Description": endpoint_description,
            "Parameters": parameters,
            "Tags": tags,
            "Responses": responses
        }

        return json.dumps(infos)

class API:
    def __init__(self, api_data: Dict[str, Any]):
        self.api_data = api_data
        self.title = api_data.get('info', {}).get('title', '-')
        self.description = api_data.get('info', {}).get('description', '-').replace('\n', ' ')
        self.version = api_data.get('info', {}).get('version', '-')
        self.openapi_version = api_data.get('openapi', '-')
        self.endpoints = []
        self._validate_spec()
        self.endpoints = self._create_endpoints()

    def _validate_spec(self):
        """
        Valide la spécification OpenAPI de l'API.

        Raises
        ------
        OpenAPIValidationError
            Si la spécification OpenAPI n'est pas valide.
        """
        try:
            validate(self.api_data)
        except Exception as e:
            raise SyntaxError(f"Invalid OpenAPI specification: {e}") from e
    
    def _create_endpoints(self) -> List[Endpoint]:
        endpoints = []
        for path, methods in self.api_data.get('paths', {}).items():
            for method, details in methods.items():
                endpoints.append(Endpoint(self.title, self.description, self.version, self.openapi_version, path, method, details))
        return endpoints
    
    def generate_endpoints_descriptions(self) -> List[Dict[str, Any]]:
        """
        Génère des descriptions pour tous les endpoints de l'API.

        Returns
        -------
        List[Dict[str, Any]]
            Une liste de dictionnaires, chacun contenant la description d'un endpoint.
        """
        return [endpoint.generate_description() for endpoint in self.endpoints]

def load_api_specs(file_path: Path) -> List[API]:
    """
    Charge les données des APIs à partir d'un fichier JSON et crée des instances d'API.

    Parameters
    ----------
    file_path : Path
        Le chemin du fichier contenant les données de l'API au format JSON.

    Returns
    -------
    List[API]
        Une liste d'instances d'API.
    """
    with file_path.open('r') as file:
        api_specs = json.load(file)
        return [API(api_data) for api_data in api_specs]


In [3]:
from tqdm import tqdm
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
import joblib
import uuid
import pandas as pd



client = OpenAI()
API_DOC_PATH = Path("../data/APIdocumentation.json")

def get_embedding(text, model="text-embedding-3-small"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

COMPUTE_EMBEDDINGS = False

if COMPUTE_EMBEDDINGS:
    # Chargement des APIs
    apis = load_api_specs(API_DOC_PATH)

    spec_embeddings = []
    for api in tqdm(apis):
        for endpoint in api.endpoints:
            endpoint_json = endpoint.generate_description()
            embedding = get_embedding(endpoint_json)
            spec_embeddings.append(
                {
                    "embeddings": embedding,
                    "documents": endpoint_json,
                    "metadatas": {"title":api.title, "version":api.version, "openapi":api.openapi_version, "endpoint":endpoint.path, "http_method":endpoint.method},
                    "id": uuid.uuid4()
                }
            )
    joblib.dump(spec_embeddings, "../data/embeddings.joblib")
else:
    spec_embeddings = joblib.load("../data/embeddings.joblib")
    df = pd.DataFrame(spec_embeddings)

FileNotFoundError: [Errno 2] No such file or directory: '../data/embeddings.joblib'

In [4]:
import chromadb
chroma_client = chromadb.Client()

collection = chroma_client.get_or_create_collection(name="api_endpoints")


In [5]:
collection.add(
    documents=df.documents.tolist(),
    embeddings=df.embeddings.tolist(),
    metadatas=df.metadatas.tolist(),
    ids=df.id.apply(str).to_list()
)

NameError: name 'df' is not defined

In [6]:
request = "I want to blur my face"

REQUEST = True
if REQUEST:
    embedding = get_embedding(request)
    joblib.dump(embedding, "../data/request_embedding.joblib")
else:
    embedding = joblib.load("../data/request_embedding.joblib")

results = collection.query(query_embeddings = embedding)

In [7]:
results['documents']

[[]]

In [8]:
results['distances']

[[]]

In [9]:
spec_embeddings[0]['documents']

NameError: name 'spec_embeddings' is not defined

**Connecting to the real cluster**

In [None]:
import chromadb
chroma_client = chromadb.HttpClient(
    host="192.168.49.2",
    port=31791,
    headers={"Authorization": "Bearer Hnq6GBxJlHfJEGoxrazxXdoG3aQ0gncP"}
)
collection = chroma_client.get_or_create_collection(name="api_endpoints")

In [None]:
request = "I want to blur my face"

REQUEST = True
if REQUEST:
    embedding = get_embedding(request)
    joblib.dump(embedding, "../data/request_embedding.joblib")
else:
    embedding = joblib.load("../data/request_embedding.joblib")

results = collection.query(query_embeddings = embedding)

In [None]:
results

{'ids': [['4ff67daa-67e2-4e30-be63-1ea9f5cf33cb',
   'f7a4ce4c-61f0-497d-9e90-0ed1d2229d03',
   '627355dc-b178-4e51-8c06-fbe5dfbb3d35',
   'ce1596f6-dc38-4ee3-a535-828fd09aec84',
   'db046546-fab2-4fbc-b602-4236d89ebeb3',
   'c83b0b3a-8c9e-45e3-8201-60c1784aac13',
   '7c5b051d-10aa-4d04-ac82-69a93e1f20d0',
   '4aa2bc9e-050a-4832-b66f-0d7d0ef03d04',
   '8be404e3-a700-4d31-8d64-a855d6b7f35d',
   '348c4971-4f9d-4352-8ceb-e7f15ee5dd0a']],
 'distances': [[1.1214076595255955,
   1.2169302115586516,
   1.2244906658203774,
   1.4515220689875672,
   1.4929895041890096,
   1.5089628713579588,
   1.5180095978740673,
   1.5909513525745822,
   1.5967385579611666,
   1.6241667619100735]],
 'embeddings': None,
 'metadatas': [[{'endpoint': '/compute',
    'http_method': 'POST',
    'openapi': '3.1.0',
    'title': 'Image Blur API.',
    'version': '1.0.0'},
   {'endpoint': '/status',
    'http_method': 'GET',
    'openapi': '3.1.0',
    'title': 'Image Blur API.',
    'version': '1.0.0'},
   {'endpoin