**LOAD API KEY**

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

import warnings
warnings.filterwarnings("ignore")

**Load API Specifications Clean**

In [2]:
import json
from pathlib import Path
from typing import List, Dict, Any
from openapi_spec_validator import validate


class Endpoint:
    def __init__(self, api_title: str, api_description: str, api_version: str, openapi_version: str, path: str, method: str, details: Dict[str, Any]):
        self.api_title = api_title
        self.api_description = api_description
        self.api_version = api_version
        self.openapi_version = openapi_version
        self.path = path
        self.method = method.upper()
        self.details = details
    
    def generate_description(self) -> Dict[str, Any]:
        """
        Génère une description textuelle pour l'endpoint.

        Returns
        -------
        Dict[str, Any]
            La description textuelle de l'endpoint sous forme de dictionnaire.
        """
        endpoint_description = self.details.get('summary', '-')
        parameters = {param['name']: param for param in self.details.get('parameters', [])}
        tags = self.details.get('tags', [])
        responses = self.details.get('responses', {})

        infos = {
            "API_Title": self.api_title,
            "API_Description": self.api_description,
            "API_Version": self.api_version,
            "OpenAPI_Version": self.openapi_version,
            "Route": self.path,
            "Method": self.method,
            "Endpoint_Description": endpoint_description,
            "Parameters": parameters,
            "Tags": tags,
            "Responses": responses
        }

        return json.dumps(infos)

class API:
    def __init__(self, api_data: Dict[str, Any]):
        self.api_data = api_data
        self.title = api_data.get('info', {}).get('title', '-')
        self.description = api_data.get('info', {}).get('description', '-').replace('\n', ' ')
        self.version = api_data.get('info', {}).get('version', '-')
        self.openapi_version = api_data.get('openapi', '-')
        self.endpoints = []
        self._validate_spec()
        self.endpoints = self._create_endpoints()

    def _validate_spec(self):
        """
        Valide la spécification OpenAPI de l'API.

        Raises
        ------
        OpenAPIValidationError
            Si la spécification OpenAPI n'est pas valide.
        """
        try:
            validate(self.api_data)
        except Exception as e:
            raise SyntaxError(f"Invalid OpenAPI specification: {e}") from e
    
    def _create_endpoints(self) -> List[Endpoint]:
        endpoints = []
        for path, methods in self.api_data.get('paths', {}).items():
            for method, details in methods.items():
                endpoints.append(Endpoint(self.title, self.description, self.version, self.openapi_version, path, method, details))
        return endpoints
    
    def generate_endpoints_descriptions(self) -> List[Dict[str, Any]]:
        """
        Génère des descriptions pour tous les endpoints de l'API.

        Returns
        -------
        List[Dict[str, Any]]
            Une liste de dictionnaires, chacun contenant la description d'un endpoint.
        """
        return [endpoint.generate_description() for endpoint in self.endpoints]

def load_api_specs(file_path: Path) -> List[API]:
    """
    Charge les données des APIs à partir d'un fichier JSON et crée des instances d'API.

    Parameters
    ----------
    file_path : Path
        Le chemin du fichier contenant les données de l'API au format JSON.

    Returns
    -------
    List[API]
        Une liste d'instances d'API.
    """
    with file_path.open('r') as file:
        api_specs = json.load(file)
        return [API(api_data) for api_data in api_specs]


In [3]:
from tqdm import tqdm
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
import joblib
import uuid
import pandas as pd



client = OpenAI()
API_DOC_PATH = Path("../data/APIdocumentation.json")

def get_embedding(text, model="text-embedding-3-small"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

COMPUTE_EMBEDDINGS = False

if COMPUTE_EMBEDDINGS:
    # Chargement des APIs
    apis = load_api_specs(API_DOC_PATH)

    spec_embeddings = []
    for api in tqdm(apis):
        for endpoint in api.endpoints:
            endpoint_json = endpoint.generate_description()
            embedding = get_embedding(endpoint_json)
            spec_embeddings.append(
                {
                    "embeddings": embedding,
                    "documents": endpoint_json,
                    "metadatas": {"title":api.title, "version":api.version, "openapi":api.openapi_version, "endpoint":endpoint.path, "http_method":endpoint.method},
                    "id": uuid.uuid4()
                }
            )
    joblib.dump(spec_embeddings, "../data/embeddings.joblib")
else:
    spec_embeddings = joblib.load("../data/embeddings.joblib")
    df = pd.DataFrame(spec_embeddings)

FileNotFoundError: [Errno 2] No such file or directory: '../data/embeddings.joblib'

In [10]:
import chromadb
chroma_client = chromadb.Client()

collection = chroma_client.get_or_create_collection(name="api_endpoints")


In [12]:
collection.add(
    documents=df.documents.tolist(),
    embeddings=df.embeddings.tolist(),
    metadatas=df.metadatas.tolist(),
    ids=df.id.apply(str).to_list()
)

Insert of existing embedding ID: d511ec9f-3371-4665-9bf6-19d231e6fa07
Insert of existing embedding ID: bd2a3bd6-dab5-4495-b91d-edc8a99a7186
Insert of existing embedding ID: 5cc05784-9fa8-41d7-b3e3-cbccfd76ce20
Insert of existing embedding ID: 4172a934-0c2b-4d90-884c-98445899e096
Insert of existing embedding ID: 8ea24cbd-69ab-428e-9c25-dd2cda85126c
Insert of existing embedding ID: 5e633809-21c7-4501-885c-9e19c1bff9fb
Insert of existing embedding ID: 9f6925b7-b8fc-41db-8c3f-5add6d8eaebf
Insert of existing embedding ID: cb8f9d2f-01e4-4e0e-a4df-afb476c2248e
Insert of existing embedding ID: 5be489a8-3812-400d-b4c2-6bb4e40fe0a8
Insert of existing embedding ID: d4e6841d-4d4d-4df4-bd81-5fe8a8d5511e
Insert of existing embedding ID: 4400c6ba-5e67-4c1c-ab83-c8df86c27967
Insert of existing embedding ID: 68548d2e-f45b-4de7-bc03-98184ac8d77a
Insert of existing embedding ID: e3d9d29f-ec38-4a1d-806e-1a1eae99718e
Insert of existing embedding ID: ac86b6d9-0234-4267-9982-9e3b610a611a
Insert of existing e

In [28]:
request = "I want to blur my face"

REQUEST = True
if REQUEST:
    embedding = get_embedding(request)
    joblib.dump(embedding, "../data/request_embedding.joblib")
else:
    embedding = joblib.load("../data/request_embedding.joblib")

results = collection.query(query_embeddings = embedding)

In [29]:
results['documents']

[['{"API_Title": "Image Blur API.", "API_Description": " This service blurs the image in the given areas. The areas are given as a list of [x1, y1, x2, y2] coordinates. ", "API_Version": "1.0.0", "OpenAPI_Version": "3.1.0", "Route": "/compute", "Method": "POST", "Endpoint_Description": "Compute task", "Parameters": {}, "Tags": ["Tasks"], "Responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}, "422": {"description": "Validation Error", "content": {"application/json": {"schema": {"$ref": "#/components/schemas/HTTPValidationError"}}}}}}',
  '{"API_Title": "Image Blur API.", "API_Description": " This service blurs the image in the given areas. The areas are given as a list of [x1, y1, x2, y2] coordinates. ", "API_Version": "1.0.0", "OpenAPI_Version": "3.1.0", "Route": "/status", "Method": "GET", "Endpoint_Description": "Get service availability", "Parameters": {}, "Tags": ["Service"], "Responses": {"200": {"description": "Successful Res

In [23]:
results['distances']

[[1.0147067308425903,
  1.0728217363357544,
  1.1717665195465088,
  1.5974116325378418,
  1.6087987422943115,
  1.6110053062438965,
  1.6125280857086182,
  1.6306476593017578,
  1.6346759796142578,
  1.6511472463607788]]

In [27]:
spec_embeddings[0]['documents']

'{"API_Title": "Album Cover Art Generation API.", "API_Description": "Album Cover Art Generation is an image generation API that allows you to generate an art image (album cover)from lyrics and music style. Returns several images: - `image1`: the first generated image (stabilityai/stable-diffusion-2-base) - `image2`: the second generated image (prompthero/openjourney) - `image3`: the third generated image (civitai) ", "API_Version": "0.0.1", "OpenAPI_Version": "3.0.2", "Route": "/status", "Method": "GET", "Endpoint_Description": "Get service availability", "Parameters": {}, "Tags": ["Service"], "Responses": {"200": {"description": "Successful Response", "content": {"application/json": {"schema": {}}}}}}'

**Connecting to the real cluster**

In [32]:
import chromadb
chroma_client = chromadb.HttpClient(
    host="192.168.49.2",
    port=31791,
    headers={"Authorization": "Bearer vCFVcijgiyaEjBzh34NugffYnEL6i9ng"}
)
collection = chroma_client.get_or_create_collection(name="api_endpoints")

In [33]:
request = "I want to blur my face"

REQUEST = True
if REQUEST:
    embedding = get_embedding(request)
    joblib.dump(embedding, "../data/request_embedding.joblib")
else:
    embedding = joblib.load("../data/request_embedding.joblib")

results = collection.query(query_embeddings = embedding)

In [34]:
results

{'ids': [['4ff67daa-67e2-4e30-be63-1ea9f5cf33cb',
   'f7a4ce4c-61f0-497d-9e90-0ed1d2229d03',
   '627355dc-b178-4e51-8c06-fbe5dfbb3d35',
   'ce1596f6-dc38-4ee3-a535-828fd09aec84',
   'db046546-fab2-4fbc-b602-4236d89ebeb3',
   'c83b0b3a-8c9e-45e3-8201-60c1784aac13',
   '7c5b051d-10aa-4d04-ac82-69a93e1f20d0',
   '4aa2bc9e-050a-4832-b66f-0d7d0ef03d04',
   '8be404e3-a700-4d31-8d64-a855d6b7f35d',
   '348c4971-4f9d-4352-8ceb-e7f15ee5dd0a']],
 'distances': [[1.1214076595255955,
   1.2169302115586516,
   1.2244906658203774,
   1.4515220689875672,
   1.4929895041890096,
   1.5089628713579588,
   1.5180095978740673,
   1.5909513525745822,
   1.5967385579611666,
   1.6241667619100735]],
 'embeddings': None,
 'metadatas': [[{'endpoint': '/compute',
    'http_method': 'POST',
    'openapi': '3.1.0',
    'title': 'Image Blur API.',
    'version': '1.0.0'},
   {'endpoint': '/status',
    'http_method': 'GET',
    'openapi': '3.1.0',
    'title': 'Image Blur API.',
    'version': '1.0.0'},
   {'endpoin