**LOAD API KEY**

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read the local .env file; presumably it provides the API key used by later cells (confirm)

import warnings
# Suppresses every warning for the rest of the session, including
# deprecation notices emitted by the libraries imported further down.
warnings.filterwarnings("ignore")

**LOAD API SPECIFICATIONS**

In [2]:
import json
from pathlib import Path
from typing import List, Dict

# JSON file holding the API specifications; the path is relative to the notebook's working directory.
API_DOC_PATH = Path("../data/APIdocumentation.json")


def load_api_specs(file_path: Path) -> List[dict]:
    """
    Load the API specifications stored in a JSON file.

    Parameters
    ----------
    file_path : Path
        Path of the JSON file containing the API data.

    Returns
    -------
    List[dict]
        The decoded JSON content. It is expected to be a list of
        dictionaries, each one describing an API and its endpoints.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification. Passing the encoding explicitly avoids
    # decoding with the platform locale (e.g. cp1252 on Windows), which would
    # corrupt or reject non-ASCII text in the specifications.
    with file_path.open('r', encoding='utf-8') as file:
        return json.load(file)

# Raw specifications loaded once at notebook level; the preview call further down indexes into this list.
api_specs = load_api_specs(API_DOC_PATH)

def generate_api_endpoints_descriptions(api_data: Dict) -> List[str]:
    """
    Build a textual description for every endpoint of a given API.

    Parameters
    ----------
    api_data : Dict
        The OpenAPI document of one specific API.

    Returns
    -------
    List[str]
        One description string per (path, HTTP method) operation, in the
        order in which they appear under ``paths``.
    """
    # A Path Item object may also carry fields that are not operations
    # ("parameters", "summary", "description", "servers", "$ref", "x-..."
    # extensions). Only the HTTP-method keys describe endpoints; treating the
    # others as methods crashes on non-dict values or yields bogus endpoints.
    http_methods = {'get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace'}
    return [
        generate_endpoint_description(api_data, path, method, details)
        for path, methods in api_data.get('paths', {}).items()
        for method, details in methods.items()
        if method.lower() in http_methods
    ]


def generate_endpoint_description(api_data: Dict, path: str, method: str, details: Dict) -> str:
    """
    Build the textual description of a single endpoint.

    Parameters
    ----------
    api_data : Dict
        The OpenAPI document of the API that contains this endpoint.
    path : str
        The route of the endpoint.
    method : str
        The HTTP method (GET, POST, etc.); it is upper-cased in the output.
    details : Dict
        The operation object of the endpoint.

    Returns
    -------
    str
        A single comma-separated line with the API title, description and
        versions, followed by the route, method, summary, parameters, tags
        and responses of the endpoint. Missing text fields are rendered
        as ``-``.
    """
    info = api_data.get('info', {})
    api_title = info.get('title', '-')
    # Newlines are flattened so the whole description stays on one line.
    api_description = info.get('description', '-').replace('\n', ' ')
    api_version_number = info.get('version', '-')
    openapi_version = api_data.get('openapi', '-')
    endpoint_description = details.get('summary', '-')

    # Parameters are keyed by name. An entry may instead be a Reference
    # object ({"$ref": ...}) with no "name"; fall back to the reference
    # string, then to a positional key, rather than raising KeyError.
    parameters = {}
    for index, param in enumerate(details.get('parameters', [])):
        if 'name' in param:
            key = param['name']
        else:
            key = param.get('$ref', f"param_{index}")
        parameters[key] = param

    tags = details.get('tags', [])
    responses = details.get('responses', {})

    return (
        f"API_Title: {api_title}, API_Description: {api_description}, API_Version: {api_version_number}, "
        f"OpenAPI_Version: {openapi_version}, Route: {path}, Method: {method.upper()}, Endpoint_Description: {endpoint_description}, "
        f"Parameters: {parameters}, Tags: {tags}, Responses: {responses}"
    )

# Preview: display the generated descriptions for the first API of the file.
generate_api_endpoints_descriptions(api_specs[0])

["API_Title: Album Cover Art Generation API., API_Description: Album Cover Art Generation is an image generation API that allows you to generate an art image (album cover)from lyrics and music style. Returns several images: - `image1`: the first generated image (stabilityai/stable-diffusion-2-base) - `image2`: the second generated image (prompthero/openjourney) - `image3`: the third generated image (civitai) , API_Version: 0.0.1, OpenAPI_Version: 3.0.2, Route: /status, Method: GET, Endpoint_Description: Get service availability, Parameters: {}, Tags: ['Service'], Responses: {'200': {'description': 'Successful Response', 'content': {'application/json': {'schema': {}}}}}",
 "API_Title: Album Cover Art Generation API., API_Description: Album Cover Art Generation is an image generation API that allows you to generate an art image (album cover)from lyrics and music style. Returns several images: - `image1`: the first generated image (stabilityai/stable-diffusion-2-base) - `image2`: the seco

**LOAD API SPECIFICATIONS (CLEAN VERSION)**

In [3]:
import json
from pathlib import Path
from typing import List, Dict, Any
from openapi_spec_validator import validate

class Endpoint:
    """
    One operation (route + HTTP method) of an API, together with the
    API-level metadata needed to describe it on its own.
    """

    def __init__(self, api_title: str, api_description: str, api_version: str, openapi_version: str, path: str, method: str, details: Dict[str, Any]):
        """
        Parameters
        ----------
        api_title : str
            Title of the API that owns the endpoint.
        api_description : str
            Description of the API that owns the endpoint.
        api_version : str
            Version of the API.
        openapi_version : str
            Version of the OpenAPI specification used by the document.
        path : str
            Route of the endpoint.
        method : str
            HTTP method; stored upper-cased.
        details : Dict[str, Any]
            The operation object of the endpoint.
        """
        self.api_title = api_title
        self.api_description = api_description
        self.api_version = api_version
        self.openapi_version = openapi_version
        self.path = path
        self.method = method.upper()
        self.details = details

    def generate_description(self) -> Dict[str, Any]:
        """
        Build the description of the endpoint.

        Returns
        -------
        Dict[str, Any]
            A dictionary with the API metadata, the route, the method, the
            summary (``-`` when missing), the parameters keyed by name, the
            tags and the responses of the endpoint.
        """
        # Parameters are keyed by name. An entry may instead be a Reference
        # object ({"$ref": ...}) with no "name"; fall back to the reference
        # string, then to a positional key, rather than raising KeyError.
        parameters = {}
        for index, param in enumerate(self.details.get('parameters', [])):
            if 'name' in param:
                key = param['name']
            else:
                key = param.get('$ref', f"param_{index}")
            parameters[key] = param

        return {
            "API_Title": self.api_title,
            "API_Description": self.api_description,
            "API_Version": self.api_version,
            "OpenAPI_Version": self.openapi_version,
            "Route": self.path,
            "Method": self.method,
            "Endpoint_Description": self.details.get('summary', '-'),
            "Parameters": parameters,
            "Tags": self.details.get('tags', []),
            "Responses": self.details.get('responses', {})
        }

class API:
    """
    A validated OpenAPI document together with the endpoints it declares.
    """

    # Keys of a Path Item object that describe an operation. The other fields
    # allowed there ("parameters", "summary", "description", "servers",
    # "$ref", "x-..." extensions) are not endpoints.
    _HTTP_METHODS = frozenset({'get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace'})

    def __init__(self, api_data: Dict[str, Any]):
        """
        Parameters
        ----------
        api_data : Dict[str, Any]
            The OpenAPI document of the API.

        Raises
        ------
        SyntaxError
            If the document fails OpenAPI validation.
        """
        self.api_data = api_data
        self.endpoints = []
        # Validate before reading any field, so that a malformed document is
        # reported as a validation failure rather than as an attribute error
        # raised while extracting the metadata below.
        self._validate_spec()
        info = api_data.get('info', {})
        self.title = info.get('title', '-')
        # Newlines are flattened so the description stays on one line.
        self.description = info.get('description', '-').replace('\n', ' ')
        self.version = info.get('version', '-')
        self.openapi_version = api_data.get('openapi', '-')
        self.endpoints = self._create_endpoints()

    def _validate_spec(self):
        """
        Validate the OpenAPI specification of the API.

        Raises
        ------
        SyntaxError
            If the specification is not valid. The exception raised by the
            validator is chained as ``__cause__``.
        """
        try:
            validate(self.api_data)
        except Exception as e:
            raise SyntaxError(f"Invalid OpenAPI specification: {e}") from e

    def _create_endpoints(self) -> List["Endpoint"]:
        """
        Create one ``Endpoint`` per (path, HTTP method) operation of the document.

        Returns
        -------
        List[Endpoint]
            The endpoints, in the order in which they appear under ``paths``.
        """
        return [
            Endpoint(self.title, self.description, self.version, self.openapi_version, path, method, details)
            for path, methods in self.api_data.get('paths', {}).items()
            for method, details in methods.items()
            # Skip path-level fields that are not operations.
            if method.lower() in self._HTTP_METHODS
        ]

    def generate_endpoints_descriptions(self) -> List[Dict[str, Any]]:
        """
        Build the descriptions of all the endpoints of the API.

        Returns
        -------
        List[Dict[str, Any]]
            A list of dictionaries, each one holding the description of an endpoint.
        """
        return [endpoint.generate_description() for endpoint in self.endpoints]

def load_api_specs(file_path: Path) -> List[API]:
    """
    Load the API data from a JSON file and build one ``API`` instance per entry.

    Note that this definition replaces the earlier ``load_api_specs`` of the
    notebook, which returned the raw dictionaries.

    Parameters
    ----------
    file_path : Path
        Path of the JSON file containing the API data.

    Returns
    -------
    List[API]
        A list of ``API`` instances, in file order.

    Raises
    ------
    SyntaxError
        If one of the entries fails OpenAPI validation.
    """
    # JSON is UTF-8 by specification. Passing the encoding explicitly avoids
    # decoding with the platform locale (e.g. cp1252 on Windows).
    with file_path.open('r', encoding='utf-8') as file:
        api_specs = json.load(file)
    # The file is closed before the (potentially slow) validation starts.
    return [API(api_data) for api_data in api_specs]


In [4]:
from tqdm import tqdm
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
import joblib

# Shared OpenAI client used by get_embedding. It is built without arguments —
# presumably the API key comes from the environment loaded in the first cell (confirm).
client = OpenAI()

def get_embedding(text, model="text-embedding-3-small"):
    """
    Compute the embedding of a text with the OpenAI embeddings endpoint.

    Parameters
    ----------
    text : str
        The text to embed; newlines are replaced by spaces before the call.
    model : str
        Name of the embedding model to use.

    Returns
    -------
    The ``embedding`` attribute of the first item of the response data
    (presumably a list of floats — confirm against the client documentation).
    """
    single_line_text = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line_text], model=model)
    first_item = response.data[0]
    return first_item.embedding

# When True, every endpoint embedding is recomputed through get_embedding and
# the cache file is rewritten; when False, the embeddings saved on disk are reused.
COMPUTE_EMBEDDINGS = False

if COMPUTE_EMBEDDINGS:
    # Load the APIs
    apis = load_api_specs(API_DOC_PATH)

    # One embedding per endpoint, appended in iteration order
    # (API by API, then endpoint by endpoint).
    spec_embeddings = []
    for api in tqdm(apis):
        for endpoint in api.generate_endpoints_descriptions():
            # The description dictionary is embedded through its str() form.
            endpoint_text = str(endpoint)
            spec_embeddings.append(get_embedding(endpoint_text))
    joblib.dump(spec_embeddings, "../data/embeddings.joblib")
else:
    # NOTE(review): joblib.load unpickles the file — only load a cache you produced yourself.
    # This branch fails if the cache file has not been generated yet.
    spec_embeddings = joblib.load("../data/embeddings.joblib")

In [5]:
import chromadb
# Client created with default settings — presumably an in-memory store; confirm against the chromadb docs.
chroma_client = chromadb.Client()

# NOTE(review): create_collection likely fails when "api_endpoints" already exists,
# e.g. if this cell is run twice in the same kernel — confirm.
collection = chroma_client.create_collection(name="api_endpoints")
# Only embeddings and ids are stored (no documents, no metadata).
# The numeric suffix of each id is the position of the vector in spec_embeddings.
collection.add(
    embeddings=spec_embeddings,
    ids=[f"endpoint_{i}" for i in range(len(spec_embeddings))],
)

In [12]:
# Natural-language request to match against the indexed endpoints.
request = "Recognized digits with mnist dataset"

# When True, `request` is embedded through get_embedding and the cached vector is rewritten.
# When False, the cached vector is loaded as-is, whatever `request` currently contains,
# so the flag must be switched on after editing the request text.
REQUEST = False
if REQUEST:
    embedding = get_embedding(request)
    joblib.dump(embedding, "../data/request_embedding.joblib")
else:
    # NOTE(review): joblib.load unpickles the file — only load a cache you produced yourself.
    embedding = joblib.load("../data/request_embedding.joblib")

# Query the collection with the request embedding.
results = collection.query(query_embeddings = embedding)

# Display the raw query result.
results

{'ids': [['endpoint_16',
   'endpoint_14',
   'endpoint_15',
   'endpoint_7',
   'endpoint_68',
   'endpoint_52',
   'endpoint_13',
   'endpoint_85',
   'endpoint_25',
   'endpoint_66']],
 'distances': [[0.9652571082115173,
   1.0322821140289307,
   1.131253957748413,
   1.5743967294692993,
   1.5746488571166992,
   1.578432321548462,
   1.5806623697280884,
   1.6010658740997314,
   1.609519600868225,
   1.6291850805282593]],
 'metadatas': [[None, None, None, None, None, None, None, None, None, None]],
 'embeddings': None,
 'documents': [[None, None, None, None, None, None, None, None, None, None]],
 'uris': None,
 'data': None}