<a href="https://colab.research.google.com/github/CarlosfcPinheiro/pibic-api-llm-integration/blob/main/pibic_aplicacao_llm_gateway.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LLMs API Gateway
API que funciona como um gateway para interação com LLMs de sumarização através da inference API do Hugging Face.

Os modelos disponíveis para interação através desse gateway são:
- facebook/bart-large-cnn
- google/pegasus-cnn_dailymail
- knkarthick/MEETING_SUMMARY
- google/pegasus-xsum

## Objetivo
A API tem como objetivo possibilitar a interação com LLMs para realização de testes em diferentes contextos de arquitetura.

In [1]:
# Instalação das dependências ==============
!pip install fastapi uvicorn pyngrok nest-asyncio

Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [2]:
# Definindo secret keys e variáveis globais ====================
from google.colab import userdata

# Route table: model alias (the value of the `model` query parameter) mapped to
# the hf-inference endpoint URL that serves that model.
MODELS_HF_ROUTES = dict([
    (
        "facebook_bart_large_cnn",
        "https://router.huggingface.co/hf-inference/models/facebook/bart-large-cnn",
    ),
    (
        "google_pegasus_cnn_dailymail",
        "https://router.huggingface.co/hf-inference/models/google/pegasus-cnn_dailymail",
    ),
    (
        "knkarthick_MEETING_SUMMARY",
        "https://router.huggingface.co/hf-inference/models/knkarthick/MEETING_SUMMARY",
    ),
    (
        "google_pegasus_xsum",
        "https://router.huggingface.co/hf-inference/models/google/pegasus-xsum",
    ),
])
# Both credentials are fetched through google.colab's `userdata` instead of
# being hard-coded in the notebook.
NGROK_AUTH_TOKEN = userdata.get("NGROK_AUTH_TOKEN")
HUGGINGFACE_TOKEN = userdata.get("HUGGINGFACE_TOKEN")
# The client selects which model to use through a query parameter.

In [3]:
# Criação da API com FastAPI ==============
from fastapi import FastAPI, Request, HTTPException
from pydantic import BaseModel
# permite executar um event loop aninhado (necessário para rodar o servidor dentro do notebook)
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from uvicorn import Config, Server

import pandas

import requests

# Request body for the summarization route.
class SummarizeRequest(BaseModel):
  # Text to summarize; the /summarize handler forwards it upstream as "inputs".
  text: str

# Application instance on which the route decorators register their handlers.
app = FastAPI()

@app.get("/")
async def root():
  # Fixed greeting returned for GET /.
  greeting = {"message": "Olá, FastAPI para o pibic!"}
  return greeting

@app.post("/summarize", status_code=200)
async def summarize(
    request: SummarizeRequest,
    model: str = "facebook_bart_large_cnn"
):
  """Summarize ``request.text`` with one of the configured Hugging Face models.

  Args:
    request: JSON body carrying the text to summarize.
    model: Key of ``MODELS_HF_ROUTES`` selecting the upstream model; sent by
      the client as a query parameter.

  Returns:
    ``{"summary": <text>}``, taken from ``summary_text`` of the first item in
    the upstream response.

  Raises:
    HTTPException: 400 when ``model`` is not a key of ``MODELS_HF_ROUTES``;
      502 when the upstream call fails, times out, answers with an error
      status, or returns a payload without ``summary_text``; 500 for any
      other unexpected failure.
  """
  # Local import keeps this block self-contained; fastapi is already a
  # dependency of the notebook.
  from fastapi.concurrency import run_in_threadpool

  hf_url = MODELS_HF_ROUTES.get(model)
  if hf_url is None:
    # An unknown model is a client mistake, so report it as such instead of
    # letting a KeyError surface as a generic 500.
    raise HTTPException(
        status_code=400,
        detail=f"Modelo '{model}' não suportado. Modelos disponíveis: {', '.join(MODELS_HF_ROUTES)}"
    )

  headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}

  try:
    # requests.post is blocking: run it in a worker thread so the event loop
    # keeps serving other requests. The 60 s timeout is an upper bound that
    # prevents a stalled upstream from hanging the request forever.
    response = await run_in_threadpool(
        requests.post,
        hf_url,
        headers=headers,
        json={"inputs": request.text},
        timeout=60
    )
    response.raise_for_status()
    result = response.json()
  except (requests.RequestException, ValueError) as e:
    # Network failure, timeout, non-2xx status or non-JSON body from upstream.
    print(e)
    raise HTTPException(
        status_code=502,
        detail="Falha ao consultar o modelo no Hugging Face"
    ) from e
  except Exception as e:
    print(e)
    raise HTTPException(status_code=500, detail="Houve um problema com o servidor") from e

  try:
    summary = result[0]["summary_text"]
  except (KeyError, IndexError, TypeError) as e:
    # Upstream answered with something that is not a list of summaries
    # (for example an error object).
    print(f"Resposta inesperada do Hugging Face: {result!r}")
    raise HTTPException(
        status_code=502,
        detail="Resposta inesperada do modelo no Hugging Face"
    ) from e

  return {"summary": summary}

In [4]:
# Allows asynchronous applications to run in the Colab environment ==============
nest_asyncio.apply()

In [5]:
# Connect the ngrok instance =================================
# Authenticate with the token read earlier into NGROK_AUTH_TOKEN.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
# Open a tunnel to local port 8000, the same port the uvicorn server is configured to use.
public_url = ngrok.connect(8000)
# Show the tunnel so its public address can be copied.
print(public_url)

NgrokTunnel: "https://zetta-faunlike-kindheartedly.ngrok-free.dev" -> "http://localhost:8000"


In [6]:
# Uvicorn server setup ======================
# Serve `app` on every interface at port 8000 with info-level logging.
config = Config(
    app,
    host="0.0.0.0",
    port=8000,
    log_level="info",
)
server = Server(config)

In [7]:
# Starting the server =================
import asyncio
# Top-level await: the cell keeps running (and logging requests) until the server shuts down.
await server.serve()

INFO:     Started server process [610]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_large_cnn HTTP/1.1" 200 OK
INFO:     136.115.14.183:0 - "POST /summarize?model=facebook_bart_larg

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [610]
