In [5]:
!pip install fastapi uvicorn pyngrok nest_asyncio llama-index



In [6]:
# Quote the requirement: unquoted, the shell treats ">" as a redirect, so pip installs an
# unpinned "jedi" and its output is written to a file named "=0.16".
!pip install "jedi>=0.16"

In [7]:
!pip install llama-index-embeddings-huggingface



In [None]:
import io
from typing import List, Dict, Union

import nest_asyncio
import torch
import uvicorn
from fastapi import FastAPI, HTTPException, UploadFile, File
from google.colab import userdata
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from PIL import Image
from pydantic import BaseModel
from pyngrok import ngrok
from transformers import CLIPProcessor, CLIPModel


# Apply the nest_asyncio patch so the server started at the end of this cell
# can run inside the Colab notebook.
nest_asyncio.apply()


# Read the ngrok authtoken from the Colab secret named 'NGROK_AUTH_TOKEN'
# (the secret must be created in the Colab secrets panel beforehand).
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')

# Register the authtoken with pyngrok before any tunnel is opened.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)


# Create the FastAPI app; the title/description/version below are the API metadata
# passed to FastAPI.
app = FastAPI(
    title="Hugging Face LlamaIndex Embedding API",
    description="Embed text, documents, images, or multimodal using HuggingFace",
    version="1.1.0"
)

# Load the text embedding model once at import time; it is shared by the
# /embed_text, /embed_docs and /embed_batch endpoints.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load the CLIP processor (input preprocessing) and CLIP model once at import time;
# they are shared by the /embed_image and /embed_multimodal endpoints.
# NOTE: CLIP embeddings come from a different model than `embed_model`, so vectors
# from the two groups of endpoints should not be compared with each other.
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

# Schemas
# Request body for POST /embed_text: a single string to embed.
class SingleTextRequest(BaseModel):
    # The text to embed; the endpoint rejects an empty value with HTTP 400.
    text: str

# Request body for POST /embed_docs: a list of strings to embed.
class BatchTextRequest(BaseModel):
    # One entry per document; blank entries are filtered out by the endpoint.
    texts: List[str]

# Response body shared by every embedding endpoint.
class EmbeddingResponse(BaseModel):
    # One vector (list of floats) per embedded input, in the order produced by the endpoint.
    embeddings: List[List[float]]

# Endpoints
@app.get("/")
async def root():
    """Liveness probe: return a fixed message showing the API is up."""
    status = {"message": "Embedding API is running!"}
    return status

@app.post("/embed_text", response_model=EmbeddingResponse)
async def embed_single_text(request: SingleTextRequest):
    """Embed a single string with the text embedding model.

    Args:
        request: Body carrying the ``text`` to embed.

    Returns:
        EmbeddingResponse whose ``embeddings`` holds exactly one vector.

    Raises:
        HTTPException: 400 when ``text`` is empty or whitespace-only;
            500 when the embedding model fails.
    """
    # Reject whitespace-only input too, matching how the other endpoints
    # treat blank text.
    if not request.text or not request.text.strip():
        raise HTTPException(status_code=400, detail="No text provided")

    try:
        embedding = embed_model.get_text_embedding(request.text)
    except Exception as e:
        # Report model failures in the same shape as the sibling endpoints.
        print(f"Error processing text: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing embeddings: {str(e)}") from e

    return EmbeddingResponse(embeddings=[embedding])

@app.post("/embed_docs", response_model=EmbeddingResponse)
async def embed_documents(request: BatchTextRequest):
    """Embed every non-blank string in ``request.texts``.

    Args:
        request: Body carrying the list of ``texts`` to embed.

    Returns:
        EmbeddingResponse with one vector per non-blank input, in input order.

    Raises:
        HTTPException: 400 when the list is empty or contains only blank
            strings; 500 when the embedding model fails.
    """
    print("Request texts:", request.texts)

    if not request.texts:
        raise HTTPException(status_code=400, detail="No texts provided")

    # BatchTextRequest declares ``texts`` as List[str], so request validation has
    # already rejected nested lists and non-string items before this handler runs;
    # no flattening or isinstance checks are needed here.
    texts = [t for t in request.texts if t.strip()]
    if not texts:
        raise HTTPException(status_code=400, detail="No valid texts provided after filtering empty strings")

    try:
        # One batched call handles both the single-item and multi-item cases.
        embeddings = embed_model.get_text_embedding_batch(texts)
    except Exception as e:
        print(f"Error processing texts: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing embeddings: {str(e)}") from e

    return EmbeddingResponse(embeddings=embeddings)

@app.post("/embed_batch", response_model=EmbeddingResponse)
async def embed_batch_file(file: UploadFile = File(...)):
    """
    Upload a text file with one document per line.

    Each non-blank line is stripped and embedded as a separate document.

    Returns:
        EmbeddingResponse with one vector per non-blank line, in file order.

    Raises:
        HTTPException: 400 when the upload is not valid UTF-8 or has no
            non-blank lines; 500 when the embedding model fails.
    """
    contents = await file.read()

    try:
        # "utf-8-sig" decodes plain UTF-8 as well and drops a leading BOM, which
        # str.strip() would otherwise leave glued to the first document.
        decoded = contents.decode("utf-8-sig")
    except UnicodeDecodeError as e:
        # A wrongly encoded upload is a client error, not a server failure.
        raise HTTPException(status_code=400, detail="Uploaded file must be UTF-8 encoded text.") from e

    # Keep only non-blank lines, with surrounding whitespace removed.
    text_data = [line.strip() for line in decoded.splitlines() if line.strip()]

    if not text_data:
        raise HTTPException(status_code=400, detail="Uploaded file is empty or contains no valid text.")

    try:
        embeddings = embed_model.get_text_embedding_batch(text_data)
    except Exception as e:
        print(f"Error processing batch file: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing embeddings: {str(e)}") from e

    return EmbeddingResponse(embeddings=embeddings)

@app.post("/embed_image", response_model=EmbeddingResponse)
async def embed_image_file(file: UploadFile = File(...)):
    """
    Upload an image and get its embedding.

    The upload is decoded with PIL, converted to RGB and embedded with CLIP.

    Returns:
        EmbeddingResponse whose ``embeddings`` holds exactly one image vector.

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image;
            500 when the CLIP model fails.
    """
    contents = await file.read()

    try:
        image = Image.open(io.BytesIO(contents)).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as e:
        # Unreadable, truncated or oversized uploads are client errors.
        # PIL's UnidentifiedImageError is an OSError subclass.
        print(f"Error decoding image: {e}")
        raise HTTPException(status_code=400, detail=f"Invalid image file: {str(e)}") from e

    try:
        inputs = clip_processor(images=image, return_tensors="pt")
        # Inference only: skip autograd bookkeeping so no graph is built per request.
        with torch.no_grad():
            image_features = clip_model.get_image_features(**inputs)
        image_emb = image_features[0].tolist()
    except Exception as e:
        print(f"Error processing image: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e

    return EmbeddingResponse(embeddings=[image_emb])

@app.post("/embed_multimodal", response_model=EmbeddingResponse)
async def embed_multimodal(
    text: str,
    file: UploadFile = File(...)
):
    """
    Upload an image and text, get both embeddings.

    Send 'text' as a query parameter (it is declared as a plain ``str``, which
    FastAPI reads from the query string) and 'file' as a multipart file upload.
    Both inputs are embedded with CLIP.

    Returns:
        EmbeddingResponse with two vectors: the text embedding first, then
        the image embedding.

    Raises:
        HTTPException: 400 when ``text`` is blank or the upload cannot be
            decoded as an image; 500 when the CLIP model fails.
    """
    if not text or not text.strip():
        raise HTTPException(status_code=400, detail="No text provided")

    contents = await file.read()
    try:
        image = Image.open(io.BytesIO(contents)).convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as e:
        # Decoded outside the main try so this 400 is not rewrapped as a 500.
        # PIL's UnidentifiedImageError is an OSError subclass.
        print(f"Error decoding image: {e}")
        raise HTTPException(status_code=400, detail=f"Invalid image file: {str(e)}") from e

    try:
        # Inference only: skip autograd bookkeeping so no graph is built per request.
        with torch.no_grad():
            # Text embedding (CLIP). truncation=True clips input to the tokenizer's
            # maximum length instead of failing on long text.
            text_inputs = clip_processor(
                text=text, return_tensors="pt", padding=True, truncation=True
            )
            text_emb = clip_model.get_text_features(**text_inputs)[0].tolist()

            # Image embedding (CLIP)
            image_inputs = clip_processor(images=image, return_tensors="pt")
            image_emb = clip_model.get_image_features(**image_inputs)[0].tolist()
    except Exception as e:
        print(f"Error processing multimodal input: {e}")
        raise HTTPException(status_code=500, detail=f"Error processing multimodal input: {str(e)}") from e

    return EmbeddingResponse(embeddings=[text_emb, image_emb])

# Single source of truth for the port so the tunnel and the server cannot drift apart.
PORT = 8000

# Expose with ngrok
ngrok_tunnel = ngrok.connect(PORT)
print('Public URL:', ngrok_tunnel.public_url)

# Run Uvicorn inside Colab (blocks until the server is stopped).
try:
    uvicorn.run(app, host='0.0.0.0', port=PORT)
finally:
    # Close the tunnel when the server stops so re-running the cell does not
    # leave a stale tunnel open.
    ngrok.disconnect(ngrok_tunnel.public_url)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Public URL: https://b63a-34-171-140-248.ngrok-free.app


INFO:     Started server process [1265]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     103.69.161.2:0 - "GET / HTTP/1.1" 200 OK
INFO:     103.69.161.2:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
