In [None]:
# serve.py
import ray
from ray import serve
import pandas as pd
import faiss
from transformers import AutoTokenizer, AutoModel
import torch
import yaml

# Load serve configuration. The file must provide the keys
# 'index_path', 'embeddings_path' and 'port'.
with open('serve.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

index_path = config['index_path']
embeddings_path = config['embeddings_path']
port = config['port']

# Initialize Ray Serve.
# The configured port has to be handed to the HTTP proxy explicitly; a bare
# serve.start() ignores it and listens on Serve's default port.
ray.init()
serve.start(http_options={"port": port})

# Load FAISS index and embeddings.
# NOTE(review): read_pickle can execute arbitrary code while unpickling, so
# embeddings_path must only ever point at a trusted file.
df = pd.read_pickle(embeddings_path)
index = faiss.read_index(index_path)

# Load the tokenizer and model. Both come from the same checkpoint, so the
# identifier is named once to keep the two loads in sync.
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Function to generate embeddings for query
def get_embeddings(text):
    """Embed *text* with the module-level tokenizer and model.

    The input is tokenized (with truncation and padding enabled), run through
    the model without gradient tracking, and the final hidden states are
    averaged over dimension 1.

    Returns:
        A NumPy array holding the averaged hidden state with all size-1
        dimensions squeezed away.

    NOTE(review): the average is taken over every position of dimension 1,
    with no attention-mask weighting -- confirm this matches how the indexed
    vectors were produced.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        token_states = model(**encoded).last_hidden_state
    pooled = torch.mean(token_states, dim=1)
    return pooled.squeeze().numpy()

# Ray Serve deployment class
@serve.deployment
class SearchEngine:
    """Ray Serve deployment that answers nearest-neighbour text queries.

    A query string is embedded with ``get_embeddings``, looked up in the
    FAISS index, and the matching DataFrame rows are returned.
    """

    def __init__(self):
        # Reuse the DataFrame and FAISS index loaded at module level.
        self.df = df
        self.index = index

    def search(self, query, top_k=5):
        """Return the rows closest to *query* together with their distances.

        Args:
            query: Text to embed and search for.
            top_k: Maximum number of neighbours to request from the index.

        Returns:
            A ``(records, distances)`` tuple. ``records`` is a list of dicts
            with the ``'id'`` and ``'text'`` columns of the matched rows;
            ``distances`` is the parallel list of index distances. Both lists
            may be shorter than ``top_k`` when the index yields fewer hits.

        NOTE(review): rows are selected positionally (``iloc``), which assumes
        the DataFrame row order matches the order vectors were added to the
        index -- confirm against the index-building code.
        """
        query_embedding = get_embeddings(query).reshape(1, -1)
        distances, indices = self.index.search(query_embedding, top_k)
        row_ids, row_distances = indices[0], distances[0]

        # FAISS fills unused result slots with label -1 when it finds fewer
        # than top_k neighbours. Passing -1 to iloc would silently return the
        # last row of the DataFrame, so drop those slots from both outputs.
        found = row_ids >= 0
        results = self.df.iloc[row_ids[found]]
        return (
            results[['id', 'text']].to_dict(orient="records"),
            row_distances[found].tolist(),
        )

    async def __call__(self, request):
        """Handle an HTTP request whose JSON body carries the search query.

        The body must contain a ``'query'`` entry and may contain ``'top_k'``
        (defaults to 5). Responds with a dict holding ``"results"`` and
        ``"distances"`` as produced by :meth:`search`.
        """
        query = await request.json()
        query_text = query['query']
        top_k = query.get('top_k', 5)
        results, distances = self.search(query_text, top_k)
        return {"results": results, "distances": distances}

# Build the Serve application from the deployment. bind() only constructs it;
# nothing is deployed until the application is passed to serve.run.
search_engine = SearchEngine.bind()

: 

In [None]:
# Start the deployment and set the route prefix here.
# NOTE: %pdef is an IPython magic that only prints the call signature of
# serve.run; as written, this cell does not deploy anything.
%pdef serve.run