In [1]:
from typing import List

import torch
from ray import serve
from starlette.requests import Request
from transformers import pipeline


In [2]:
class EmbeddingModel:
    """Embed text with a Hugging Face feature-extraction pipeline.

    The pipeline is built once in the constructor for the
    ``sentence-transformers/all-MiniLM-L6-v2`` checkpoint, and its output is
    averaged over dimension 1 to produce the embedding.
    """

    def __init__(self):
        """Select a device and load the feature-extraction pipeline."""
        # Use CUDA when torch reports it as available, otherwise the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.pipeline = pipeline(
            task="feature-extraction",
            model="sentence-transformers/all-MiniLM-L6-v2",
            device=self.device,
        )

    def get_embedding(self, text: str) -> List[List[float]]:
        """Return the mean-pooled embedding of ``text``.

        Args:
            text: The string to embed.

        Returns:
            A nested list of Python floats: the pipeline's output tensor
            averaged over dimension 1. For a single input string the recorded
            notebook output is an outer list wrapping the embedding vector.
        """
        # Run inference. ``return_tensors`` is a boolean flag on the
        # feature-extraction pipeline; True requests a tensor instead of lists.
        model_output = self.pipeline(text, return_tensors=True)

        # Mean-pool over dim 1.
        # NOTE(review): assumes the output is (batch, tokens, hidden) — confirm.
        # Move to CPU before converting so the result is plain Python data.
        mean_embedding = model_output.mean(dim=1).cpu().tolist()
        return mean_embedding


In [3]:
# Build the embedding model once; the constructor loads the pipeline, so
# later cells can reuse this instance.
emb = EmbeddingModel()

Device set to use cpu


In [5]:
# Quick check: embed a short sample string and display the returned nested
# list of floats as the cell output.
emb.get_embedding("Hallo Welt")

[[-0.3021381199359894,
  0.43801629543304443,
  -0.47750887274742126,
  -0.1138516440987587,
  -0.29010874032974243,
  -0.18443423509597778,
  0.308556467294693,
  -0.18996448814868927,
  -0.2879277467727661,
  0.0985262393951416,
  -0.28754356503486633,
  -0.3971247375011444,
  -0.03666241839528084,
  0.16925370693206787,
  0.10703784227371216,
  -0.03871149197220802,
  0.0005119740962982178,
  0.5670316219329834,
  -0.46602460741996765,
  -0.16471493244171143,
  0.26417359709739685,
  0.12048550695180893,
  -0.7360889315605164,
  0.2233593463897705,
  -0.01815086603164673,
  -0.24545173346996307,
  0.14327217638492584,
  0.2080170065164566,
  -0.13121744990348816,
  -0.7230426669120789,
  -0.07743227481842041,
  0.34947898983955383,
  0.11738666146993637,
  -0.20282994210720062,
  -0.1449158936738968,
  0.038138266652822495,
  -0.009052763693034649,
  0.11486014723777771,
  0.01284284982830286,
  -0.009859909303486347,
  -0.4935828149318695,
  -0.1479654461145401,
  -0.03193605318665