In [1]:
# basics
import os
import json
import numpy
import requests

# vectorDB
import weaviate
import weaviate.classes as wvc

# deep learning
import torch
from transformers import AutoTokenizer, AutoModel

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Load the Hugging Face tokenizer and model for the chosen checkpoint.
# Both are kept as notebook globals because vectorize_text() reads them.
model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),  # evaluated first, as before
    AutoModel.from_pretrained(model_name),
)

def vectorize_text(text):
    """Embed ``text`` as one vector by mean-pooling the model's last hidden state.

    Relies on the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        text: Input handed directly to ``tokenizer`` (a single string in this
            notebook).

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over dim 1, with
        size-1 axes squeezed away, as a CPU NumPy array.
    """
    # Move the encoded tensors to the model's device so the forward pass does
    # not fail with a device mismatch when the model lives on an accelerator.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():  # inference only; no autograd graph needed
        outputs = model(**inputs)
    # Take the mean of the token embeddings as the sentence embedding
    pooled = outputs.last_hidden_state.mean(dim=1)
    # .numpy() is only valid for CPU tensors; .cpu() is a no-op if already there.
    return pooled.squeeze().cpu().numpy()

# Weaviate Cloud credentials are read from the environment so they are never
# written into the notebook.
WEAVIATE_URL = os.getenv("WCD_URL")
WEAVIATE_API_KEY = os.getenv("WCD_API_KEY")

# Fail fast with an actionable message instead of handing None to the client.
_missing_env = [
    env_name
    for env_name, env_value in (
        ("WCD_URL", WEAVIATE_URL),
        ("WCD_API_KEY", WEAVIATE_API_KEY),
    )
    if not env_value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

# Connect to Weaviate Cloud
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.auth.AuthApiKey(api_key=WEAVIATE_API_KEY),
)

# Smoke-test the embedding function on one example sentence.
# (The former `try: ... finally: pass` wrapper did nothing and was removed.)
text = "Weaviate is a great tool for managing vectors!"
vector = vectorize_text(text)
print(vector)

Loading checkpoint shards: 100%|██████████| 4/4 [01:12<00:00, 18.12s/it]


[ 0.18895923  1.0901791   0.8831661  ...  1.0861775  -1.2602544
  1.6406289 ]


In [None]:
# Collection setup: start from a clean schema.
# CAUTION: this deletes the existing collection if it is present.
collection_name = "MyFirstCollection"
collection_present = client.schema.exists(collection_name)
if collection_present:
    client.schema.delete_class(collection_name)