In [1]:
import logging
import sys
import torch
import os
import io
import json
from transformers import AutoTokenizer, AutoModel, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "nomic-ai/nomic-embed-text-v1"
tokenizer_name = "bert-base-uncased"

In [3]:
os.environ['HF_HOME'] = '/models'

In [4]:
# Logging
def get_logger(logger_name):
   logger = logging.getLogger(logger_name)
   logger.setLevel(logging.DEBUG)
   handler = logging.StreamHandler(sys.stdout)
   handler.setLevel(logging.DEBUG)
   handler.setFormatter(
      logging.Formatter(
      '%(name)s [%(asctime)s] [%(levelname)s] %(message)s'))
   logger.addHandler(handler)
   return logger
logger = get_logger('snowpark-container-service')

In [5]:
logger.info(f'cuda.is_available(): {torch.cuda.is_available()}')
logger.info(f'cuda.device_count(): {torch.cuda.device_count()}')

snowpark-container-service [2024-07-01 16:29:17,591] [INFO] cuda.is_available(): True
snowpark-container-service [2024-07-01 16:29:17,634] [INFO] cuda.device_count(): 4


In [7]:
logger.info('Loading Model ...')
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, padding=True, truncation=True, return_tensors='pt', model_max_length=4096)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True, rotary_scaling_factor=2).to("cuda")
logger.info(f'Model device: {model.device}')
#embedding_pipeline = pipeline("feature-extraction", model=model, tokenizer=tokenizer, device_map="auto")
embedding_pipeline = pipeline(
        "feature-extraction",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        truncation=True,
        padding=True,
        pad_to_max_length=True,
        device_map="auto",
        framework="pt",
        batch_size=16,
    )
logger.info(f'Embedding pipeline device: {embedding_pipeline.device}')
logger.info('Finished Loading Model.')

snowpark-container-service [2024-07-01 16:29:34,584] [INFO] Loading Model ...


<All keys matched successfully>


snowpark-container-service [2024-07-01 16:29:36,870] [INFO] Model device: cuda:0
snowpark-container-service [2024-07-01 16:29:36,871] [INFO] Embedding pipeline device: cuda:0
snowpark-container-service [2024-07-01 16:29:36,871] [INFO] Finished Loading Model.


In [11]:
dummy = ["This is a test"] * 1000

In [12]:
embeddings = embedding_pipeline(dummy)

In [14]:
processed_embeddings = [torch.mean(torch.tensor(embedding), dim=0).tolist() for embedding in embeddings][0]

In [16]:
processed_embeddings[0]

[1.0849508047103882,
 0.41850367188453674,
 -1.009497880935669,
 -0.7987613081932068,
 0.8745349645614624,
 0.3653618395328522,
 0.5736721158027649,
 -0.3726891577243805,
 -0.15442803502082825,
 -0.7768398523330688,
 -1.1225608587265015,
 0.7906705737113953,
 -0.6027746796607971,
 -0.6063327193260193,
 0.2635617256164551,
 -1.5168598890304565,
 1.3975939750671387,
 -1.2024842500686646,
 0.2530287504196167,
 -0.048934146761894226,
 -0.832142174243927,
 0.918293833732605,
 -1.209232211112976,
 -1.1547542810440063,
 2.968989849090576,
 -0.5146790146827698,
 0.17000658810138702,
 -0.011825086548924446,
 -1.6764367818832397,
 -0.11327772587537766,
 1.9746732711791992,
 -0.07574570924043655,
 0.786652147769928,
 -0.851554274559021,
 -0.6002985835075378,
 -1.851593255996704,
 0.12729787826538086,
 0.41250020265579224,
 1.0306980609893799,
 -0.4135565757751465,
 0.035296812653541565,
 0.9892163276672363,
 0.15602602064609528,
 -1.0448616743087769,
 1.271103024482727,
 -0.051017485558986664,
 0