In [None]:
# Authors: Fluid Numerics LLC
#          Garrett Byrd             (garrett@fluidnumerics.com)
#          Dr. Joseph Schoonover    (joe@fluidnumerics.com)

In [None]:
import os
import random

import torch
import transformers

In [None]:
# Confirm the correct device is being used
# E.g. 'AMD Instinct MI210'
# torch.cuda.get_device_name() raises when no CUDA/ROCm device is available,
# so fall back to a CPU label instead of aborting a Restart & Run All on a
# CPU-only machine.
if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(0)
else:
    device_name = "CPU (no CUDA/ROCm device available)"
print(f"Device name: {device_name}")

In [None]:
# set path to local model
# The LLAMA_MODEL_PATH environment variable overrides the default location, so
# the notebook can run on machines that do not have this scratch directory.
path_to_model = os.environ.get("LLAMA_MODEL_PATH", "/scratch/joe/llama-2-7b-chat-hf")

# set device to 'cuda' for ROCm GPUs, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# verify the device is set to 'cuda'
print(f"--------\nDevice: {device}\n--------\n")

# AutoTokenizer is a generic tokenizer class that will be instantiated as one of the tokenizer classes
# of the library when created with the AutoTokenizer.from_pretrained(pretrained_model_name_or_path) class method.
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoTokenizer

# Instantiate one of the tokenizer classes of the library from a pre-trained model vocabulary.
tokenizer = transformers.AutoTokenizer.from_pretrained(path_to_model)

# A pipeline tokenizes the input, feeds it to the model, and generates an output.
# In this specific case, both the input and outputs are English text.
# The tokenizer created above is handed to the pipeline explicitly so it is not
# loaded from disk a second time, and so the vocabulary inspected in this
# notebook is the one the pipeline actually uses.
pipeline = transformers.pipeline(
    "text-generation",
    model=path_to_model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,      # load the weights in half precision
    device_map="auto",
)

# Load the vocabulary of the model into a list, ordered by token id.
# get_vocab() returns a token -> id mapping; sorting by id makes the list order
# (and therefore the seeded sample below) independent of the mapping's
# iteration order.
token_to_id = tokenizer.get_vocab()
vocab = sorted(token_to_id, key=token_to_id.get)

# Sample the vocabulary
# A dedicated, seeded generator keeps the printed sample identical on every
# Restart & Run All and leaves the global `random` state untouched.
vocab_rng = random.Random(0)
# Clamp the sample size so a vocabulary smaller than 10 tokens does not raise.
sample_tokens = vocab_rng.sample(vocab, min(10, len(vocab)))

# Print relevant information
print(sample_tokens)
print("-----------")
print(tokenizer.eos_token, tokenizer.eos_token_id)
print(len(vocab))