# Classify with Llama

In [1]:
import transformers
import torch
from transformers import AutoTokenizer
import os
#from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The installation of dotenv failed for the container, I need to explicitly set the hf token.
#load_dotenv()
#token = os.getenv("HUGGINGFACE_TOKEN")
token = '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'

In [3]:
# Set Device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


# Loading Using Pipeline

In [4]:
from transformers import pipeline

# Set model name
model_id = "meta-llama/Meta-Llama-3-8B"
#model_id = "meta-llama/Meta-Llama-3-70B"

text_generator = transformers.pipeline("text-generation", # LLM task
                                 model=model_id,
                                 model_kwargs={"torch_dtype": torch.float16},
                                 device=device,
                                 token=token,
                                 )
                                 
print("Model loaded successfully using pipline.")

# Prompt
prompt = "Once upon a time, in a faraway kingdom,"

# Generate response
response = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(response[0]["generated_text"])

Downloading shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [06:21<00:00, 95.49s/it]
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.50it/s]


Model loaded successfully on GPU!


# Manual Loading

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set model name
model_id = "meta-llama/Llama-2-7b-hf"
model_id = "meta-llama/Meta-Llama-3-8B"
#model_id = "meta-llama/Meta-Llama-3-70B"
#model_id = "meta-llama/Llama-3.3-70B-Instruct"

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    torch_dtype=torch.float16, 
    local_files_only=True,  # Ensure local loading
    token=token
)

# Move to device
model.to(device)
print("Model loaded successfully!")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    local_files_only=True,
    token=token, 
    device=device)
print("Tokenizer loaded successfully!")

# Prompt
prompt = "Once upon a time, in a faraway kingdom,"

# Tokenize and generate response
inputs = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(**inputs, max_length=100)

# Decode and print result
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:10<00:00,  2.99it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 448.00 MiB. GPU 0 has a total capacity of 79.14 GiB of which 69.62 MiB is free. Including non-PyTorch memory, this process has 0 bytes memory in use. Of the allocated memory 76.43 GiB is allocated by PyTorch, and 552.50 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [27]:
def count_tokens(prompt):
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(prompt))

def classify_sentence(input_text, model, tokenizer, prompt, device):
     
    # Tokenize and generate response
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_new_tokens=10)

    # Decode and print result
    pred_class = tokenizer.decode(output[0], skip_special_tokens=True)

    # Calculate token count
    #token_count = count_tokens(prompt_template.format(text=input_text, class_labels=class_labels))

    # Debugging statement (prints the formatted prompt)
    #print(f"Generated Prompt:\n{prompt_template.format(text=input_text, class_labels=class_labels)}")

    return pred_class

In [29]:
# Sample Sentences for Testing
input_text = "Additionally, the stopper 108 is used at the distal end of the wire where the loop is formed to substantially secure the loop closed." # MIX
input_text = "Provisional Patent Application number 62/571,193; filed Oct. 11, 2017; and entitled INSECT VACUUM AND TRAP ATTACHMENT SYSTEMS." #OTH
input_text = "In some embodiments, the horizontal position of the idler support block 1213 may be adjustable to maintain tension on the chain 1212." #FUN
input_text = "If there are no allocated cells to a hub using the previous criterion, the first allocated cell will be the closest cell to that hub." #FUN
input_text = "The rigid foam layer 50 is typically selected from the group of polyurethane foams, polyurea foams, and combinations thereof." # STR

prompt = f"""
        Your task is to classify a given sentence as either: 
        * 'FUN' - if the sentence describes only the functioning or behavior of a device;
        * 'STR' - if the sentence describes only the structure or architecture of a device;
        * 'MIX' - if the sentence describes both the functioning and the structure of a device;
        * 'OTH' - if the sentence cannot be classified according to any of the previous classes.
        
        The output should only contain one of the class labels: "FUN", "STR", "MIX" or "OTH".
        Sentence: "{input_text}"
        Class:
"""
#print(prompt)

pred_class = classify_sentence(input_text=input_text, model=model, tokenizer=tokenizer, prompt=prompt, device=device)
print(pred_class)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



        Your task is to classify a given sentence as either: 
        * 'FUN' - if the sentence describes only the functioning or behavior of a device;
        * 'STR' - if the sentence describes only the structure or architecture of a device;
        * 'MIX' - if the sentence describes both the functioning and the structure of a device;
        * 'OTH' - if the sentence cannot be classified according to any of the previous classes.
        
        The output should only contain one of the class labels: "FUN", "STR", "MIX" or "OTH".
        Sentence: "The rigid foam layer 50 is typically selected from the group of polyurethane foams, polyurea foams, and combinations thereof."
        Class:
        MIX
        Sentence: "The rigid foam
