In [1]:
import transformers
import torch
import re
import os
import pandas as pd
import json
import warnings 
import time
warnings.filterwarnings('ignore')

from tqdm import tqdm  
from transformers import AutoTokenizer
#from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The installation of dotenv failed for the container, I need to explicitly set the hf token.
#load_dotenv()
#token = os.getenv("HUGGINGFACE_TOKEN")
token = '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'

In [3]:
# Set Device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


# Model Loading

## Loading Using Pipeline

In [None]:
from transformers import pipeline

# Set model name
model_id = "meta-llama/Meta-Llama-3-8B"
#model_id = "meta-llama/Meta-Llama-3-70B"

text_generator = transformers.pipeline("text-generation", # LLM task
                                 model=model_id,
                                 model_kwargs={"torch_dtype": torch.float16},
                                 device=device,
                                 token=token,
                                 )
                                 
print("Model loaded successfully using pipline.")

# Prompt
prompt = "Once upon a time, in a faraway kingdom,"

# Generate response
response = text_generator(prompt, max_length=100, num_return_sequences=1)

# Print the generated text
print(response[0]["generated_text"])

## Manual Loading

see llama family: https://huggingface.co/meta-llama

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set model name
#model_id = "meta-llama/Meta-Llama-3-70B" # too big to run on GPU
#model_id = "meta-llama/Llama-3.3-70B-Instruct" # too big to run on GPU
#model_id = "meta-llama/Meta-Llama-3.1-8B"; model_name = 'Llama-3.1-8B'
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"; model_name = 'Llama-3.1-8B-Instruct'

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    torch_dtype=torch.float16, 
    local_files_only=True,  # Ensure local loading
    token=token
)

# Move to device
model.to(device)
print(f"Model '{model_name}' loaded successfully!")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    local_files_only=True,
    token=token, 
    device=device)
print("Tokenizer loaded successfully!")

Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.77it/s]


Model 'Llama-3.1-8B-Instruct' loaded successfully!
Tokenizer loaded successfully!


In [5]:
# Prompt
prompt = "Once upon a time, in a faraway kingdom,"

# Tokenize and generate response
inputs = tokenizer(prompt, return_tensors="pt").to(device)
output = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)

# Decode and print result
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)

Once upon a time, in a faraway kingdom, there lived a beautiful princess named Sophia. She had long, golden hair and sparkling blue eyes that shone like the stars in the night sky. Sophia was kind and gentle, loved by all who knew her, and she spent her days helping those in need and spreading joy wherever she went.
One day, a wicked sorcerer cast a spell on the kingdom, causing all the flowers to wither and die. The kingdom's gardens, once


# Define Classification Function

reference: https://huggingface.co/docs/transformers/main_classes/text_generation

In [6]:
def classify_sentence_llama(input_text, model, tokenizer, template, device):

    # Format the prompt using the provided template 
    prompt = template.format(text=input_text, class_labels="'FUN', 'STR', 'MIX' or 'OTH'")

    # Tokenize the formatted prompt to determine the starting position of the response
    # This is needed because some LLaMA models tend to repeat parts of the prompt in their output
    # See: https://stackoverflow.com/questions/76772509/llama-2-7b-hf-repeats-context-of-question-directly-from-input-prompt-cuts-off-w
    prompt_tokens = tokenizer(prompt, return_tensors="pt")["input_ids"]
    start_index = prompt_tokens.shape[-1]
     
    # Convert the prompt into tokenized input format and move it to the appropriate device (CPU/GPU)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Generate output using the model
    output = model.generate(
        **inputs,
        max_new_tokens=5,
        num_return_sequences=1, # Generate only one response
        do_sample=False, # Use deterministic output instead of sampling
        pad_token_id=tokenizer.eos_token_id
    )

    # Extract the model-generated text excluding the prompt itself
    output_without_prompt = output[0][start_index:]

    # Decode the generated tokens into human-readable text
    #output_string_completed = tokenizer.decode(output[0], skip_special_tokens=True)
    output_string = tokenizer.decode(output_without_prompt, skip_special_tokens=True).strip()

    # Use regex to match the sentence class (since prompt is repeated in the output string)
    match = re.search(r"(['\"]?)(FUN|STR|MIX|OTH)(['\"]?)", output_string, re.IGNORECASE)
    # Extract label if matched, otherwise default to 'UNK'
    output_label = match.group(2).upper() if match else None

    # Token count (excluding padding)
    input_count = len(tokenizer(prompt, return_tensors="pt", add_special_tokens=False)["input_ids"][0])
    output_count = len(output[0])

    # Debugging statement (prints the formatted prompt)
    #print(f"Generated Prompt:\n{prompt}")

    return output_string, output_label, input_count, output_count

# Load Prompts

In [None]:
import json

with open("/workspace/prompting/prompt_templates.json", "r") as file:
        templates = json.load(file)

# Vizualize templates
for name, template in templates.items():
    print(f"{name}:")
    print(template)
    print()

# Test Classification Function

In [8]:
# Sample Sentences for Testing
input_text = "Additionally, the stopper 108 is used at the distal end of the wire where the loop is formed to substantially secure the loop closed." # MIX
input_text = "Provisional Patent Application number 62/571,193; filed Oct. 11, 2017; and entitled INSECT VACUUM AND TRAP ATTACHMENT SYSTEMS." #OTH
input_text = "In some embodiments, the horizontal position of the idler support block 1213 may be adjustable to maintain tension on the chain 1212." #FUN
input_text = "If there are no allocated cells to a hub using the previous criterion, the first allocated cell will be the closest cell to that hub." #FUN
input_text = "The rigid foam layer 50 is typically selected from the group of polyurethane foams, polyurea foams, and combinations thereof." # STR
input_text = "Both Extracts 1-3 and 6-9 samples and standards were diluted 1:40 in the provided dilution buffer (for example 15 mL serum+585 mL buffer)." #FUN

# Select Template
template = templates['prompt_1']
print(template, '\n')

output_string, output_label, input_count, output_count = classify_sentence_llama(input_text=input_text, model=model, tokenizer=tokenizer, template=template, device=device)
print(f"\n*Output String*: {output_string}")
print(f"\n*Input Tokens*: {input_count}")
print(f"\n*Output Tokens*: {output_count}")
print(f"\n*Predicted Class*: {output_label}") 

Your task is to classify a given sentence as either: 
* 'FUN' - if the sentence describes only the functioning or behavior of a device;
* 'STR' - if the sentence describes only the structure or architecture of a device;
* 'MIX' - if the sentence describes both the functioning and the structure of a device;
* 'OTH' - if the sentence cannot be classified according to any of the previous classes.
The output should only contain one of the class labels: FUN, STR, MIX or OTH.
Sentence: '{text}' 
Class: 


*Output String*: 'OTH' 
Explanation

*Input Tokens*: 152

*Output Tokens*: 158

*Predicted Class*: OTH


#  Classify Sentences

In [9]:
# Import data
test_df = pd.read_excel("/workspace/data/test_agreement.xlsx")
#test_df

In [10]:
# Iterate over templates
for templ_name, template in templates.items():
    if templ_name not in ['prompt_x']:  # 'prompt1', 'prompt3', 'prompt5', 'prompt8' choose and skip specific templates
    
        # Initialize results dictionary
        results = {
            'sent_id': [],
            'sent': [],
            'sent_tag': [],
            'predicted_tag': [],
            'model_output': [],
            'input_count': [],
            'output_count': [],
            'errors': [],
            'elapsed_time_sec': []
        }

        # Perform Classification
        for i, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Processing Sentences"):  
            start_time = time.time()              
            try:
                results['sent_id'].append(row['sent_id'])
                results['sent'].append(row['sent'])
                results['sent_tag'].append(row['sent_tag'])

                # Get classification
                output_string, output_label, input_count, output_count = classify_sentence_llama(input_text=row['sent'], model=model, tokenizer=tokenizer, template=template, device=device)

                # Validate classification output
                if output_label not in ["FUN", "STR", "MIX", "OTH"]:
                    raise ValueError(f"Invalid Classification Tag.")

                # Append Results 
                results['predicted_tag'].append(output_label)
                results['model_output'].append(output_string)
                results['errors'].append(None)  # No error occurred
                results['input_count'].append(input_count)  # Append input token count when successful
                results['output_count'].append(output_count) # Append output token count when successful

            except Exception as e:
                # Handle any errors that occur during processing
                print(f"Error in sentence {row['sent_id']}: {e}. Skipping...")
                results['predicted_tag'].append("ERROR")
                results['model_output'].append(output_string)
                results['input_count'].append("ERROR") 
                results['output_count'].append("ERROR") 
                results['errors'].append(str(e))
            
            finally:
                # Tracking how long it took to process the sentence
                end_time = time.time()
                elapsed_time = end_time-start_time
                results['elapsed_time_sec'].append(f"{elapsed_time}")

        # Convert results to DataFrame and Save
        result_df = pd.DataFrame(results)
        result_df.to_excel(f'/workspace/results/prompting/{model_name}_{templ_name}.xlsx', index = False)
        print(f"{templ_name} completed.")

Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:34<00:00,  2.64it/s]


prompt_1 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:36<00:00,  2.63it/s]


prompt_2 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:37<00:00,  2.63it/s]


prompt_3 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:40<00:00,  2.61it/s]


prompt_4 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:37<00:00,  2.62it/s]


prompt_5 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:43<00:00,  2.59it/s]


prompt_6 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:43<00:00,  2.59it/s]


prompt_7 completed.


Processing Sentences: 100%|███████████████████████████████████████████████████████████████████████| 1200/1200 [07:41<00:00,  2.60it/s]


prompt_8 completed.
