# Local Inference on Colab
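This notebook lets you run the DeepNSM model on Google Colab. Before running it, add your Hugging Face access token as a Colab secret named `HF_TOKEN`; the first code cell reads it to download the base Llama model.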

In [None]:
from peft import PeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer
from google.colab import userdata
import torch

# Hugging Face repositories: the base Llama checkpoint, and the DeepNSM repo
# that provides both the tokenizer and the PEFT adapter.
BASE_MODEL_ID = "meta-llama/Llama-3.2-1B"
DEEPNSM_ID = "baartmar/DeepNSM-1B"

# Access token read from the Colab secret named "HF_TOKEN".
hf_token = userdata.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(DEEPNSM_ID)

# Load the base model, then resize its embedding table to the tokenizer's
# length before the adapter is attached.
nsm_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    token=hf_token,
    device_map="auto",
)
nsm_model.resize_token_embeddings(len(tokenizer))

# Attach the DeepNSM adapter and merge it into the base model so the result
# is a plain causal LM, then switch to evaluation mode.
nsm_model = PeftModelForCausalLM.from_pretrained(nsm_model, DEEPNSM_ID).merge_and_unload()
nsm_model.eval()

You can change the word and list of example sentences here.

In [None]:

# Target word and the example sentences that use it; edit these to try other inputs.
word = "scary"
examples = [
    "that dog's bark is so scary",
    "that movie is too scary for me",
]

# One example sentence per line.
examples_str = "\n".join(examples)

# Prompt layout: "Word:" line, "Examples:" header, the example lines, then
# a trailing "Paraphrase:" cue for the model to continue from.
prompt = "\n".join(
    [
        f"Word: {word}",
        "Examples:",
        examples_str,
        "Paraphrase:",
    ]
)

Use DeepNSM to generate an explication.

In [None]:
# Tokenize the prompt and move the tensors to the device the model actually
# lives on. The model was loaded with device_map="auto", so hard-coding "cuda"
# would fail on a runtime without a GPU.
inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(nsm_model.device)

# Number of prompt tokens; used below to slice off the echoed prompt.
input_length = inputs.input_ids.shape[1]

# Sampling-based generation; gradients are not needed for inference.
with torch.no_grad():
    output_ids = nsm_model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id  # Explicitly set to suppress pad token warning
    )

# Get only new tokens (everything after the prompt in the single returned sequence)
new_tokens = output_ids[0][input_length:]
decoded = tokenizer.decode(new_tokens, skip_special_tokens=True)

print(f"Word: {word}")
print(f"Examples: {examples_str}")
print(f"NSM Explication Generated by DeepNSM: {decoded}")