In [None]:
# Installing python libraries in quiet mode(-q)
!pip install -q transformers # Hugging face Transformers library
!pip install -q accelerate # Simplifies multi-GPU mixed-precision, distributed training
!pip install -q bitsandbytes # A library for 8-bit and 4 bit quantization
!pip install -q sentencepiece # Tokenizer from google

In [None]:
# Importing Libraries
import json # For parsing the JSON object returned by the language model

import torch
from sentence_transformers import SentenceTransformer, util # sentence transformer model for performing Semantic Textual Similarity (STS) to map verb to executable function
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig # For detecting verb, object, instrument from input

In [None]:
# Loading the model and tokenizer for detecting verb, object, instrument
model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # instruction-tuned Mistral 7B checkpoint

tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit quantization is requested through a BitsAndBytesConfig object; passing
# `load_in_4bit=True` straight to from_pretrained() is deprecated.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype="auto",          # `torch_dtype` is deprecated in favour of `dtype`
    device_map="auto",     # let accelerate place the weights on the available device(s)
    quantization_config=quantization_config,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

#### In chat-style prompting, each message carries one of three roles:
1. `system` – sets the model’s behavior, identity, and global rules.
2. `user` – represents what the end user is asking.
3. `assistant` – represents the model’s own prior outputs.

In [None]:
# Keys the parser must return; anything else the model adds is discarded.
FIELD_KEYS = ("verb", "object", "instrument")

# The player's command to parse.
# NOTE(review): this is identical to the last few-shot example below; use a command that is
# not in the prompt when you want to check how well the parser generalizes.
user_command = "Open the gate to the courtyard"

messages = [
    {
        "role": "system",
        "content": (
            "You are an expert text parser for a text-based RPG game. "
            "Your job is to extract three fields from the player's command: "
            "\"verb\", \"object\", and \"instrument\". "
            "Return ONLY a valid JSON object with exactly these keys and no extra text."
        )
    },
    # --- Few-shot examples ---
    {
        "role": "user",
        "content": "Attack Gascoigne with Pistol"
    },
    {
        "role": "assistant",
        "content": '{"verb": "attack", "object": "Gascoigne", "instrument": "Pistol"}'
    },
    {
        "role": "user",
        "content": "Stab Gascoigne in the head using a silver dagger"
    },
    {
        "role": "assistant",
        # The object must be the entity named in the command ("Gascoigne"), not a paraphrase.
        "content": '{"verb": "stab", "object": "Gascoigne", "instrument": "silver dagger"}'
    },
    {
        "role": "user",
        "content": "shoot Gascoigne in the head with the Pistol"
    },
    {
        "role": "assistant",
        "content": '{"verb": "shoot", "object": "Gascoigne", "instrument": "Pistol"}'
    },
    # --- No-instrument examples ---
    {
        "role": "user",
        "content": "Open the Cellar door"
    },
    {
        "role": "assistant",
        "content": '{"verb": "open", "object": "cellar door", "instrument": ""}'
    },
    {
        "role": "user",
        "content": "Open the gate to the courtyard"
    },
    {
        "role": "assistant",
        "content": '{"verb": "open", "object": "courtyard gate", "instrument": ""}'
    },
    # --- Real query ---
    {
        "role": "user",
        "content": user_command
    }
]


def extract_command_fields(text):
    """Pull the first JSON object out of a model reply and keep only the expected keys.

    The model may append extra keys or free-form notes after the JSON object, so the
    reply is decoded starting at the first "{" and anything after the object is ignored.

    Args:
        text: Decoded model reply.

    Returns:
        dict with exactly the keys in FIELD_KEYS; missing or null values become "".

    Raises:
        ValueError: if the reply contains no "{" or the JSON is malformed
            (json.JSONDecodeError is a subclass of ValueError).
    """
    start = text.find("{")
    if start == -1:
        raise ValueError(f"No JSON object found in model response: {text!r}")
    payload, _ = json.JSONDecoder().raw_decode(text, start)
    return {key: str(payload.get(key) or "") for key in FIELD_KEYS}


# Convert to model input. return_dict=True also returns the attention mask, which
# generate() needs for reliable results. The tensors are moved to the device the model
# was loaded on instead of a hard-coded "cuda".
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
input_ids = inputs["input_ids"]

# Greedy decoding keeps the parse reproducible. Sampling knobs such as temperature/top_p
# only take effect together with do_sample=True, so they are left out here.
outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=False,
    pad_token_id=tokenizer.eos_token_id,  # the tokenizer has no dedicated pad token configured
)

# Slicing off the part that corresponds to the prompt:
# input_ids.shape[-1] is the number of prompt tokens; the leading ":" keeps every row,
# and everything from the end of the prompt to the final token is kept.
gen_only = outputs[:, input_ids.shape[-1]:]
# Decoding only new tokens
response = tokenizer.decode(
  gen_only[0],
  skip_special_tokens=True
)

print(response)

# Structured result restricted to the three expected fields
parsed_command = extract_command_fields(response)
print(parsed_command)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{"verb": "open", "object": "gate", "instrument": "", "location": "courtyard"}

Note: I added an optional "location" key to provide more context if needed.


In [None]:
# Mapping verb to executable actions

# Bound to `sts_model` rather than `model` so this cell does not overwrite the causal LM
# loaded earlier; the parsing cell stays re-runnable after this one has executed.
sts_model = SentenceTransformer("all-MiniLM-L6-v2")

# List of all possible canonical executable action functions
canonical_actions = ["attack", "open", "talk", "use"]
# Building a dense embedding (384-dimensional) for each of these executable functions
canon_embs = sts_model.encode(canonical_actions, convert_to_tensor=True) # tensor of shape (len(canonical_actions), 384)


def get_action(verb, min_score=None):
    """Map a detected verb to the most similar canonical action.

    Args:
        verb: Verb extracted from the player's command.
        min_score: Optional cosine-similarity threshold. When given and the best score is
            below it, the action is reported as None so callers can reject unknown verbs.
            The default (None) always returns the closest action.

    Returns:
        Tuple (best_action, score): best_action is an entry of canonical_actions (or None
        when min_score is set and not reached); score is the cosine similarity as a float.

    Raises:
        ValueError: if verb is empty or only whitespace.
    """
    if not verb or not verb.strip():
        raise ValueError("verb must be a non-empty string")
    # Converting verb to a dense embedding; a single string yields a 1-D tensor of shape (384,)
    verb_emb = sts_model.encode(verb, convert_to_tensor=True)
    # cos_sim builds a 2D similarity matrix; row 0 holds the verb's score against each canonical action
    scores = util.cos_sim(verb_emb, canon_embs)[0]
    # argmax() returns the index of the largest score as a tensor, item() converts it to a plain int
    best_idx = scores.argmax().item()
    best_score = float(scores[best_idx])
    if min_score is not None and best_score < min_score:
        return None, best_score
    return canonical_actions[best_idx], best_score


# Sample Inputs
print(get_action("bash"))
print(get_action("unlock"))
print(get_action("communicate"))


('attack', 0.33193135261535645)
('open', 0.41988199949264526)
('talk', 0.5823869109153748)
