In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from pathlib import Path

# set device to cuda if available, else cpu
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.cuda.set_device(4)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", use_auth_token=True)

# Fix missing pad token
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",
    device_map=None,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
)
model.to(device)
model.eval()

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.61s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((4096,), eps=1e-05)
    (rotary_

In [None]:
class LLaMACommandGenerator:
    role_text: str = ""
    action_text: str = ""
    
    def __init__(self, model_name="meta-llama/Meta-Llama-3-8B", device="cuda", save_name="lama-8b", tokenizer=None, model=None):
        
        if tokenizer is not None and model is not None:
            self.tokenizer = tokenizer
            self.model = model
            self.device = model.device
        else:
            self.device = device if torch.cuda.is_available() and device == "cuda" else "cpu"
            # set gpu id
            if self.device == "cuda":
                torch.cuda.set_device(4)
            print(f"Using device: {self.device}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map=None,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
            )
            self.model.to(self.device)
            self.model.eval()

        self.save_name = save_name
    
    @classmethod
    def read_my_file(cls):
        # Read "role.txt" from the same folder as this script
        try:
            folder = Path(__file__).parent  # works only in .py files
        except NameError:
            folder = Path.cwd()             # fallback for notebooks or interactive mode
        file_path = folder / "role.txt"
        text = file_path.read_text()
        # 2. assign to cls.role_prompt
        cls.role_text = text

        # 3. assign to cls.action_text
        action_file_path = folder / "action.txt"
        if action_file_path.exists():
            cls.action_text = action_file_path.read_text().strip()
        else:
            cls.action_text = "You are a helpful assistant. Please provide a single command based on the task and observations."

    def generate_prompt(self, obs, task):
        """
        Step 1: Select a role prompt based on task description. -> done
        Step 2: [SEP] refers to previous action or observation, group them to previous actions.
        Step 3: Regulate user output to only one command with templates - alfred.twl2.
        """
        
        # Step 1: select role prompt based on task description
        if "put a" in task or "put some" in task:
            # Pick & Place
            role_prompt = self.__class__.role_text.split("**Pick & Place**")[1].split("---")[0].strip()
        elif "look at" in task or "examine" in task:
            # Examine in Light
            role_prompt = self.__class__.role_text.split("**Examine in Light**")[1].split("---")[0].strip()
        elif "clean" in task and "put" in task:
            # Clean & Place
            role_prompt = self.__class__.role_text.split("**Clean & Place**")[1].split("---")[0].strip()
        elif "heat" in task and "put" in task:
            # Heat & Place
            role_prompt = self.__class__.role_text.split("**Heat & Place**")[1].split("---")[0].strip()
        elif "cool" in task and "put" in task:
            # Cool & Place
            role_prompt = self.__class__.role_text.split("**Cool & Place**")[1].split("---")[0].strip()
        elif "put two" in task or "find two" in task:
            # Pick Two & Place
            role_prompt = self.__class__.role_text.split("**Pick Two & Place**")[1].split("---")[0].strip()
        else:
            role_prompt = ""  # fallback or raise an error/log warning

        # Step 2: Group previous actions and observations
        obs_split = obs.split("[SEP]")
        env_prompt = obs_split[0].strip()  # Initial environment description
        observations = [s.strip() for s in obs_split[1:] if s.strip()]  # Filter out empty strings
        
        prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        prompt += f"{role_prompt}\n\n"
        prompt += "<|start_header_id|>user<|end_header_id|>\n"
        prompt += f"Environment: {env_prompt}\n"
        prompt += f"\nTask: {task}\n"
        prompt += "Previous Actions and Observations:\n"
        for i, observation in enumerate(observations):
            if i % 2 == 0:
                prompt += f"Observation {i//2 + 1}: {observation}\n"
            else:
                prompt += f"Action {i//2 + 1}: {observation}\n"
        
        prompt += self.__class__.action_text + "\n"
        prompt += "You must respond: **The action you should take is:** `**[your_action_here]**` \n"
        prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
        return prompt

    def command_generation_lama(self, observation_strings, task_desc_strings):
        res = []
        read_my_file = self.__class__.read_my_file
        if not hasattr(self.__class__, 'role_text') or not self.__class__.role_text:
            read_my_file()
        if not self.__class__.role_text:
            raise ValueError("Role text not loaded. Please ensure 'role.txt' is present in the same directory as this script.")
        
        for obs, task in zip(observation_strings, task_desc_strings):
            # Construct prompt
            prompt = self.generate_prompt(obs, task)
            # print(f"Generated prompt: {prompt}")

            # Tokenize prompt (no padding needed for single input)
            input_ids = self.tokenizer.encode(prompt, return_tensors="pt", padding=True, truncation=True, max_length=2048, return_attention_mask=True).to(self.device)
            attention_mask = (input_ids != self.tokenizer.pad_token_id).long()

            # Generate response (small token limit for speed)
            with torch.no_grad():
                output_ids = self.model.generate(
                    input_ids,
                    attention_mask=attention_mask,
                    max_new_tokens=512,
                    temperature=0.7,
                    top_p=1.0,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            # Decode and remove the prompt prefix
            generated_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
            # response_text = generated_text[len(prompt):].strip()
            
            # response_text = response_text.split("\n")[0].strip()

            res.append(generated_text)
            # res.append(response_text)

        return res, None  # current_dynamics is unused

In [14]:
generator = LLaMACommandGenerator(model_name="meta-llama/Meta-Llama-3-8B", device="cuda", save_name="lama-8b", tokenizer=tokenizer, model=model)

In [15]:
observation_strings = ['-= Welcome to TextWorld, ALFRED! =- You are in the middle of a room. Looking quickly around you, you see a cabinet 1, a cabinet 10, a cabinet 11, a cabinet 12, a cabinet 2, a cabinet 3, a cabinet 4, a cabinet 5, a cabinet 6, a cabinet 7, a cabinet 8, a cabinet 9, a coffeemachine 1, a countertop 1, a countertop 2, a diningtable 1, a drawer 1, a drawer 2, a drawer 3, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 1, a stoveburner 2, a stoveburner 3, a stoveburner 4, and a toaster 1. [SEP] You arrive at cabinet 12. The cabinet 12 is closed. [SEP] go to cabinet 12 [SEP] You open the cabinet 12. The cabinet 12 is open. In it, you see a bowl 3. [SEP] open cabinet 12 [SEP] You close the cabinet 12. [SEP] close cabinet 12', '-= Welcome to TextWorld, ALFRED! =- You are in the middle of a room. Looking quickly around you, you see a cabinet 1, a cabinet 10, a cabinet 11, a cabinet 12, a cabinet 13, a cabinet 14, a cabinet 15, a cabinet 16, a cabinet 17, a cabinet 18, a cabinet 19, a cabinet 2, a cabinet 20, a cabinet 21, a cabinet 22, a cabinet 23, a cabinet 24, a cabinet 25, a cabinet 26, a cabinet 3, a cabinet 4, a cabinet 5, a cabinet 6, a cabinet 7, a cabinet 8, a cabinet 9, a coffeemachine 1, a countertop 1, a countertop 2, a countertop 3, a drawer 1, a drawer 10, a drawer 11, a drawer 12, a drawer 2, a drawer 3, a drawer 4, a drawer 5, a drawer 6, a drawer 7, a drawer 8, a drawer 9, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 1, a stoveburner 2, a stoveburner 3, a stoveburner 4, and a toaster 1. [SEP] You arrive at drawer 11. The drawer 11 is closed. [SEP] go to drawer 11 [SEP] You open the drawer 11. The drawer 11 is open. In it, you see nothing. [SEP] open drawer 11 [SEP] You close the drawer 11. [SEP] close drawer 11', '-= Welcome to TextWorld, ALFRED! =- You are in the middle of a room. Looking quickly around you, you see a cabinet 1, a cabinet 2, a cabinet 3, a cabinet 4, a cabinet 5, a cabinet 6, a cabinet 7, a cabinet 8, a cabinet 9, a coffeemachine 1, a countertop 1, a countertop 2, a drawer 1, a drawer 10, a drawer 11, a drawer 12, a drawer 13, a drawer 2, a drawer 3, a drawer 4, a drawer 5, a drawer 6, a drawer 7, a drawer 8, a drawer 9, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 1, a stoveburner 2, a stoveburner 3, a stoveburner 4, a stoveburner 5, a stoveburner 6, and a toaster 1. [SEP] You close the drawer 12. [SEP] close drawer 12 [SEP] You arrive at drawer 10. On the drawer 10, you see nothing. [SEP] go to drawer 10 [SEP] You arrive at drawer 12. The drawer 12 is closed. [SEP] go to drawer 12']
task_desc_strings = ['put a cool plate in cabinet.', 'put two spatula in drawer.', 'put a clean butterknife in drawer.']
Actions = ['go to cabinet 12', 'go to countertop 3', 'open drawer 12']

commands, _ = generator.command_generation_lama(observation_strings, task_desc_strings)

for cmd, baseline in zip(commands, Actions):
    print(f"Generated Command: {cmd} ")
    print(f"Baseline Command: {baseline}\n")

Generated Command: system
You are an embodied agent whose job is to execute “Pick & Place” tasks and must find an object of the desired type, pick it up, find the correct location to place it, and put it down there.

user
Environment: -= Welcome to TextWorld, ALFRED! =- You are in the middle of a room. Looking quickly around you, you see a cabinet 1, a cabinet 10, a cabinet 11, a cabinet 12, a cabinet 2, a cabinet 3, a cabinet 4, a cabinet 5, a cabinet 6, a cabinet 7, a cabinet 8, a cabinet 9, a coffeemachine 1, a countertop 1, a countertop 2, a diningtable 1, a drawer 1, a drawer 2, a drawer 3, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 1, a stoveburner 2, a stoveburner 3, a stoveburner 4, and a toaster 1.

Task: put a cool plate in cabinet.
Previous Actions and Observations:
Observation 1: You arrive at cabinet 12. The cabinet 12 is closed.
Action 1: go to cabinet 12
Observation 2: You open the cabinet 12. The cabinet 12 is open. In it, you see a bowl 3.
