In [1]:
import os 
from typing import List 

import torch
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForCausalLM, AutoTokenizer
import warnings

warnings.filterwarnings("ignore", category=FutureWarning, message="`resume_download` is deprecated and will be removed in version 1.0.0.")

class Agent:
    def __init__(self,
                 model_name) -> None:

        # Utilize my GPU (RTX 3060)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


        # Initialize model and tokenizer
        self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

        # Suppress warnings
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id


    def generate_response(self, prompt: str) -> str:
        # Tokenize inputs
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        # Generate model output ids
        output = self.model.generate(**inputs, return_dict_in_generate=True, max_new_tokens=10)
        output_token_ids = output.sequences[0]

        # Decode output
        decoded_output = self.tokenizer.decode(output_token_ids, skip_special_tokens=True)

        # Remove prompt from generated text (could be done by masking or indexing based off prompt string)
        generated_text = decoded_output[len(prompt):].strip()

        # Obtain action by manipulating the generated text (mistral outputs action in the form of "Action: > action")
        generated_text = generated_text.split('\n')[0][2:]

        # Print prompt and action for debugging
        print("[PROMPT] " + prompt.replace("Action:", "")) # replaced "action" solely for aesthetics
        print("[ACTION] " + generated_text)

        return generated_text
    
    def generate_constrained_response(self, prompt: str, valid_actions: List[str]) -> str:

        prompt = prompt.replace("Action:", "") # Remove "Action:" from prompt

        for action in valid_actions:

            relevance_prompt = prompt + f"\nOn a scale of 1 to 10, that the action {action} is relevant to the long-term goal is: "

            relevance_score = self.obtain_relevance_score(relevance_prompt, action)

            print("[PROMPT] " + prompt)
            print("[ACTION] " + relevance_score)
            

    def obtain_relevance_score(self, prompt:str , action:str):
        # "Task-grounding", according to authors

        # Tokenize inputs
        relevance_inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        # Generate model output ids
        relevance_output = self.model.generate(**relevance_inputs, return_dict_in_generate=True, max_new_tokens=2)

        # Decode output
        relevance_decoded_output = self.tokenizer.decode(relevance_output.sequences[0], skip_special_tokens=True)

        return relevance_decoded_output



        

        

        

        # valid_action_scores = []
        # modified_prompt = prompt.replace("Action:", "")

        # for action in valid_actions:
        #     print("Looking at action: ", action)
        #     # "Task-grounding", according to authors
        #     relevance_prompt = modified_prompt + f"\nGiven the action {action}, what is the probability from 0 to 1 that this action is relevant in the long-term to our goal?"

        #     # "World-grounding", according to authors
        #     affordance_prompt = modified_prompt + f"\nGiven the action {action}, what is the probability that this action is feasible in the current environment?"

        #     # Tokenize inputs
        #     relevance_inputs = self.tokenizer(relevance_prompt, return_tensors="pt").to(self.device)
        #     affordance_inputs = self.tokenizer(affordance_prompt, return_tensors="pt").to(self.device)

        #     # Generate model output ids
        #     relevance_output = self.model.generate(**relevance_inputs, return_dict_in_generate=True, max_new_tokens=10)
        #     affordance_output = self.model.generate(**affordance_inputs, return_dict_in_generate=True, max_new_tokens=10)

        #     # Decode output
        #     relevance_decoded_output = self.tokenizer.decode(relevance_output.sequences[0], skip_special_tokens=True)
        #     affordance_decoded_output = self.tokenizer.decode(affordance_output.sequences[0], skip_special_tokens=True)





  from .autonotebook import tqdm as notebook_tqdm


In [2]:
agent = Agent("mistralai/Mistral-7B-v0.1")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:35<00:00, 17.91s/it]


In [3]:
x = agent.generate_constrained_response("You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them. You are in the living room. You see: couch, television, book. You have the following items in your inventory: .", ["go to bedroom", "go to kitchen", "go to living room"])

[PROMPT] You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them. You are in the living room. You see: couch, television, book. You have the following items in your inventory: .
[ACTION] You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them. You are in the living room. You see: couch, television, book. You have the following items in your inventory: .
On a scale of 1 to 10, that the action go to bedroom is relevant to the long-term goal is: 1.
[PROMPT] You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them. You are in the living room. You see: couch, television, book. You have the following items in your inventory: .
[ACTION] You are an intelligent robot. 

In [None]:
response_without_prompt = x[len(prompt):].strip().split('\n')[0][2:]
print(response_without_prompt)

In [None]:
print(x)