In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_4bit=True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:35<00:00, 17.61s/it]


In [29]:
# with torch.no_grad():
#     prompt = """You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them.

#     You are in the living room. 
#     You see: couch, television, book.
#     You have the following items in your inventory: .

#     Valid actions include:
#     pick up couch
#     pick up television
#     pick up book
#     go to living room
#     go to kitchen

#     Q: What action should you take that is the most relevant to your goal?
#     A: """

#     actions = [
#     "pick up couch",
#     "pick up television",
#     "pick up book",
#     "go to living room",
#     "go to kitchen"
#     ]

#     # Tokenize the input prompt
#     inputs = tokenizer(prompt, return_tensors="pt").to(device)

#     # Obtain logits
#     outputs = model(**inputs)
#     logits = outputs.logits

#     # Obtain log probabilities by applying log softmax to logits
#     log_probs = torch.log_softmax(logits, dim=1)

In [None]:
log_probs.shape

In [3]:
prompt = """You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them. You are in the living room. You see: couch, television, book. You have the following items in your inventory: .
Valid actions include:
pick up couch
pick up television
pick up book
go to living room
go to kitchen

Action: """

prompt = """You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them.

You are in the living room.
You see: couch, television, book.       
You have the following items in your inventory: .
Valid actions: pick up couch, pick up television, pick up book, go to kitchen, go to bedroom, go to bathroom

Action: """

actions = [
    "pick up couch",
    "pick up television",
    "pick up book",
    "go to living room",
    "go to kitchen",
]

In [None]:
def get_action_probability(prompt, action):
    # Tokenize the prompt + action
    inputs = tokenizer(prompt, return_tensors="pt")
    action_tokens = tokenizer(action, return_tensors="pt")["input_ids"]

    # Get logits from the model for the prompt
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # Softmax over the logits to get the probability distribution
    probs = torch.nn.functional.softmax(logits, dim=-1)

    # Extract probabilities for each token in the action
    action_prob = 1.0
    for i, token_id in enumerate(action_tokens[0]):
        action_prob *= probs[0, -(len(action_tokens[0]) - i), token_id].item()  # Prob for each token

    return action_prob


scores = []
for action in actions:
    score = get_action_probability(prompt, action)
    print(f"{action} | {score}")
    scores.append(score)

index = scores.index(max(scores))
print(f"Best Action: {actions[index]}")



In [12]:
print(repr(prompt))

def score_action(prompt: str, action: str):

    print(prompt + action)

    # Tokenize prompt + action, as well as action
    input_tokens = tokenizer(prompt + action, return_tensors="pt")
    action_tokens = tokenizer(action, return_tensors="pt")["input_ids"]

    # Compute logits
    with torch.no_grad():
        outputs = model(**input_tokens)
        logits = outputs.logits

    # Compute probability distribution
    probs = torch.nn.functional.softmax(logits, dim=-1)
    action_score = 1.0
    
    for idx, action_token_id in enumerate(action_tokens[0]):
        action_token_position = input_tokens["input_ids"].shape[1] - action_tokens.shape[1] + idx

        # look up the probability
        action_probability = probs[0, action_token_position, action_token_id].item()
        action_score *= action_probability
        
    return action_score
        

# Instruction, as used in the paper
scores = []
prompt = """You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them.

You are in the living room. 
You see: couch, television, book.
You have the following items in your inventory: .
Valid actions: pick up couch, pick up television, pick up book, go to kitchen, go to bathroom, go to bedroom

Action: """
for action in actions:
    # Probability that a skill is useful for instruction
    score = score_action(prompt, action)
    print(f"{action} | {score}")
    scores.append(score)

arg_max = actions[scores.index(max(scores))]
print(arg_max)


'You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them.\n\nYou are in the living room.\nYou see: couch, television, book.\nYou have the following items in your inventory: .\nValid actions: pick up couch, pick up television, pick up book, go to kitchen, go to bedroom, go to bathroom\n\nAction: '
You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, pick up items, and drop them.

You are in the living room. 
You see: couch, television, book.
You have the following items in your inventory: .
Valid actions: pick up couch, pick up television, pick up book, go to kitchen, go to bathroom, go to bedroom

Action: pick up couch
pick up couch | 3.5558075346929936e-26
You are an intelligent robot. Your goal is to drop a knife in the living room. Knife is in the kitchen. You can navigate the environment, 

In [None]:
def score_action(prompt: str, action: str):

    # Construct the full prompt with the action
    full_prompt = f"{prompt} {action}"

    # Tokenize the full prompt
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)

    # Obtain logits for the entire prompt
    outputs = model(**inputs)
    logits = outputs.logits

    # Obtain log probabilities
    log_probs = torch.log_softmax(logits, dim=-1)

    # Get the action tokens
    action_tokens = tokenizer(action, return_tensors="pt").to(device).input_ids[0]
    
    # Find the starting index of the action tokens in the full input
    num_prompt_tokens = inputs.input_ids.shape[1] - action_tokens.shape[0]

    # Extract scores for each action token
    action_scores = []
    for idx, token_id in enumerate(action_tokens):
        action_token_score = log_probs[0, num_prompt_tokens + idx, token_id]
        action_scores.append(action_token_score.item())

    # Compute the average score for the action
    average_score = max(action_scores)

    return average_score  # Return the list of scores for the action tokens

    # # Tokenize actions (each element is a token_id, which corresponds to log_probs)
    # action_tokens = tokenizer(action, return_tensors="pt").to(device).input_ids[0]
    # total_score = 0.0

    # # Iterate through all tokens in the action
    # for idx, token_id in enumerate(action_tokens):

    #     # Tokenize the input prompt
    #     inputs = tokenizer(prompt, return_tensors="pt").to(device)
    #     # Obtain logits
    #     outputs = model(**inputs)
    #     logits = outputs.logits
    #     # Obtain log probabilities by applying log softmax to logits
    #     log_probs = torch.log_softmax(logits, dim=1)
    #     # Get the number of tokens in the inputs
    #     num_input_tokens = inputs.input_ids.shape[1]

    #     # Obtain current token score from log_probabilities
    #     action_token_score = log_probs[0, num_input_tokens + idx - 1, token_id]

    #     print("Token score", action_token_score)

        

        

    return action_tokens


action_scores = {}
for action in actions:
    score = score_action(prompt, action)
    action_scores[action] = score


for action, score in action_scores.items():
    print(f"Action: {action}, Score: {score}")

In [None]:
log_probs.shape

In [None]:
log_probs[0, prompt_len - 2]

In [None]:
answer_tokens

In [None]:
# The reason we do prompt_len - 1 + 1 is because:
# indexing starts at 0, so we subtract 1 from prompt_len
# Since we are accumulating probabiltiy tokens, we need to add the i to shift it incrementally so that we are not just
# looking at "ap" in "apple", or "ora" in "orange", for example


    for answer in answers:
        print("-----------------")

        # Note that answer_tokens is stored as [[values]] (but we only have 1 batch)
        answer_tokens = tokenizer(answer, return_tensors="pt").to(device).input_ids
        total_log_prob = 0.0

        # Iterate through all tokens in the answer token
        # LOOK AT ANSWER:
        print(f"Action {answer}")
        for i, token_id in enumerate(answer_tokens[0]):
            print(f"Token Index: {i}, Token ID: {token_id}")

            

            # Lookup the log probability of this token in the log_probs distribution
            # token_log_prob = log_probs[0, index - 1, token_id]
            # total_log_prob += token_log_prob
            # print(f"Current Token Log Probability: {token_log_prob}, Total Log Probability: {total_log_prob}")


        answer_log_probs[answer] = total_log_prob

    Print out the log-probabilities for each answer
    print("------------")
    for answer, log_prob in answer_log_probs.items():
        print(f"Answer: {answer}, Log Probability: {log_prob}")