# EXAONE

In [None]:
import time
import textworld
import numpy as np
import re

In [None]:
import torch
import accelerate
# Global PyTorch configuration for the rest of the notebook.
# Make CUDA the default device for tensors created from here on.
torch.set_default_device('cuda')
# NOTE(review): this only constructs a `torch.cuda.device` context-manager
# object and discards it; it is never entered with `with`, so it presumably
# does not select a device. Confirm whether `torch.cuda.set_device` was meant.
torch.cuda.device("cuda")
# Allow TF32 in CUDA matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True
# Request the 'high' float32 matmul precision mode (presumably overlaps with
# the TF32 flag above — confirm against the PyTorch docs).
torch.set_float32_matmul_precision('high')

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub identifier of the checkpoint used throughout the notebook.
model_name = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"

# Loading options, grouped so they are easy to inspect and tweak.
load_options = {
    "torch_dtype": torch.bfloat16,  # load the weights as bfloat16
    "trust_remote_code": True,      # permits custom model code from the checkpoint repo to run
    "device_map": "auto",           # let the library decide where to place the model
}

# Load the causal language model and its matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Show the device-placement map recorded on the loaded model
# (presumably filled in because it was loaded with device_map="auto" — confirm).
print(model.hf_device_map)

In [None]:
# Smoke test: send one question through the tokenizer's chat template and time
# the whole round trip (templating, generation, decoding and printing).
prompt = "Explain what Zork is in one single sentence."  # English example

system_message = {
    "role": "system",
    "content": "You are EXAONE model from LG AI Research, a helpful assistant.",
}
user_message = {"role": "user", "content": prompt}
messages = [system_message, user_message]

start = time.time()

# Render the conversation to token ids, ending with the generation prompt so
# the model continues as the assistant.
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)

# Generate without sampling, capped at 200 new tokens.
output = model.generate(
    input_ids.to("cuda"),
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=200,
    do_sample=False,
)

# The first returned sequence is decoded as-is (no slicing, special tokens kept).
print(tokenizer.decode(output[0]))

end = time.time()
print(f"Inference took {(end - start):.3f} seconds")

## Context size and shifting window

In [None]:
# Chat markers used to assemble the raw prompt by hand, listed in the order
# they appear in a conversation.
token_system = "[|system|]"        # opens the system turn
token_user = "[|user|]"            # opens a user turn
token_assistant = "[|assistant|]"  # opens an assistant turn
token_endofturn = "[|endofturn|]"  # closes a turn

In [None]:
# Fetch the Zork I story file (release 88, Z-machine format) into the working
# directory; it is loaded later as './zork1.z5'.
# wget flags: -q silences output, -N re-downloads only if the remote copy is newer.
!echo "Downloading zork1.z5 ..."
!wget -q -N https://archive.org/download/Zork1Release88Z-machineFile/zork1.z5
!echo "Done."

In [None]:
# Select which pieces of information the game state should carry:
#   feedback    - response from the game after typing a text command,
#   description - text describing the room the player is currently in,
#   inventory   - text describing the player's inventory.
infos = textworld.EnvInfos(feedback=True, description=True, inventory=True)

In [None]:
# Start the local Zork I story file as a TextWorld environment, asking for the
# extra state selected in `infos`.
env = textworld.start('./zork1.z5', request_infos=infos)

In [None]:
# Play Zork with the model choosing every command.
#
# The conversation is kept as one raw prompt string built from the chat
# markers: the system turn once, then alternating user turns (the game's text)
# and assistant turns (the model's answer).
# NOTE: `context` is only ever appended to, so the prompt grows with each step.
system_prompt = "You are an assistant playing a textual game. You analyze the information given carefully and reply exclusively in the form \"verb noun\", e.g. \"open box\" or \"take key\"."
context = token_system + system_prompt + token_endofturn

# Matches runs of non-word characters (quotes, punctuation, newlines, ...).
# Written as a raw string: '\W' in a plain literal is an invalid escape
# sequence. Compiled once instead of on every loop iteration.
non_word_run = re.compile(r'\W+')

try:
    done = False
    # Keep the initial state so the summary printed after the loop still has a
    # `game_state` when the game is interrupted before the first command.
    game_state = env.reset()
    while not done:
        game_status = env.render(mode="text")
        print(game_status)
        context += token_user + game_status + token_assistant

        start = time.time()
        input_ids = tokenizer.encode(
            context,
            return_tensors="pt")

        output = model.generate(
            input_ids.to("cuda"),
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=10,
            do_sample=False)

        # Decode only the newly generated tokens (everything after the prompt).
        response = tokenizer.decode(output[0, input_ids.shape[1]:])
        context += response

        # Keep the text before the first "[", which drops a trailing
        # end-of-turn marker, then collapse unwanted characters such as quotes
        # into single spaces and trim the ends.
        answer = response.split("[")[0]
        print(answer)
        reply = non_word_run.sub(' ', answer).strip()

        end = time.time()
        print(f"Inference took {(end - start):.3f} seconds")

        # Fall back to a harmless command when the model produced nothing
        # usable (empty reply) or something too long to be "verb noun".
        command = reply if reply and len(reply.split()) <= 4 else "look around"
        game_state, reward, done = env.step(command)

    env.render()  # Final message.
except KeyboardInterrupt:
    pass  # Quit the game.

print("Played {} steps, scoring {} points.".format(game_state.moves, game_state.score))

Next: use the notebook `Playing TextWorld generated games with OpenAI Gym.ipynb`