# Experiments with Gemma

In [None]:
import torch
import accelerate
# Global PyTorch configuration for the rest of the notebook.
# Make CUDA the default device for newly created tensors.
torch.set_default_device('cuda')
# NOTE(review): this only constructs a `torch.cuda.device` context manager and
# never enters it (there is no `with`), so it does not appear to select a
# device -- confirm and consider removing.
torch.cuda.device("cuda")
# Allow TF32 in CUDA float32 matmuls; the next line sets the float32 matmul
# precision mode to 'high'. Presumably both trade some float32 precision for
# speed -- TODO confirm against the PyTorch docs for the installed version.
torch.backends.cuda.matmul.allow_tf32 = True
torch.set_float32_matmul_precision('high')

In [None]:
import textworld

# Create a game with TextWorld's `tw-make` command (run through the notebook's
# `!` shell escape) and write it to games/test-game.z8.
# NOTE(review): the meaning of each flag is defined by `tw-make tw-simple`;
# see `tw-make tw-simple --help` rather than relying on this comment.
!tw-make tw-simple --rewards dense    --goal detailed --seed 18 --test --silent -f --output games/test-game.z8

In [None]:
# create a play function for playing + recording scores

import os
from glob import glob

import textworld.gym

import torch


def play(agent, path, max_step=100, nb_episodes=10, verbose=True):
    """Play one or more TextWorld games with ``agent`` and print statistics.

    Args:
        agent: Object exposing ``infos_to_request`` and
            ``act(obs, score, done, infos)``.
        path: Path to a single game file, or to a directory; for a directory
            every ``*.z8`` file directly inside it is registered.
        max_step: Forwarded to ``register_games`` as ``max_episode_steps``.
        nb_episodes: Number of episodes to play.
        verbose: When true, print the game name, one dot per finished episode
            and a final summary line.

    Returns:
        None. The statistics are only printed.
    """
    # Imported locally so the function does not depend on a different notebook
    # cell having executed ``import numpy as np`` before it is called.
    import numpy as np

    torch.manual_seed(46)  # For reproducibility when using action sampling.

    infos_to_request = agent.infos_to_request
    infos_to_request.max_score = True  # Needed to normalize the scores.

    is_dir = os.path.isdir(path)
    gamefiles = glob(os.path.join(path, "*.z8")) if is_dir else [path]

    env_id = textworld.gym.register_games(gamefiles,
                                          request_infos=infos_to_request,
                                          max_episode_steps=max_step)
    env = textworld.gym.make(env_id)  # Create a Gym environment to play the text game.
    if verbose:
        label = os.path.dirname(path) if is_dir else os.path.basename(path)
        print(label, end="")

    # Collect some statistics: nb_steps, final reward.
    avg_moves, avg_scores, avg_norm_scores = [], [], []
    max_score = None
    try:
        for _ in range(nb_episodes):
            obs, infos = env.reset()  # Start new episode.

            score = 0
            done = False
            nb_moves = 0
            while not done:
                command = agent.act(obs, score, done, infos)
                obs, score, done, infos = env.step(command)
                nb_moves += 1

            agent.act(obs, score, done, infos)  # Let the agent know the game is done.

            if verbose:
                print(".", end="")

            max_score = infos["max_score"]
            avg_moves.append(nb_moves)
            avg_scores.append(score)
            # Guard against games reporting a maximum score of 0.
            avg_norm_scores.append((score / max_score) if max_score else 0.0)
    finally:
        # Release the environment even when an episode raises.
        env.close()

    if not verbose:
        return

    if not avg_moves:
        # Nothing was played (nb_episodes <= 0): there is nothing to average.
        print("  \tno episodes played.")
        return

    if is_dir:
        msg = "  \tavg. steps: {:5.1f}; avg. normalized score: {:4.1f} / {}."
        print(msg.format(np.mean(avg_moves), np.mean(avg_norm_scores), 1))
    else:
        msg = "  \tavg. steps: {:5.1f}; avg. score: {:4.1f} / {}."
        print(msg.format(np.mean(avg_moves), np.mean(avg_scores), max_score))

In [None]:
# create agents

from typing import Mapping, Any
import numpy as np
import textworld.gym

class RandomAgent(textworld.gym.Agent):
    """ Baseline agent: picks one of the admissible commands at random. """

    def __init__(self, seed=1234):
        # Keep the seed around and build a dedicated generator from it so the
        # sequence of choices is reproducible for a given seed.
        self.seed = seed
        self.rng = np.random.RandomState(seed)

    @property
    def infos_to_request(self) -> textworld.EnvInfos:
        """ Extra information this agent needs from the environment. """
        requested = textworld.EnvInfos(admissible_commands=True)
        return requested

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> str:
        """ Return a command drawn from ``infos["admissible_commands"]``. """
        candidates = infos["admissible_commands"]
        return self.rng.choice(candidates)

class HFAgent(textworld.gym.Agent):
    """LLM from HuggingFace that acts as an agent.

    No model is wired in yet: ``act`` currently falls back to sampling one of
    the admissible commands at random, so the class can already be passed to
    code that expects an agent.
    """
    def __init__(self, seed=1234):
        # ``act`` samples through ``self.rng``; it has to be created here,
        # otherwise the first call to ``act`` raises AttributeError.
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)

    @property
    def infos_to_request(self) -> textworld.EnvInfos:
        """Request the list of admissible commands from the environment."""
        return textworld.EnvInfos(admissible_commands=True)

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]) -> str:
        """Return a command chosen from ``infos["admissible_commands"]``.

        TODO: replace the random choice with a decision made by the LLM.
        """
        return self.rng.choice(infos["admissible_commands"])

In [None]:
# Make the agent play: run the random agent on the single game file
# ./games/test-game.z8 using play()'s default settings.

play(RandomAgent(), "./games/test-game.z8")

In [None]:
!hf auth login --token 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

## Model + tokenizer

In [None]:
from transformers import Gemma3ForCausalLM, AutoTokenizer

# Hub identifier of the checkpoint used by the rest of the notebook.
checkpoint = "google/gemma-3-4b-it"

# Load the weights and the matching tokenizer. `device_map="auto"` leaves the
# placement of the weights to the library; the resulting placement is printed
# in the last cell via `model.hf_device_map`.
model = Gemma3ForCausalLM.from_pretrained(checkpoint, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
# Smoke test: tokenize a prompt, generate up to 20 new tokens and print the text.
prompt = "What is your favorite condiment?"
# Put the encoded prompt on the model's device explicitly instead of relying
# on the global default device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Unpack the whole encoding so that the attention mask is passed along with
# the input ids.
generate_ids = model.generate(**inputs, max_new_tokens=20)
result = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
print(result[0])

## Pipeline

In [None]:
from transformers import pipeline

# Reuse the model and tokenizer loaded earlier in the notebook instead of
# passing the checkpoint name, which would load a second copy of the weights.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# Run one prompt through the pipeline; as the last expression of the cell,
# the returned value is shown as the cell output.
pipe("What can you tell me about the Vietnam war?")

## Comparison

In [None]:
import time
prompt = "Tell me a random word."
# Use the same generation budget on both paths; otherwise the two timings
# mostly reflect how many tokens each path happened to generate.
max_new_tokens = 10

# Path 1: high-level pipeline. Only the generation call is timed, and
# perf_counter() is used because it is a monotonic clock meant for intervals.
start = time.perf_counter()
result = pipe(prompt, max_new_tokens=max_new_tokens)
end = time.perf_counter()
print(result[0]["generated_text"])
print(f"\n-----------------------\nTime through pipeline: {end - start} seconds")

# Path 2: explicit tokenizer + generate + decode.
start = time.perf_counter()
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Unpack the encoding so the attention mask is passed together with the ids.
generate_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
result = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
end = time.perf_counter()
print(result[0])
print(f"\n-----------------------\nTime through tokenizer + generation: {end - start} seconds")


In [None]:
# Show the model's `hf_device_map` attribute, i.e. the placement that resulted
# from loading with `device_map="auto"`.
print(model.hf_device_map)