# Game Reasoning Arena - Simple Tic-Tac-Toe Demo

<a href="https://colab.research.google.com/github/lcipolina/open_spiel_arena/blob/main/colabs/LLM_OpenSpiel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

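This notebook demonstrates how to use the **Game Reasoning Arena** framework to run a simple tic-tac-toe match between a lightweight Large Language Model (LLM) and a random bot. The LLM is called through Groq, so a `GROQ_API_KEY` is required for the LLM game; the last cell also runs a random-vs-random fallback that needs no API key.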

## Features
- Uses the Game Reasoning Arena framework
- Lightweight LLM vs. random bot gameplay
- Short, self-contained demonstration
- Step-by-step move logging and a win/draw summary across episodes

In [None]:
# Install dependencies
!pip install openspiel litellm pyyaml

# Clone the Game Reasoning Arena repository
!git clone https://github.com/SLAMPAI/game_reasoning_arena.git
%cd game_reasoning_arena

# Install the package
!pip install .

# Set up environment
import os
import sys
sys.path.append('/content/game_reasoning_arena/src')

print("Game Reasoning Arena setup complete!")

In [None]:
# Import Game Reasoning Arena components
from game_reasoning_arena.arena.games.registry import registry
from game_reasoning_arena.arena.agents.policy_manager import initialize_policies
from game_reasoning_arena.backends import initialize_llm_registry
from game_reasoning_arena.arena.utils.seeding import set_seed
import logging

# Configure logging for the notebook: INFO level, each record rendered as
# "<timestamp> - <level> - <message>".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

print("Imports successful!")

In [None]:
# Configuration for a simple tic-tac-toe game.
# The model identifier is used both by the LLM agent and as the backend
# default, so it is written once here.
LLM_MODEL = "litellm_groq/gemma-7b-it"  # Lightweight model

config = {
    # Games to play; only the first entry is used by the demo runner.
    "env_configs": [
        {"game_name": "tic_tac_toe", "max_game_rounds": None},
    ],
    "num_episodes": 3,  # Run 3 games
    "seed": 42,
    "use_ray": False,
    "mode": "llm_vs_random",
    # Player 0 is the LLM, player 1 picks moves at random.
    "agents": {
        "player_0": {"type": "llm", "model": LLM_MODEL},
        "player_1": {"type": "random"},
    },
    # Generation settings handed to the LLM backend.
    "llm_backend": {
        "max_tokens": 50,
        "temperature": 0.1,
        "default_model": LLM_MODEL,
    },
    "log_level": "INFO",
}

def run_tic_tac_toe_demo(config):
    """Run a simple two-player demonstration and print a play-by-play log.

    Plays ``config["num_episodes"]`` games of the first game listed in
    ``config["env_configs"]`` between the agents configured for players 0
    and 1, then prints a win/draw summary.

    Args:
        config: Experiment configuration dict. This function reads
            ``"seed"``, ``"num_episodes"``, ``"env_configs"`` and
            ``"agents"``; the whole dict is also passed on to
            ``initialize_llm_registry``, ``registry.make_env`` and
            ``initialize_policies``. An agent entry only needs a
            ``"model"`` key when its ``"type"`` is ``"llm"``.

    Returns:
        A list with one dict per episode containing ``"episode"``
        (1-based index), ``"winner"`` (display name or ``"Draw"``),
        ``"llm_score"`` (player 0's total reward), ``"random_score"``
        (player 1's total reward) and ``"steps"``.
    """
    agents_cfg = config.get("agents", {})

    def _agent_label(player_id):
        """Return a display label derived from the player's configured agent type."""
        agent_type = agents_cfg.get(f"player_{player_id}", {}).get("type")
        if agent_type == "llm":
            return "LLM"
        if agent_type == "random":
            return "Random Bot"
        if agent_type:
            return str(agent_type).title()
        # No type configured: keep the demo's historical LLM-vs-random naming.
        return "LLM" if player_id == 0 else "Random Bot"

    # Set seed for reproducibility
    set_seed(config["seed"])

    # Initialize LLM backends
    initialize_llm_registry(config)

    # Get the game configuration
    env_config = config["env_configs"][0]
    game_name = env_config["game_name"]

    # Labels come from the configuration rather than from the player index,
    # so a random-vs-random run is not reported as "LLM".
    labels = {player_id: _agent_label(player_id) for player_id in (0, 1)}
    names = {player_id: f"{labels[player_id]} (Player {player_id})" for player_id in (0, 1)}

    # Only LLM agents carry a model; configurations without one must not fail.
    llm_models = [
        str(agent_cfg.get("model", "unknown"))
        for agent_cfg in agents_cfg.values()
        if agent_cfg.get("type") == "llm"
    ]
    model_summary = ", ".join(llm_models) if llm_models else "none (no LLM agent configured)"

    print(f" Starting {game_name.replace('_', ' ').title()} Demo")
    print(f" LLM Model: {model_summary}")
    print(f" {labels[1]} vs {labels[0]}")
    print(f" Episodes: {config['num_episodes']}")
    print("-" * 50)

    # Create environment
    env = registry.make_env(game_name, config)

    # Initialize agent policies
    policies_dict = initialize_policies(config, game_name, config["seed"])

    # Mapping from player IDs to agents
    player_to_agent = {0: policies_dict["policy_0"], 1: policies_dict["policy_1"]}

    episode_results = []

    for episode in range(config["num_episodes"]):
        print(f"\n🎯 Episode {episode + 1}")
        print("=" * 30)

        # Reset environment; a different seed per episode keeps games distinct
        # while the whole run stays reproducible.
        observation_dict, _ = env.reset(seed=config["seed"] + episode)

        # Game variables
        episode_rewards = {0: 0, 1: 0}
        terminated = False
        truncated = False
        step_count = 0

        while not (terminated or truncated):
            step_count += 1
            print(f"\n Step {step_count}")

            # Show current board state
            print("Current board:")
            print(env.render_board(0))

            # Determine which player(s) should act
            if env.state.is_simultaneous_node():
                # All players act simultaneously (not typical for tic-tac-toe)
                active_players = list(player_to_agent.keys())
            else:
                # Turn-based: only current player acts
                current_player = env.state.current_player()
                active_players = [current_player]
                print(f"Player {current_player}'s turn ({labels.get(current_player, 'Unknown')})")

            # Compute actions for active players
            action_dict = {}
            for player_id in active_players:
                agent = player_to_agent[player_id]
                observation = observation_dict[player_id]

                # Get action from agent
                action_result = agent.compute_action(observation)

                # Agents may return either a bare action or a dict that also
                # carries the reasoning behind it.
                if isinstance(action_result, dict):
                    action = action_result.get("action")
                    reasoning = action_result.get("reasoning", "No reasoning provided")
                else:
                    action = action_result
                    reasoning = None

                action_dict[player_id] = action

                # Log the action
                agent_type = labels.get(player_id, "Unknown")
                print(f"  {agent_type} chooses action {action}")
                if agent_type == "LLM" and reasoning:
                    reasoning_text = str(reasoning)
                    # Mark the text as cut only when it really was truncated.
                    suffix = "..." if len(reasoning_text) > 100 else ""
                    print(f"  Reasoning: {reasoning_text[:100]}{suffix}")

            # Take environment step
            observation_dict, rewards, terminated, truncated, info = env.step(action_dict)

            # Update episode rewards
            for player_id, reward in rewards.items():
                episode_rewards[player_id] = episode_rewards.get(player_id, 0) + reward

        # Episode finished
        print(f"\n Episode {episode + 1} Complete!")
        print("Final board:")
        print(env.render_board(0))

        # Determine winner from the accumulated rewards
        if episode_rewards[0] > episode_rewards[1]:
            winner = names[0]
        elif episode_rewards[1] > episode_rewards[0]:
            winner = names[1]
        else:
            winner = "Draw"

        print(f" Winner: {winner}")
        print(f" Scores: {names[0]}={episode_rewards[0]}, {names[1]}={episode_rewards[1]}")

        # Result keys are kept as "llm_score"/"random_score" for callers that
        # already read them; they hold player 0's and player 1's totals.
        episode_results.append({
            "episode": episode + 1,
            "winner": winner,
            "llm_score": episode_rewards[0],
            "random_score": episode_rewards[1],
            "steps": step_count
        })

    # Summary
    print("\n" + "="*50)
    print(" TOURNAMENT SUMMARY")
    print("="*50)

    total_episodes = len(episode_results)

    def _percent(count):
        """Return count as a percentage of played episodes (0.0 when none were played)."""
        return count / total_episodes * 100 if total_episodes else 0.0

    player_0_wins = sum(1 for r in episode_results if r["llm_score"] > r["random_score"])
    player_1_wins = sum(1 for r in episode_results if r["random_score"] > r["llm_score"])
    draws = sum(1 for r in episode_results if r["llm_score"] == r["random_score"])

    print(f"{names[0]} Wins: {player_0_wins}/{total_episodes} ({_percent(player_0_wins):.1f}%)")
    print(f"{names[1]} Wins: {player_1_wins}/{total_episodes} ({_percent(player_1_wins):.1f}%)")
    print(f"Draws: {draws}/{total_episodes} ({_percent(draws):.1f}%)")

    return episode_results

# Run the demo
print(" Starting Game Reasoning Arena Tic-Tac-Toe Demo")
print("This demonstrates how to use the framework for simple LLM vs Random gameplay")

# The LLM agent needs provider credentials (for Groq: the GROQ_API_KEY
# environment variable), so remind the reader before any game is played.
print("\n  Note: Make sure to set up your API keys for the LLM to work!")
print("For Groq models, set: GROQ_API_KEY environment variable")

# Reset first so a failed run never leaves a stale `results` from an earlier
# execution of this cell in the kernel.
results = None
try:
    results = run_tic_tac_toe_demo(config)
    print("\n Demo completed successfully!")
except Exception as e:
    # Best-effort demo cell: keep the notebook running, but include the
    # exception type because str(e) alone can be cryptic (a KeyError prints
    # only the missing key).
    print(f"\n Demo failed: {type(e).__name__}: {e}")
    print("This might be due to missing API keys or network issues.")

In [None]:
# 🔑 API Key Setup - REQUIRED for LLM vs Random gameplay
import os

print(" API Key Setup Instructions:")
print("1. Get a free Groq API key from: https://console.groq.com/")
print("2. Uncomment and set your API key below:")
print("3. Then run the LLM demo in the previous cell")
print()

# UNCOMMENT THE LINE BELOW AND ADD YOUR API KEY:
# os.environ["GROQ_API_KEY"] = "gsk_your_actual_groq_api_key_here"

# You can also set other provider keys if needed:
# os.environ["OPENAI_API_KEY"] = "your_openai_key_here"
# os.environ["TOGETHER_API_KEY"] = "your_together_ai_key_here"

print(" Tip: Keep your API keys secure and never commit them to public repositories!")
print()

# Fallback demo without API keys - uses random vs random
print(" Running fallback demo (Random vs Random) to test framework:")

# Same shape as the main configuration, but both players are random agents,
# so there is no model entry and no "llm_backend" section.
fallback_config = {
    "env_configs": [{"game_name": "tic_tac_toe", "max_game_rounds": None}],
    "num_episodes": 2,
    "seed": 42,
    "use_ray": False,
    "mode": "random_vs_random",
    "agents": {
        "player_0": {"type": "random"},
        "player_1": {"type": "random"}
    },
    "log_level": "INFO"
}

# Reset first so a failed run never leaves a stale `fallback_results` from an
# earlier execution of this cell in the kernel.
fallback_results = None
try:
    fallback_results = run_tic_tac_toe_demo(fallback_config)
    print("\n Framework test completed - everything is working!")
    print(" Now set your API key above to try LLM vs Random!")
except Exception as e:
    # Include the exception type: str(e) alone can be cryptic (a KeyError
    # prints only the missing key).
    print(f" Framework setup issue: {type(e).__name__}: {e}")
    print("Please check the installation in the previous cells.")