In [6]:
# Import all necessary libraries
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output, HTML
from IPython.display import Video
import glob
import io
import base64
import PIL.Image
import time
import seaborn as sns

# Import our custom environment.
# The module is reloaded before its names are imported so that edits made to
# the prisoners_dilemma_env module since the kernel first imported it are
# picked up without a kernel restart. importlib.reload() re-executes the
# already-imported module in place; it does not remove it from the import cache.
import importlib
import prisoners_dilemma_env
importlib.reload(prisoners_dilemma_env)

# This must stay after the reload so the names below bind to the freshly
# executed definitions. Objects created from the pre-reload class are not
# updated by a reload and keep referring to the old class.
from prisoners_dilemma_env import IteratedPrisonersDilemma, COOPERATE, DEFECT, ACTION_MAP

# Confirmation output; the second line prints the repr of the imported class.
print("All libraries imported successfully!")
print(f"✓ Successfully imported IteratedPrisonersDilemma: {IteratedPrisonersDilemma}")


All libraries imported successfully!
✓ Successfully imported IteratedPrisonersDilemma: <class 'prisoners_dilemma_env.IteratedPrisonersDilemma'>


In [None]:
# Test 1: Initialize environment with different strategies
# Smoke test: build one environment for every (opponent strategy, memory
# scheme) combination and print its action/observation spaces. A failure for
# one combination is reported and the remaining combinations are still tried.
print("=" * 60, "Testing Environment Initialization", "=" * 60, sep="\n")

strategies = ["ALL-C", "ALL-D", "TFT", "IMPERFECT-TFT"]
memory_schemes = [1, 2]

# Flatten the two nested loops into a single pass over all combinations,
# keeping the original order (strategy is the outer dimension).
for strategy, memory in [(s, m) for s in strategies for m in memory_schemes]:
    try:
        env = IteratedPrisonersDilemma(
            opponent_strategy=strategy,
            memory_scheme=memory,
        )
        print(f"✓ Successfully created env: strategy={strategy}, memory={memory}")
        print(f"  - Action space: {env.action_space}")
        print(f"  - Observation space: {env.observation_space}")
    except Exception as e:
        # Report and continue so one bad combination does not hide the rest.
        print(f"✗ Error creating env: strategy={strategy}, memory={memory}")
        print(f"  Error: {e}")
    # Blank separator line after every combination, success or failure.
    print()



Testing Environment Initialization
✓ Successfully created env: strategy=ALL-C, memory=1
  - Action space: Discrete(2)
  - Observation space: Discrete(4)

✓ Successfully created env: strategy=ALL-C, memory=2
  - Action space: Discrete(2)
  - Observation space: Discrete(16)

✓ Successfully created env: strategy=ALL-D, memory=1
  - Action space: Discrete(2)
  - Observation space: Discrete(4)

✓ Successfully created env: strategy=ALL-D, memory=2
  - Action space: Discrete(2)
  - Observation space: Discrete(16)

✓ Successfully created env: strategy=TFT, memory=1
  - Action space: Discrete(2)
  - Observation space: Discrete(4)

✓ Successfully created env: strategy=TFT, memory=2
  - Action space: Discrete(2)
  - Observation space: Discrete(16)

✓ Successfully created env: strategy=IMPERFECT-TFT, memory=1
  - Action space: Discrete(2)
  - Observation space: Discrete(4)

✓ Successfully created env: strategy=IMPERFECT-TFT, memory=2
  - Action space: Discrete(2)
  - Observation space: Discrete(16)

In [9]:
# Test 2: Policy Evaluation
# Evaluates three fixed policies (always cooperate, always defect, 50/50
# random) on a TFT-opponent, Memory-1 environment and prints each result.
import traceback


def _report_policy_evaluation(env, label, policy, gamma=0.9, theta=1e-6, show_shape=False):
    """Print ``policy``, run ``env.policy_evaluation`` on it and print the outcome.

    Any exception raised by the evaluation (or while printing its result) is
    reported with its traceback instead of propagating, so the remaining
    policies in the cell are still exercised.

    Args:
        env: Environment object expected to expose
            ``policy_evaluation(policy, gamma=..., theta=...)``.
        label: Heading printed before the policy matrix.
        policy: Array of per-state action probabilities; in this cell it has
            shape ``(num_states, num_actions)``.
        gamma: Forwarded unchanged as the ``gamma`` keyword (presumably the
            discount factor -- confirm against the environment).
        theta: Forwarded unchanged as the ``theta`` keyword (presumably the
            convergence threshold -- confirm against the environment).
        show_shape: When true, also print ``policy.shape`` before the matrix.

    Returns:
        Whatever ``env.policy_evaluation`` returned, or ``None`` if an
        exception was raised and reported.
    """
    print(label)
    if show_shape:
        print(f"Policy shape: {policy.shape}")
    print(f"Policy matrix:\n{policy}")
    print()

    try:
        value_function = env.policy_evaluation(policy, gamma=gamma, theta=theta)
        print("✓ Policy evaluation successful!")
        print(f"Value function shape: {value_function.shape}")
        print(f"Value function: {value_function}")
        print(f"Mean value: {np.mean(value_function):.4f}")
        print()
        return value_function
    except Exception as e:
        print(f"✗ Error in policy evaluation: {e}")
        traceback.print_exc()
        print()
        return None


print("=" * 60)
print("Testing Policy Evaluation")
print("=" * 60)

# Create an environment (using Memory-1 for simplicity)
env = IteratedPrisonersDilemma(opponent_strategy="TFT", memory_scheme=1)
num_states = env.observation_space.n
num_actions = env.action_space.n

print("Environment: TFT opponent, Memory-1")
print(f"Number of states: {num_states}")
print(f"Number of actions: {num_actions}")
print()

# Deterministic policies: probability 1.0 on a single action in every state.
policy_always_cooperate = np.zeros((num_states, num_actions))
policy_always_cooperate[:, COOPERATE] = 1.0

policy_always_defect = np.zeros((num_states, num_actions))
policy_always_defect[:, DEFECT] = 1.0

# Random policy: 0.5 for every (state, action) entry.
policy_random = np.full((num_states, num_actions), 0.5)

# (heading, policy matrix, whether to print the matrix shape as well)
policy_cases = [
    ("Policy 1: Always Cooperate", policy_always_cooperate, True),
    ("Policy 2: Always Defect", policy_always_defect, False),
    ("Policy 3: Random (50/50)", policy_random, False),
]

for case_label, case_policy, case_show_shape in policy_cases:
    case_value = _report_policy_evaluation(
        env,
        case_label,
        case_policy,
        gamma=0.9,
        theta=1e-6,
        show_shape=case_show_shape,
    )
    # Only rebind on success, so `value_function` ends up holding the most
    # recent successful evaluation (and stays unbound if every case fails).
    if case_value is not None:
        value_function = case_value


Testing Policy Evaluation
Environment: TFT opponent, Memory-1
Number of states: 4
Number of actions: 2

Policy 1: Always Cooperate
Policy shape: (4, 2)
Policy matrix:
[[1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]

✗ Error in policy evaluation: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'

Policy 2: Always Defect
Policy matrix:
[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]]

✗ Error in policy evaluation: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'

Policy 3: Random (50/50)
Policy matrix:
[[0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]]

✗ Error in policy evaluation: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'



Traceback (most recent call last):
  File "/var/folders/zh/jlwy1dy16q9d290zpqgy3j6r0000gn/T/ipykernel_29411/1429095351.py", line 25, in <module>
    value_function = env.policy_evaluation(policy_always_cooperate, gamma=0.9, theta=1e-6)
AttributeError: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'
Traceback (most recent call last):
  File "/var/folders/zh/jlwy1dy16q9d290zpqgy3j6r0000gn/T/ipykernel_29411/1429095351.py", line 45, in <module>
    value_function = env.policy_evaluation(policy_always_defect, gamma=0.9, theta=1e-6)
AttributeError: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'
Traceback (most recent call last):
  File "/var/folders/zh/jlwy1dy16q9d290zpqgy3j6r0000gn/T/ipykernel_29411/1429095351.py", line 64, in <module>
    value_function = env.policy_evaluation(policy_random, gamma=0.9, theta=1e-6)
AttributeError: 'IteratedPrisonersDilemma' object has no attribute 'policy_evaluation'
