In [3]:
import gymnasium as gym
import numpy as np
import time
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.categorical import Categorical
import matplotlib.pyplot as plt
from cellitaire.environment.cellitaire_env import CellitaireEnv
from cellitaire.environment.rewards.reward import *
import random

In [2]:
# Board dimensions and reserved-slot count; forwarded to the environment
# below (num_reserved is also forwarded to CreatedMovesReward).
board_rows = 7
board_cols = 12
num_reserved = 6

# Reward signal for this run: a CombinedReward built from the components
# listed here, each with its own weight.
test_reward = CombinedReward(
    [
        PlacedCardInFoundationReward(weight=6),
        WinReward(),
        ConstantReward(weight=0.5),
        # Alternative components, currently disabled:
        # PlayedLegalMoveReward(weight=1),
        # PeriodicPlacedCardInFoundationReward(weight=4, reward_period=3),
        CreatedMovesReward(
            weight=1,
            num_reserved=num_reserved,
            foundation_count_dropoff=30,
        ),
    ]
)

# NOTE(review): max_moves presumably caps episode length (truncation) --
# confirm against CellitaireEnv.
env = CellitaireEnv(
    test_reward,
    rows=board_rows,
    cols=board_cols,
    num_reserved=num_reserved,
    max_moves=600,
)

In [18]:
# Random-policy baseline: play `games_to_sim` games, picking uniformly at
# random among the legal actions on every step, and collect per-step rewards
# plus one summary entry per game.
games_to_sim = 10000

# Seed the stdlib RNG so the sequence of sampled actions is repeatable when
# this cell is re-run.
# NOTE(review): env.reset() may draw from a different RNG (numpy, or a
# generator owned by the environment); if so, that one must be seeded as well
# for fully reproducible results.
random.seed(0)

rewards = []      # every per-step reward, pooled across all games
cards_saved = []  # env.game.foundation.total_cards() at the end of each game
game_scores = []  # sum of the step rewards of each game
move_counts = []  # number of steps taken in each game

for _game in range(games_to_sim):
    env.reset()
    done = False
    truncated = False
    game_rewards = []

    # Step until the environment reports either termination or truncation.
    while not (done or truncated):
        legal_actions = env.get_legal_actions_as_int()
        action = random.choice(legal_actions)
        _, reward, done, truncated, _ = env.step(action)
        game_rewards.append(reward)

    # Exactly one reward is appended per step, so the list length is the
    # number of moves played in this game.
    moves_played = len(game_rewards)

    rewards.extend(game_rewards)
    cards_saved.append(env.game.foundation.total_cards())
    game_scores.append(sum(game_rewards))
    move_counts.append(moves_played)

# Kept so any later cell that reads the finished-game count still works.
games_played = len(game_scores)

In [20]:
# Report the arithmetic mean of each collected metric, one line per metric.
# `rewards` pools every individual step reward, so its mean is per move;
# the other three lists hold one entry per game.
summary_rows = (
    ('reward', rewards),
    ('cards saved', cards_saved),
    ('game score', game_scores),
    ('move count', move_counts),
)
for metric_name, samples in summary_rows:
    print(f'Average {metric_name} {np.mean(samples)}')

Average reward 2.166237351965768
Average cards saved 7.4288
Average game score 245.7306
Average move count 113.4366
