In [1]:
from maddpg.maddpg import MADDPG
from maddpg.buffer import MultiAgentReplayBuffer
from torch.utils.tensorboard import SummaryWriter
from PIL import Image, ImageDraw
import numpy as np
from tools.scopa_env import *
from tools.scopone_scientifico_sim import *
import datetime
import os
from tqdm import tqdm

In [2]:
scenario = f'testMADDPG_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}'

In [3]:
env = ScopaEnv()
n_agents = 4
actor_dims = [120 for _ in range(n_agents)]
critic_dims = sum(actor_dims)

In [4]:
n_actions = 40
maddpg_agents = MADDPG(actor_dims, critic_dims, n_agents, n_actions, 
                           fc1=64, fc2=64,  
                           alpha=0.01, beta=0.01, scenario=scenario,
                           chkpt_dir='tmp/maddpg/')

memory = MultiAgentReplayBuffer(1000000, critic_dims, actor_dims, 
                        n_actions, n_agents, batch_size=1024)

In [5]:
writer = SummaryWriter(log_dir=f'runs/{scenario}')

In [6]:
# Preload card images for tensorboard:)
def preload_card_images(image_folder = '/workspace/ScopaAI_ToM/res/cards', scale_factor=0.1):
    """
    Preload all card images and scale them down dramatically to save resources.
    Args:
        image_folder (str): Path to folder containing card images.
        scale_factor (float): Factor by which to scale down images.
    Returns:
        dict: Dictionary mapping card indices to scaled-down images.
    """
    card_images = {}
    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            # Extract rank and suit from filename
            card_name = filename.split(".")[0]  # Remove extension
            rank, suit = card_name.split("_of_")
            
            # Map suit to its corresponding value
            suit_values = {"diamonds": 30, "clubs": 20, "spades": 10, "hearts": 0}
            suit_value = suit_values[suit]

            if rank == "jack":
                rank = 8
            elif rank == "queen":
                rank = 9
            elif rank == "king":
                rank = 10
            elif rank == "ace":
                rank = 1
            
            # Calculate card index based on the new rule
            card_index = int(rank) + suit_value - 1
            
            # Load and scale down image
            image = Image.open(os.path.join(image_folder, filename))
            new_size = (int(image.width * scale_factor), int(image.height * scale_factor))
            scaled_image = image.resize(new_size)
            
            # Add to dictionary
            card_images[card_index] = scaled_image
    return card_images

card_images = preload_card_images()

In [7]:
def local_state_to_image(state, action, score = None, player = None):
    if state is None:
        return None

    if len(state.flatten()) != 120:
        raise ValueError('Local State must be 120 elements, received: ', len(state.flatten()))

    # Initialize image sections
    sections = {
        "Hand": state[:40],
        "Table": state[40:80],
        "Captures": state[80:120]
    }

    # Dimensions and image settings
    card_width, card_height = next(iter(card_images.values())).size
    card_width = int(card_width)
    card_height = int(card_height)
    row_width = 10  # Number of cards per row
    section_padding = 50  # Padding for the section labels
    section_spacing = 20  # Space between sections
    section_height = card_height + section_padding  # Total height of each section (cards + label)
    combined_height = (section_height * len(sections)) + (section_spacing * (len(sections) - 1))  # Total height
    combined_width = card_width * row_width  # Total width of the final image

    # Create the combined image with a black background
    combined_image = Image.new("RGBA", (combined_width, combined_height), (0, 0, 0, 255))
    draw = ImageDraw.Draw(combined_image)

    image_info_x = combined_width - 200
    image_info_y = 10

    if score is not None and player is not None:
        draw.text((image_info_x, image_info_y), f'Player {player.__hash__()}|Side {player.side}|Score{score}', fill="white")

    # Process each section (Hand, Table, Captures)
    y_offset = 0
    for section_name, section_state in sections.items():
        # Draw section label
        draw.text((10, y_offset + 10), section_name, fill="white")

        # Place cards in the current section
        active_indices = np.where(section_state != 0)[0]
        for i, index in enumerate(active_indices):
            # Calculate the correct card index for the global state
            card_index = index + {"Hand": 0, "Table": 40, "Captures": 80}[section_name]
            
            if index in card_images:
                card_image = card_images[index].resize((card_width, card_height))
                x = (i % row_width) * card_width
                y = y_offset + section_padding  # Place cards below the section label
                combined_image.paste(card_image, (x, y))

        # Update vertical offset for the next section (add spacing between sections)
        y_offset += section_height + section_spacing

    # Highlight the action card (if action is valid)
    if action in card_images:
        action_card = card_images[action].resize((card_width, card_height))
        # Find the section the action card belongs to
        if action < 40:
            action_section = "Hand"
            action_y_offset = 0
        elif action < 80:
            action_section = "Table"
            action_y_offset = section_height + section_spacing
        else:
            action_section = "Captures"
            action_y_offset = (section_height + section_spacing) * 2

        # Find the position of the action card within its section
        active_indices = np.where(sections[action_section] != 0)[0]
        action_position = active_indices.tolist().index(action % 40)  # Index of action card in active cards
        action_x = (action_position % row_width) * card_width
        action_y = action_y_offset + section_padding
        combined_image.paste(action_card, (action_x, action_y))
        # Draw a red border around the action card
        border_thickness = 2
        draw.rectangle(
            [action_x, action_y, action_x + card_width, action_y + card_height],
            outline="blue",
            width=border_thickness
        )

    return combined_image

In [8]:
s = np.ones(120)
s[:80] = 0
np.random.shuffle(s)
#local_state_to_image(s,0)

In [9]:
def log_state_to_tensorboard(state, action, player,player_index, step, score = None, extra = ''):
    image = local_state_to_image(state, action, score, player)
    image = np.array(image.convert('RGB'))
    writer.add_image(f"StateAction/P{player_index}" + extra, image.astype(np.uint8).transpose(2, 0, 1), step)

In [12]:
total_steps = 0
EPOCHS = 100
score_history = []
evaluate = False
best_score = 0

if evaluate:
    maddpg_agents.load_checkpoint()

for epoch in tqdm(range(EPOCHS)):
    obs = env.reset()
    env.playing_players = n_agents
    players = env.game.players
    score = 0
    done = [False]*n_agents

    ep_step = 0

    while not any(done):

        actions = maddpg_agents.choose_action(obs, players=players)
        #print(actions)
        new_observations = []
        new_rewards = []
        new_dones = []
        for i in range(n_agents):
            new_obs, reward, done[i], _ = env.step(actions[i], env.game.players[i], v=-6)
            if len(new_obs.flatten()) == 480:
                new_observations = new_obs
            else:
                new_observations.append(new_obs.flatten())
            new_rewards.append(reward)
            new_dones.append(done[i])
            score += reward
        i=0
        for o, a in zip(obs, actions):
            log_state_to_tensorboard(o, a, players[i], i, total_steps, score, extra='/Before')
            i+=1
        i=0

        
        for o, a in zip(new_observations, actions):
            log_state_to_tensorboard(o, -1, players[i], i, total_steps, score, extra='/After')
            i+=1

        if ep_step % 10 == 0:
            maddpg_agents.learn(memory)
        
        obs = new_observations

        score += sum([reward])
        total_steps += 1
        ep_step += 1

    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    if avg_score > best_score:
        os.makedirs('/workspace/ScopaAI_ToM/tmp/maddpg/' + scenario, exist_ok=True)
        maddpg_agents.save_checkpoint()
        best_score = avg_score
    print('Episode:', epoch, 'Score:', score, 'Average Score:', avg_score)

  0%|          | 0/100 [00:00<?, ?it/s]

... saving checkpoint ...


  1%|          | 1/100 [00:11<18:47, 11.39s/it]

Episode: 4 Score: 385.0 Average Score: 385.0
... saving checkpoint ...


  2%|▏         | 2/100 [00:19<15:33,  9.52s/it]

Episode: 4 Score: 449.85 Average Score: 417.425


  3%|▎         | 3/100 [00:33<18:42, 11.57s/it]

Episode: 4 Score: 203.99999999999994 Average Score: 346.2833333333333


  4%|▍         | 4/100 [00:46<19:09, 11.98s/it]

Episode: 4 Score: 155.00000000000006 Average Score: 298.4625


  5%|▌         | 5/100 [00:59<19:35, 12.37s/it]

Episode: 4 Score: 153.54999999999995 Average Score: 269.47999999999996


  6%|▌         | 6/100 [01:09<18:18, 11.68s/it]

Episode: 4 Score: 483.35 Average Score: 305.125


  7%|▋         | 7/100 [01:22<18:55, 12.20s/it]

Episode: 4 Score: 534.6000000000001 Average Score: 337.9071428571429


  8%|▊         | 8/100 [01:27<15:05,  9.85s/it]

Episode: 4 Score: -88.85000000000001 Average Score: 284.5625


  9%|▉         | 9/100 [01:34<13:34,  8.95s/it]

Episode: 4 Score: 322.84999999999997 Average Score: 288.81666666666666


 10%|█         | 10/100 [01:48<15:50, 10.56s/it]

Episode: 4 Score: 131.20000000000005 Average Score: 273.055


 11%|█         | 11/100 [02:04<17:50, 12.02s/it]

Episode: 4 Score: 653.75 Average Score: 307.6636363636364


 12%|█▏        | 12/100 [02:20<19:22, 13.22s/it]

Episode: 4 Score: 142.25000000000017 Average Score: 293.87916666666666


 13%|█▎        | 13/100 [02:35<20:10, 13.91s/it]

Episode: 4 Score: 146.4000000000001 Average Score: 282.5346153846154


 14%|█▍        | 14/100 [02:54<22:03, 15.38s/it]

Episode: 4 Score: 696.5999999999998 Average Score: 312.1107142857143


 15%|█▌        | 15/100 [03:15<24:17, 17.15s/it]

Episode: 4 Score: 381.44999999999993 Average Score: 316.73333333333335


 16%|█▌        | 16/100 [03:29<22:26, 16.03s/it]

Episode: 4 Score: 520.1500000000001 Average Score: 329.44687500000003


 17%|█▋        | 17/100 [03:38<19:26, 14.06s/it]

Episode: 4 Score: 110.25 Average Score: 316.5529411764706


 18%|█▊        | 18/100 [03:54<19:49, 14.51s/it]

Episode: 4 Score: 146.80000000000007 Average Score: 307.12222222222226


 19%|█▉        | 19/100 [04:08<19:20, 14.33s/it]

Episode: 4 Score: 599.75 Average Score: 322.52368421052637


 20%|██        | 20/100 [04:18<17:32, 13.16s/it]

Episode: 4 Score: 107.65000000000009 Average Score: 311.78000000000003


 21%|██        | 21/100 [04:26<15:16, 11.60s/it]

Episode: 4 Score: 393.04999999999995 Average Score: 315.65000000000003


 22%|██▏       | 22/100 [04:37<14:46, 11.37s/it]

Episode: 4 Score: 402.20000000000005 Average Score: 319.58409090909095


 23%|██▎       | 23/100 [04:50<15:25, 12.02s/it]

Episode: 4 Score: 198.7000000000001 Average Score: 314.3282608695652


 24%|██▍       | 24/100 [05:01<14:49, 11.70s/it]

Episode: 4 Score: 414.34999999999997 Average Score: 318.49583333333334


 25%|██▌       | 25/100 [05:14<15:02, 12.04s/it]

Episode: 4 Score: 491.65000000000015 Average Score: 325.422


 26%|██▌       | 26/100 [05:29<15:50, 12.85s/it]

Episode: 4 Score: 231.64999999999998 Average Score: 321.81538461538463


 27%|██▋       | 27/100 [05:37<13:49, 11.37s/it]

Episode: 4 Score: 440.09999999999997 Average Score: 326.19629629629634


 28%|██▊       | 28/100 [05:52<15:11, 12.65s/it]

Episode: 4 Score: 554.15 Average Score: 334.33750000000003


 29%|██▉       | 29/100 [06:03<14:16, 12.06s/it]

Episode: 4 Score: 87.5 Average Score: 325.82586206896553


 30%|███       | 30/100 [06:15<13:52, 11.90s/it]

Episode: 4 Score: 480.4000000000001 Average Score: 330.97833333333335


 31%|███       | 31/100 [06:30<14:47, 12.87s/it]

Episode: 4 Score: 532.45 Average Score: 337.47741935483873


 32%|███▏      | 32/100 [06:41<14:11, 12.52s/it]

Episode: 4 Score: 507.54999999999995 Average Score: 342.7921875


 33%|███▎      | 33/100 [06:56<14:46, 13.23s/it]

Episode: 4 Score: 232.7000000000001 Average Score: 339.45606060606065


 34%|███▍      | 34/100 [07:08<13:59, 12.71s/it]

Episode: 4 Score: 482.40000000000003 Average Score: 343.6602941176471


 35%|███▌      | 35/100 [07:27<15:58, 14.75s/it]

Episode: 4 Score: 280.0 Average Score: 341.8414285714286


 36%|███▌      | 36/100 [07:40<15:12, 14.27s/it]

Episode: 4 Score: 209.95 Average Score: 338.1777777777778


 37%|███▋      | 37/100 [07:54<14:45, 14.05s/it]

Episode: 4 Score: 595.3 Average Score: 345.12702702702705


 38%|███▊      | 38/100 [08:03<12:50, 12.43s/it]

Episode: 4 Score: 467.6000000000001 Average Score: 348.35


 39%|███▉      | 39/100 [08:15<12:40, 12.47s/it]

Episode: 4 Score: 217.05000000000007 Average Score: 344.98333333333335


 40%|████      | 40/100 [08:20<10:17, 10.29s/it]

Episode: 4 Score: 333.35 Average Score: 344.6925


 41%|████      | 41/100 [08:35<11:22, 11.57s/it]

Episode: 4 Score: 472.0 Average Score: 347.79756097560977


 42%|████▏     | 42/100 [08:46<11:03, 11.44s/it]

Episode: 4 Score: 514.8999999999999 Average Score: 351.7761904761905


 43%|████▎     | 43/100 [08:57<10:38, 11.21s/it]

Episode: 4 Score: 481.55 Average Score: 354.79418604651164


 44%|████▍     | 44/100 [09:09<10:39, 11.41s/it]

Episode: 4 Score: 613.0000000000001 Average Score: 360.66249999999997


 45%|████▌     | 45/100 [09:17<09:41, 10.57s/it]

Episode: 4 Score: 407.35 Average Score: 361.7


 46%|████▌     | 46/100 [09:30<10:11, 11.32s/it]

Episode: 4 Score: 608.7 Average Score: 367.0695652173913


 47%|████▋     | 47/100 [09:48<11:41, 13.23s/it]

Episode: 4 Score: 704.3499999999999 Average Score: 374.24574468085103


 48%|████▊     | 48/100 [09:53<09:25, 10.88s/it]

Episode: 4 Score: -39.29999999999998 Average Score: 365.6302083333333


 49%|████▉     | 49/100 [10:01<08:28,  9.98s/it]

Episode: 4 Score: 453.70000000000005 Average Score: 367.4275510204082


 50%|█████     | 50/100 [10:14<08:56, 10.72s/it]

Episode: 4 Score: 556.25 Average Score: 371.204


 51%|█████     | 51/100 [10:28<09:42, 11.90s/it]

Episode: 4 Score: 652.3 Average Score: 376.7156862745098


 52%|█████▏    | 52/100 [10:35<08:13, 10.28s/it]

Episode: 4 Score: -48.49999999999997 Average Score: 368.53846153846155


 53%|█████▎    | 53/100 [10:41<07:10,  9.15s/it]

Episode: 4 Score: 408.65000000000003 Average Score: 369.29528301886796


 54%|█████▍    | 54/100 [10:46<05:53,  7.68s/it]

Episode: 4 Score: -32.54999999999998 Average Score: 361.85370370370373


 55%|█████▌    | 55/100 [10:54<05:51,  7.81s/it]

Episode: 4 Score: 392.04999999999995 Average Score: 362.4027272727273


 56%|█████▌    | 56/100 [11:03<05:58,  8.14s/it]

Episode: 4 Score: 462.65 Average Score: 364.1928571428572


 57%|█████▋    | 57/100 [11:11<05:52,  8.20s/it]

Episode: 4 Score: 411.8 Average Score: 365.02807017543864


 58%|█████▊    | 58/100 [11:21<06:02,  8.63s/it]

Episode: 4 Score: 104.80000000000001 Average Score: 360.54137931034484


 59%|█████▉    | 59/100 [11:33<06:35,  9.64s/it]

Episode: 4 Score: 554.7500000000001 Average Score: 363.8330508474576


 60%|██████    | 60/100 [11:38<05:35,  8.40s/it]

Episode: 4 Score: 8.549999999999983 Average Score: 357.9116666666667


 61%|██████    | 61/100 [11:50<06:03,  9.33s/it]

Episode: 4 Score: 72.9500000000001 Average Score: 353.2401639344263


 62%|██████▏   | 62/100 [12:07<07:24, 11.71s/it]

Episode: 4 Score: 200.9500000000001 Average Score: 350.78387096774196


 63%|██████▎   | 63/100 [12:17<06:51, 11.11s/it]

Episode: 4 Score: 63.25000000000017 Average Score: 346.2198412698413


 64%|██████▍   | 64/100 [12:28<06:45, 11.26s/it]

Episode: 4 Score: 126.05000000000001 Average Score: 342.7796875


 65%|██████▌   | 65/100 [12:43<07:13, 12.39s/it]

Episode: 4 Score: 169.8 Average Score: 340.11846153846153


 66%|██████▌   | 66/100 [12:52<06:18, 11.14s/it]

Episode: 4 Score: 3.8499999999999943 Average Score: 335.0234848484848


 67%|██████▋   | 67/100 [13:03<06:15, 11.37s/it]

Episode: 4 Score: 232.3 Average Score: 333.4902985074627


 68%|██████▊   | 68/100 [13:15<06:04, 11.40s/it]

Episode: 4 Score: 181.60000000000014 Average Score: 331.25661764705876


 69%|██████▉   | 69/100 [13:28<06:13, 12.04s/it]

Episode: 4 Score: 542.3000000000002 Average Score: 334.3152173913043


 70%|███████   | 70/100 [13:38<05:34, 11.15s/it]

Episode: 4 Score: -1.4499999999999318 Average Score: 329.51857142857136


 71%|███████   | 71/100 [13:49<05:29, 11.35s/it]

Episode: 4 Score: 631.8000000000001 Average Score: 333.77605633802807


 72%|███████▏  | 72/100 [14:00<05:14, 11.24s/it]

Episode: 4 Score: 457.05 Average Score: 335.4881944444445


 73%|███████▎  | 73/100 [14:08<04:35, 10.22s/it]

Episode: 4 Score: 347.65 Average Score: 335.654794520548


 74%|███████▍  | 74/100 [14:16<04:03,  9.37s/it]

Episode: 4 Score: 380.65000000000003 Average Score: 336.26283783783794


 75%|███████▌  | 75/100 [14:31<04:40, 11.20s/it]

Episode: 4 Score: 578.45 Average Score: 339.49200000000013


 76%|███████▌  | 76/100 [14:40<04:11, 10.50s/it]

Episode: 4 Score: -16.00000000000003 Average Score: 334.81447368421067


 77%|███████▋  | 77/100 [14:52<04:14, 11.06s/it]

Episode: 4 Score: 214.50000000000017 Average Score: 333.25194805194815


 78%|███████▊  | 78/100 [15:00<03:43, 10.18s/it]

Episode: 4 Score: 451.45000000000005 Average Score: 334.76730769230784


 79%|███████▉  | 79/100 [15:12<03:43, 10.65s/it]

Episode: 4 Score: 452.50000000000006 Average Score: 336.257594936709


 80%|████████  | 80/100 [15:21<03:20, 10.00s/it]

Episode: 4 Score: 408.5000000000001 Average Score: 337.1606250000001


 81%|████████  | 81/100 [15:36<03:42, 11.70s/it]

Episode: 4 Score: 522.6000000000001 Average Score: 339.45000000000005


 82%|████████▏ | 82/100 [15:48<03:28, 11.61s/it]

Episode: 4 Score: 61.44999999999993 Average Score: 336.05975609756103


 83%|████████▎ | 83/100 [16:00<03:22, 11.91s/it]

Episode: 4 Score: 527.1499999999999 Average Score: 338.36204819277117


 84%|████████▍ | 84/100 [16:14<03:17, 12.32s/it]

Episode: 4 Score: 109.90000000000009 Average Score: 335.642261904762


 85%|████████▌ | 85/100 [16:22<02:48, 11.23s/it]

Episode: 4 Score: 419.05000000000007 Average Score: 336.62352941176476


 86%|████████▌ | 86/100 [16:35<02:42, 11.58s/it]

Episode: 4 Score: 472.1000000000001 Average Score: 338.1988372093024


 87%|████████▋ | 87/100 [16:48<02:35, 11.95s/it]

Episode: 4 Score: 495.95 Average Score: 340.0120689655173


 88%|████████▊ | 88/100 [17:05<02:42, 13.51s/it]

Episode: 4 Score: 217.14999999999986 Average Score: 338.61590909090916


 89%|████████▉ | 89/100 [17:18<02:26, 13.35s/it]

Episode: 4 Score: 168.99999999999994 Average Score: 336.71011235955064


 90%|█████████ | 90/100 [17:28<02:04, 12.50s/it]

Episode: 4 Score: 395.70000000000005 Average Score: 337.3655555555556


 91%|█████████ | 91/100 [17:42<01:56, 12.91s/it]

Episode: 4 Score: 608.3000000000002 Average Score: 340.3428571428572


 92%|█████████▏| 92/100 [17:56<01:45, 13.22s/it]

Episode: 4 Score: 287.2 Average Score: 339.7652173913044


 93%|█████████▎| 93/100 [18:02<01:17, 11.12s/it]

Episode: 4 Score: 424.9 Average Score: 340.6806451612904


 94%|█████████▍| 94/100 [18:12<01:04, 10.73s/it]

Episode: 4 Score: 166.19999999999993 Average Score: 338.82446808510645


 95%|█████████▌| 95/100 [18:21<00:51, 10.27s/it]

Episode: 4 Score: 31.399999999999977 Average Score: 335.58842105263165


 96%|█████████▌| 96/100 [18:40<00:51, 12.76s/it]

Episode: 4 Score: 747.9499999999999 Average Score: 339.8838541666667


 97%|█████████▋| 97/100 [18:53<00:38, 12.81s/it]

Episode: 4 Score: 117.20000000000005 Average Score: 337.588144329897


 98%|█████████▊| 98/100 [19:03<00:24, 12.18s/it]

Episode: 4 Score: 413.0000000000001 Average Score: 338.3576530612246


 99%|█████████▉| 99/100 [19:19<00:13, 13.34s/it]

Episode: 4 Score: 169.2 Average Score: 336.64898989898995


100%|██████████| 100/100 [19:32<00:00, 11.72s/it]

Episode: 4 Score: 538.5 Average Score: 338.6675000000001





to implement and keep track on to see if algo is getting better
- difference in scores
- times a card that can captures is not chosen
- capture size of ace
- number of scopas overtime