In [None]:
#  Imports
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn

#  Rebuild the DQN model for PyTorch file
class DQN(nn.Module):
    def __init__(self):
        super(DQN, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(6 * 7, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 7)
        )

    def forward(self, x):
        return self.model(x)

#  Model list with correct types
model_infos = [
    {'name': 'actor_critic_connect4', 'file': 'actor_critic_connect4.h5', 'type': 'keras'},
    {'name': 'best_model_mcts', 'file': 'best_model_mcts.pt', 'type': 'torch'},
    {'name': 'dqn_ep400', 'file': 'dqn_ep400.h5', 'type': 'keras'},
    {'name': 'M1_round20_2', 'file': 'M1_round20-2.h5', 'type': 'keras'},
    {'name': 'M1_policy_iter20000', 'file': 'M1_policy_iter20000.h5', 'type': 'keras'},
    {'name': 'DDQN_100_new', 'file': 'DDQN_100_new.h5', 'type': 'keras'},
    {'name': 'DDQN_1000', 'file': 'DDQN_1000.h5', 'type': 'keras'},
    {'name': 'DDQN_2000_4_28', 'file': 'DDQN_2000_4-28.h5', 'type': 'keras'},
    {'name': 'DDQN_1000_4_28', 'file': 'DDQN_1000_4-28.h5', 'type': 'keras'}
]

#  Load all models into dictionary
loaded_models = {}

for info in model_infos:
    if info['type'] == 'keras':
        model = tf.keras.models.load_model(info['file'], compile=False)
    elif info['type'] == 'torch':
        model = DQN()
        model.load_state_dict(torch.load(info['file']))
        model.eval()
    else:
        raise ValueError(f"Unknown model type {info['type']} for {info['name']}")
    
    loaded_models[info['name']] = {
        'model': model,
        'type': info['type']
    }

print(" All models loaded successfully!")


✅ All models loaded successfully!


In [13]:
def predict_q_values(model_info, board):
    """
    Takes a model_info dictionary and a 6x7 board,
    returns Q-values for each column (7 values).
    """
    if model_info['type'] == 'keras':
        input_shape = model_info['model'].input_shape

        if input_shape[-1] == 1:
            # Model expects (6,7,1) input
            board_input = board.reshape(1, 6, 7, 1)
        elif input_shape[-1] == 2:
            # Model expects (6,7,2) input
            board_input = np.zeros((1, 6, 7, 2), dtype=np.float32)
            board_input[0, :, :, 0] = (board == -1)
            board_input[0, :, :, 1] = (board == 1)
        else:
            raise ValueError(f"Unsupported input shape for Keras model: {input_shape}")

        q_values = model_info['model'].predict(board_input, verbose=0)[0]

    elif model_info['type'] == 'torch':
        board_tensor = torch.tensor(board, dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = model_info['model'](board_tensor).cpu().numpy()[0]
    else:
        raise ValueError("Unknown model type for prediction.")

    # 🛡️ Safety check: must be 7 outputs
    if q_values.shape[0] != 7:
        raise ValueError(f"Model {model_info} returned {q_values.shape[0]} outputs instead of 7.")

    return q_values


In [11]:
def play_game(model1_info, model2_info, starting_player=1):
    """
    Plays one game between model1 and model2.
    starting_player = 1 (model1) or -1 (model2)
    Returns winner: 1 (model1 wins), -1 (model2 wins), 0 (draw)
    """
    board = np.zeros((6, 7), dtype=int)
    current_player = starting_player

    while True:
        legal_moves = [c for c in range(7) if board[0][c] == 0]
        if not legal_moves:
            return 0  # Draw

        # Select action
        if current_player == 1:
            model_info = model1_info
        else:
            model_info = model2_info

        q_values = predict_q_values(model_info, board)

        # Mask illegal moves
        q_values_masked = np.full_like(q_values, -np.inf)
        for move in legal_moves:
            q_values_masked[move] = q_values[move]

        action = int(np.argmax(q_values_masked))

        # Apply move
        for row in reversed(range(6)):
            if board[row, action] == 0:
                board[row, action] = current_player
                break

        # Check winner
        winner = check_winner(board)
        if winner != 0:
            return winner

        current_player *= -1  # Switch player


In [15]:
def check_winner(board):
    """
    Checks the board for a winner.
    Returns:
      1 if player 1 wins,
     -1 if player -1 wins,
      0 if no winner yet.
    """
    rows, cols = board.shape

    # Check horizontal
    for r in range(rows):
        for c in range(cols - 3):
            window = board[r, c:c+4]
            if np.all(window == 1):
                return 1
            if np.all(window == -1):
                return -1

    # Check vertical
    for r in range(rows - 3):
        for c in range(cols):
            window = board[r:r+4, c]
            if np.all(window == 1):
                return 1
            if np.all(window == -1):
                return -1

    # Check positively sloped diagonals
    for r in range(rows - 3):
        for c in range(cols - 3):
            window = [board[r+i][c+i] for i in range(4)]
            if np.all(np.array(window) == 1):
                return 1
            if np.all(np.array(window) == -1):
                return -1

    # Check negatively sloped diagonals
    for r in range(3, rows):
        for c in range(cols - 3):
            window = [board[r-i][c+i] for i in range(4)]
            if np.all(np.array(window) == 1):
                return 1
            if np.all(np.array(window) == -1):
                return -1

    return 0  # No winner yet


In [None]:
from collections import defaultdict
from tqdm import tqdm  # Progress bar

# Track results
results = defaultdict(lambda: defaultdict(lambda: {'wins': 0, 'losses': 0, 'ties': 0}))

model_names = list(loaded_models.keys())
num_games_per_pair = 10  # 5 starting first, 5 starting second

#  Total number of matchups
total_matchups = len(model_names) * (len(model_names) - 1) // 2

#  Progress bar for matchups
with tqdm(total=total_matchups, desc="Playing tournament") as pbar:
    for i in range(len(model_names)):
        for j in range(i + 1, len(model_names)):
            model1_name = model_names[i]
            model2_name = model_names[j]
            model1_info = loaded_models[model1_name]
            model2_info = loaded_models[model2_name]

            try:
                # Test if both models can predict properly
                dummy_board = np.zeros((6, 7), dtype=int)
                _ = predict_q_values(model1_info, dummy_board)
                _ = predict_q_values(model2_info, dummy_board)
            except Exception as e:
                print(f" Skipping matchup {model1_name} vs {model2_name} because: {e}")
                pbar.update(1)
                continue

            # Play games
            for game in range(num_games_per_pair // 2):
                winner = play_game(model1_info, model2_info, starting_player=1)
                if winner == 1:
                    results[model1_name][model2_name]['wins'] += 1
                    results[model2_name][model1_name]['losses'] += 1
                elif winner == -1:
                    results[model1_name][model2_name]['losses'] += 1
                    results[model2_name][model1_name]['wins'] += 1
                else:
                    results[model1_name][model2_name]['ties'] += 1
                    results[model2_name][model1_name]['ties'] += 1

            for game in range(num_games_per_pair // 2):
                winner = play_game(model1_info, model2_info, starting_player=-1)
                if winner == 1:
                    results[model1_name][model2_name]['wins'] += 1
                    results[model2_name][model1_name]['losses'] += 1
                elif winner == -1:
                    results[model1_name][model2_name]['losses'] += 1
                    results[model2_name][model1_name]['wins'] += 1
                else:
                    results[model1_name][model2_name]['ties'] += 1
                    results[model2_name][model1_name]['ties'] += 1

            pbar.update(1)  #  Update progress bar after each matchup

print(" Tournament completed!")


Playing tournament:   6%|▌         | 2/36 [00:00<00:05,  6.69it/s]

⚠️ Skipping matchup actor_critic_connect4 vs best_model_mcts because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.
⚠️ Skipping matchup actor_critic_connect4 vs dqn_ep400 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.


Playing tournament:  11%|█         | 4/36 [00:00<00:05,  6.18it/s]

⚠️ Skipping matchup actor_critic_connect4 vs M1_round20_2 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.
⚠️ Skipping matchup actor_critic_connect4 vs M1_policy_iter20000 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.


Playing tournament:  14%|█▍        | 5/36 [00:00<00:05,  6.04it/s]

⚠️ Skipping matchup actor_critic_connect4 vs DDQN_100_new because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.


Playing tournament:  17%|█▋        | 6/36 [00:01<00:05,  5.51it/s]

⚠️ Skipping matchup actor_critic_connect4 vs DDQN_1000 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.
⚠️ Skipping matchup actor_critic_connect4 vs DDQN_2000_4_28 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.


Playing tournament:  22%|██▏       | 8/36 [00:01<00:04,  5.62it/s]

⚠️ Skipping matchup actor_critic_connect4 vs DDQN_1000_4_28 because: Model {'model': <Functional name=functional_78, built=True>, 'type': 'keras'} returned 1 outputs instead of 7.


Playing tournament: 100%|██████████| 36/36 [09:53<00:00, 16.50s/it]

✅ Tournament completed!





In [None]:
print("\n Tournament Results:")
for model1 in model_names:
    for model2 in model_names:
        if model1 != model2:
            r = results[model1][model2]
            total = r['wins'] + r['losses'] + r['ties']
            if total > 0:
                win_rate = r['wins'] / total * 100
                print(f"{model1} vs {model2}: {r['wins']}W - {r['losses']}L - {r['ties']}T (Win Rate: {win_rate:.1f}%)")



🏆 Tournament Results:
best_model_mcts vs dqn_ep400: 5W - 5L - 0T (Win Rate: 50.0%)
best_model_mcts vs M1_round20_2: 0W - 10L - 0T (Win Rate: 0.0%)
best_model_mcts vs M1_policy_iter20000: 5W - 5L - 0T (Win Rate: 50.0%)
best_model_mcts vs DDQN_100_new: 5W - 5L - 0T (Win Rate: 50.0%)
best_model_mcts vs DDQN_1000: 5W - 5L - 0T (Win Rate: 50.0%)
best_model_mcts vs DDQN_2000_4_28: 0W - 10L - 0T (Win Rate: 0.0%)
best_model_mcts vs DDQN_1000_4_28: 0W - 10L - 0T (Win Rate: 0.0%)
dqn_ep400 vs best_model_mcts: 5W - 5L - 0T (Win Rate: 50.0%)
dqn_ep400 vs M1_round20_2: 0W - 10L - 0T (Win Rate: 0.0%)
dqn_ep400 vs M1_policy_iter20000: 10W - 0L - 0T (Win Rate: 100.0%)
dqn_ep400 vs DDQN_100_new: 5W - 5L - 0T (Win Rate: 50.0%)
dqn_ep400 vs DDQN_1000: 5W - 5L - 0T (Win Rate: 50.0%)
dqn_ep400 vs DDQN_2000_4_28: 0W - 10L - 0T (Win Rate: 0.0%)
dqn_ep400 vs DDQN_1000_4_28: 10W - 0L - 0T (Win Rate: 100.0%)
M1_round20_2 vs best_model_mcts: 10W - 0L - 0T (Win Rate: 100.0%)
M1_round20_2 vs dqn_ep400: 10W - 0L -

In [None]:
import pandas as pd

#  Build wins and losses summary
summary_data = []

for model1 in model_names:
    total_wins = 0
    total_losses = 0

    for model2 in model_names:
        if model1 != model2:
            r = results[model1][model2]
            total_wins += r['wins']
            total_losses += r['losses']

    summary_data.append({'Model': model1, 'Wins': total_wins, 'Losses': total_losses})

#  Create DataFrame
summary_df = pd.DataFrame(summary_data)

# Sort by Wins if you want
summary_df = summary_df.sort_values(by='Wins', ascending=False).reset_index(drop=True)

#  Display
print("\ Wins and Losses Summary:")
display(summary_df)



🏆 Wins and Losses Summary:


Unnamed: 0,Model,Wins,Losses
0,M1_round20_2,70,0
1,DDQN_2000_4_28,45,25
2,dqn_ep400,35,35
3,DDQN_100_new,35,35
4,M1_policy_iter20000,30,40
5,DDQN_1000_4_28,30,40
6,best_model_mcts,20,50
7,DDQN_1000,15,55
8,actor_critic_connect4,0,0


In [None]:
import pandas as pd

#  Build "beats" matrix
beats_data = []

for model1 in model_names:
    row = {}
    for model2 in model_names:
        if model1 == model2:
            row[model2] = "-"
        else:
            r = results[model1][model2]
            row[model2] = r['wins']
    beats_data.append(row)

beats_df = pd.DataFrame(beats_data, index=model_names, columns=model_names)

#  Display
print("\n Beat-Each-Other Matrix (Wins):")
display(beats_df)



🏆 Beat-Each-Other Matrix (Wins):


Unnamed: 0,actor_critic_connect4,best_model_mcts,dqn_ep400,M1_round20_2,M1_policy_iter20000,DDQN_100_new,DDQN_1000,DDQN_2000_4_28,DDQN_1000_4_28
actor_critic_connect4,-,0,0,0,0,0,0,0,0
best_model_mcts,0,-,5,0,5,5,5,0,0
dqn_ep400,0,5,-,0,10,5,5,0,10
M1_round20_2,0,10,10,-,10,10,10,10,10
M1_policy_iter20000,0,5,0,0,-,10,10,0,5
DDQN_100_new,0,5,5,0,0,-,10,5,10
DDQN_1000,0,5,5,0,0,0,-,5,0
DDQN_2000_4_28,0,10,10,0,10,5,5,-,5
DDQN_1000_4_28,0,10,0,0,5,0,10,5,-


Playing Against Model

In [None]:
import numpy as np

def human_vs_model(model_info):
    board = np.zeros((6, 7), dtype=int)
    human_player = 1  # You are player 1
    model_player = -1 # Model is player -1
    current_player = 1  # Human starts

    def print_board(board):
        display_board = board.copy()
        display_board = display_board.astype(str)
        display_board[display_board == "0"] = "."
        display_board[display_board == "1"] = "X"  # Human pieces
        display_board[display_board == "-1"] = "O" # Model pieces
        print("\n".join([" ".join(row) for row in display_board]))
        print("-" * 14)
        print("0 1 2 3 4 5 6 (columns)")

    def get_legal_moves(board):
        return [c for c in range(7) if board[0][c] == 0]

    def apply_move(board, action, player):
        new_board = board.copy()
        for row in reversed(range(6)):
            if new_board[row][action] == 0:
                new_board[row][action] = player
                break
        return new_board

    print(" Let's play! You are 'X'. Model is 'O'.")
    print_board(board)

    while True:
        legal_moves = get_legal_moves(board)
        if not legal_moves:
            print(" It's a draw!")
            break

        if current_player == human_player:
            move = None
            while move not in legal_moves:
                try:
                    move = int(input(f"Your move (choose column {legal_moves}): "))
                except ValueError:
                    continue
            board = apply_move(board, move, human_player)

        else:
            q_values = predict_q_values(model_info, board)
            q_values_masked = np.full_like(q_values, -np.inf)
            for m in legal_moves:
                q_values_masked[m] = q_values[m]
            model_move = int(np.argmax(q_values_masked))
            print(f"\n Model chooses column {model_move}")
            board = apply_move(board, model_move, model_player)

        print_board(board)

        winner = check_winner(board)
        if winner == human_player:
            print(" You win! Congratulations!")
            break
        elif winner == model_player:
            print(" Model wins. Better luck next time!")
            break

        current_player *= -1  # Switch turns

# Load the winning model info
model_info = loaded_models['M1_round20_2']

# Start playing!
human_vs_model(model_info)


🎯 Let's play! You are 'X'. Model is 'O'.
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 3
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . X O . . .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . X . . .
. . X O . . .
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 3
. . . . . . .
. . . . . . .
. . . . . . .
. . . O . . .
. . . X . . .
. . X O . . .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . O . . .
. . . X . . .
. . X O . X .
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 2
. . . . . . .
. . . . . . .
. . . . . . .
. . . O . . .
. . O X . . .
. . X O . X .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . 

Playing Against DDQN

In [None]:
import numpy as np

def human_vs_model(model_info):
    board = np.zeros((6, 7), dtype=int)
    human_player = 1  # You are player 1
    model_player = -1 # Model is player -1
    current_player = 1  # Human starts

    def print_board(board):
        display_board = board.copy()
        display_board = display_board.astype(str)
        display_board[display_board == "0"] = "."
        display_board[display_board == "1"] = "X"  # Human pieces
        display_board[display_board == "-1"] = "O" # Model pieces
        print("\n".join([" ".join(row) for row in display_board]))
        print("-" * 14)
        print("0 1 2 3 4 5 6 (columns)")

    def get_legal_moves(board):
        return [c for c in range(7) if board[0][c] == 0]

    def apply_move(board, action, player):
        new_board = board.copy()
        for row in reversed(range(6)):
            if new_board[row][action] == 0:
                new_board[row][action] = player
                break
        return new_board

    print(" Let's play! You are 'X'. Model is 'O'.")
    print_board(board)

    while True:
        legal_moves = get_legal_moves(board)
        if not legal_moves:
            print(" It's a draw!")
            break

        if current_player == human_player:
            move = None
            while move not in legal_moves:
                try:
                    move = int(input(f"Your move (choose column {legal_moves}): "))
                except ValueError:
                    continue
            board = apply_move(board, move, human_player)

        else:
            q_values = predict_q_values(model_info, board)
            q_values_masked = np.full_like(q_values, -np.inf)
            for m in legal_moves:
                q_values_masked[m] = q_values[m]
            model_move = int(np.argmax(q_values_masked))
            print(f"\n Model chooses column {model_move}")
            board = apply_move(board, model_move, model_player)

        print_board(board)

        winner = check_winner(board)
        if winner == human_player:
            print(" You win! Congratulations!")
            break
        elif winner == model_player:
            print(" Model wins. Better luck next time!")
            break

        current_player *= -1  # Switch turns

#  Load the new model info (DDQN_2000_4_28)
model_info = loaded_models['DDQN_2000_4_28']

#  Start playing!
human_vs_model(model_info)


🎯 Let's play! You are 'X'. Model is 'O'.
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . X . . . .
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 0
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
O . X . . . .
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
O . X . X . .
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 6
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
O . X . X . O
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
O . X X X . O
--------------
0 1 2 3 4 5 6 (columns)

🤖 Model chooses column 5
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
. . . . . . .
O . X X X O O
--------------
0 1 2 3 4 5 6 (columns)
. . . . . . .
. . . 