In [2]:
import random
import json
from copy import deepcopy
from tqdm import tqdm

In [3]:
# The five card colors used by the game.
colors = ["green", "red", "blue", "yellow", "purple"]
# Number of cards of each color in a full deck (8 + 4 * 7 = 36 cards).
color_counts = {"green": 8, "red": 7, "blue": 7, "yellow": 7, "purple": 7}

def init_deck():
    """Build a full deck and return it in random order.

    The deck holds ``color_counts[c]`` copies of every color ``c`` listed in
    ``colors``; it is shuffled in place with the global ``random`` generator.

    Returns:
        list[str]: the shuffled card colors.
    """
    cards = [name for name in colors for _ in range(color_counts[name])]
    random.shuffle(cards)
    return cards

def deal_hands(deck):
    """Deal two hands of 14 cards each from the end of ``deck``.

    Cards are removed from ``deck`` (it is mutated): the first hand takes the
    last 14 cards, the second hand the 14 before those.

    Args:
        deck: list of cards; needs at least 28 entries, otherwise ``pop``
            raises ``IndexError``.

    Returns:
        tuple[list, list]: ``(hand1, hand2)``.
    """
    dealt = ([], [])
    for hand in dealt:
        for _ in range(14):
            hand.append(deck.pop())
    return dealt

def get_row1_range(board):
    """Return the leftmost and rightmost occupied columns of row 1.

    Args:
        board: mapping whose keys are ``"row-col"`` position strings.

    Returns:
        tuple: ``(min_col, max_col)`` as ints, or ``(None, None)`` when no key
        starts with ``"1-"``.
    """
    lowest = highest = None
    for pos in board:
        if not pos.startswith('1-'):
            continue
        col = int(pos.split('-')[1])
        lowest = col if lowest is None else min(lowest, col)
        highest = col if highest is None else max(highest, col)
    return lowest, highest

def legal_moves(board, hand):
    """List every ``(position, color)`` move the holder of ``hand`` may play.

    Rules implemented:
      * Empty row 1: the only legal position is ``1-7`` (any color in hand).
      * Row 1 holds at most 7 cards inside columns 1..13 and grows only at its
        two ends (one column left of the leftmost card or one right of the
        rightmost card).
      * A slot ``r-c`` in rows 2..7 (columns ``1..14-r``) is playable when both
        ``(r-1)-c`` and ``(r-1)-(c+1)`` are occupied, the slot itself is empty,
        and the played color equals the color of at least one of those two
        supporting cards.

    Colors are visited in sorted order so the returned list is deterministic;
    iterating a raw ``set`` of strings would make the order (and therefore any
    "first best move" tie-break done by callers) vary between interpreter runs.

    Args:
        board: dict mapping ``"row-col"`` strings to the color placed there.
        hand: list of color strings held by the player.

    Returns:
        list[tuple[str, str]]: legal moves; empty when ``hand`` is empty or no
        move is available.
    """
    hand_colors = sorted(set(hand))

    # Single pass over the board gives both the row-1 count and its extent.
    row1_cols = [int(pos.split('-')[1]) for pos in board if pos.startswith('1-')]
    if not row1_cols:
        return [('1-7', color) for color in hand_colors]

    moves = []
    if len(row1_cols) < 7:
        leftmost, rightmost = min(row1_cols), max(row1_cols)
        if leftmost > 1:
            moves.extend((f"1-{leftmost - 1}", color) for color in hand_colors)
        if rightmost < 13:
            moves.extend((f"1-{rightmost + 1}", color) for color in hand_colors)

    for row in range(2, 8):  # rows 2 to 7
        for col in range(1, 14 - row + 1):
            pos = f"{row}-{col}"
            below_left = f"{row - 1}-{col}"
            below_right = f"{row - 1}-{col + 1}"
            if pos in board or below_left not in board or below_right not in board:
                continue
            supporting = (board[below_left], board[below_right])
            moves.extend((pos, color) for color in hand_colors if color in supporting)
    return moves

In [4]:
def greedy_move(board, hand, legal):
    """Pick the legal move whose color is the most frequent one in ``hand``.

    Ties go to the move that appears first in ``legal``.

    Args:
        board: current board; unused, kept so all strategies share a signature.
        hand: list of color strings held by the player.
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        The chosen ``(position, color)`` tuple, or ``None`` when ``legal`` is
        empty (same convention as ``prioritize_versatility_move`` and
        ``safe_first_move``).
    """
    if not legal:
        return None
    # max() returns the first maximal element, so no full sort is needed.
    return max(legal, key=lambda move: hand.count(move[1]))

def simulate_one_game_greedy_samples():
    """Play one two-player game where both seats use ``greedy_move`` and log samples.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = greedy_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples

In [5]:
from copy import deepcopy
import random

# --- Prioritize Versatility Core ---
def versatility_score(board, hand, pos, color):
    """Count the legal moves left to the same hand after playing ``color`` at ``pos``.

    The move is applied to copies, so ``board`` and ``hand`` are not modified.

    Args:
        board: dict mapping position strings to colors.
        hand: list of colors; must contain ``color`` (``remove`` raises
            ``ValueError`` otherwise).
        pos: position string of the candidate move.
        color: color of the candidate move.

    Returns:
        int: ``len(legal_moves(board_after_move, hand_after_move))``.
    """
    board_after = deepcopy(board)
    board_after[pos] = color
    hand_after = hand.copy()
    hand_after.remove(color)
    return len(legal_moves(board_after, hand_after))

def prioritize_versatility_move(board, hand, legal):
    """Choose at random among the legal moves with the highest versatility score.

    Args:
        board: dict mapping position strings to colors.
        hand: list of colors held by the player.
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        One ``(position, color)`` tuple with maximal ``versatility_score``, or
        ``None`` when ``legal`` is empty.
    """
    if not legal:
        return None
    scored = [(versatility_score(board, hand, pos, color), (pos, color)) for (pos, color) in legal]
    best = max(score for score, _ in scored)
    top_moves = [move for score, move in scored if score == best]
    return random.choice(top_moves)

# --- Generate samples in the framework's format ---
def simulate_one_game_versatility_samples():
    """Play one two-player game where both seats use ``prioritize_versatility_move``.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = prioritize_versatility_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples


In [6]:
import random
from copy import deepcopy

# NOTE(review): same list as the earlier `colors` definition; none of the
# functions defined in this cell read it.
colors = ["green", "red", "blue", "yellow", "purple"]

def risk_of_move(board, move, max_row=7, base_cols=13):
    """Count the empty slots directly above a move's position (lower is safer).

    A card at ``row-col`` is one of the two supports of the slots
    ``(row+1)-col`` and ``(row+1)-(col-1)``. The risk is the number of those
    slots that exist on the board and are still empty. Slots outside the
    pyramid are ignored: rows above ``max_row`` and columns outside
    ``1..(base_cols + 1 - row)`` for the upper row, matching the ranges that
    ``legal_moves`` enumerates.

    Args:
        board: dict mapping ``"row-col"`` strings to colors.
        move: ``(position, color)`` tuple; only the position is used.
        max_row: index of the top row of the pyramid.
        base_cols: number of columns available in row 1.

    Returns:
        int: 0, 1 or 2.
    """
    pos, _color = move
    row, col = map(int, pos.split('-'))
    upper_row = row + 1
    if upper_row > max_row:
        # The apex has nothing above it.
        return 0
    upper_width = base_cols + 1 - upper_row  # each row is one slot narrower
    risk = 0
    for upper_col in (col, col - 1):
        if 1 <= upper_col <= upper_width and f"{upper_row}-{upper_col}" not in board:
            risk += 1
    return risk

def safe_first_move(board, hand, legal):
    """Choose at random among the legal moves with the lowest ``risk_of_move``.

    Args:
        board: dict mapping position strings to colors.
        hand: list of colors held by the player (not used by the scoring).
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        One minimal-risk ``(position, color)`` tuple, or ``None`` when
        ``legal`` is empty.
    """
    if not legal:
        return None
    scored = [(risk_of_move(board, candidate), candidate) for candidate in legal]
    lowest = min(risk for risk, _ in scored)
    safest = [candidate for risk, candidate in scored if risk == lowest]
    return random.choice(safest)

def simulate_one_game_safe_first_samples():
    """Play one two-player game where both seats use ``safe_first_move`` and log samples.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = safe_first_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples

In [4]:
def match_move(board, hand, legal):
    """Prefer moves whose color matches at least one of the two cards beneath.

    Row-1 moves have nothing beneath them and are never preferred. If no move
    qualifies, a uniformly random legal move is returned instead.

    Args:
        board: dict mapping ``"row-col"`` strings to colors.
        hand: list of colors held by the player (unused; kept for the shared
            strategy signature).
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        The chosen ``(position, color)`` tuple, or ``None`` when ``legal`` is
        empty (same convention as ``prioritize_versatility_move`` and
        ``safe_first_move``; previously this raised ``IndexError``).
    """
    if not legal:
        return None
    preferred = []
    for pos, color in legal:
        row, col = map(int, pos.split('-'))
        if row == 1:
            continue  # row 1 has no cards below it
        supports = (board.get(f"{row-1}-{col}"), board.get(f"{row-1}-{col+1}"))
        # Keep the move when its color equals either supporting card.
        if color in supports:
            preferred.append((pos, color))
    return random.choice(preferred or legal)

def simulate_one_game_match_samples():
    """Play one two-player game where both seats use ``match_move`` and log samples.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = match_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples

In [5]:
def block_move(board, hand, legal):
    """Prefer moves onto positions that more than one color could fill.

    A move ``(pos, color)`` is a "block" candidate when ``legal`` also contains
    a move onto the same ``pos`` with a different color. One candidate is
    picked at random; if there are none, a random legal move is returned.

    NOTE(review): despite the name, the opponent's hand is never consulted --
    only the current player's own legal moves are compared. Confirm whether a
    true opponent-aware check was intended.

    Args:
        board: current board (unused; kept for the shared strategy signature).
        hand: current player's hand (unused; kept for the shared signature).
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        The chosen ``(position, color)`` tuple, or ``None`` when ``legal`` is
        empty (same convention as ``prioritize_versatility_move`` and
        ``safe_first_move``; previously this raised ``IndexError``).
    """
    if not legal:
        return None
    # Group the playable colors by position once instead of rescanning
    # `legal` for every move; the former per-move board/hand copies were unused.
    colors_at = {}
    for pos, color in legal:
        colors_at.setdefault(pos, set()).add(color)
    contested = [(pos, color) for pos, color in legal if len(colors_at[pos]) > 1]
    return random.choice(contested or legal)

def simulate_one_game_block_samples():
    """Play one two-player game where both seats use ``block_move`` and log samples.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = block_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples

In [7]:
def hybrid_move(board, hand, legal):
    """Combine color matching with the greedy "most frequent color" rule.

    Moves above row 1 whose color equals at least one of the two cards beneath
    them are preferred; when no move qualifies, all legal moves are considered.
    Within that pool the move whose color is most frequent in ``hand`` wins,
    ties going to the move that appears first in the pool.

    Args:
        board: dict mapping ``"row-col"`` strings to colors.
        hand: list of colors held by the player.
        legal: list of ``(position, color)`` candidate moves.

    Returns:
        The chosen ``(position, color)`` tuple, or ``None`` when ``legal`` is
        empty (same convention as ``prioritize_versatility_move`` and
        ``safe_first_move``; previously this raised ``IndexError``).
    """
    if not legal:
        return None
    matching = []
    for pos, color in legal:
        row, col = map(int, pos.split('-'))
        if row == 1:
            continue  # row 1 has no cards below it
        supports = (board.get(f"{row-1}-{col}"), board.get(f"{row-1}-{col+1}"))
        if color in supports:
            matching.append((pos, color))
    pool = matching or legal
    # max() returns the first maximal element, so no full sort is needed.
    return max(pool, key=lambda move: hand.count(move[1]))

def simulate_one_game_hybrid_samples():
    """Play one two-player game where both seats use ``hybrid_move`` and log samples.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``); ``label`` is 1 only for the move actually played. The opening
    move is always position ``1-7`` with a randomly chosen card, and the second
    move is limited to positions ``1-8``/``1-9`` whenever such a move is legal.
    The game ends when both players' most recent turns were skips. Each sample
    is finally tagged with ``winner``: the seat holding fewer cards, ties being
    settled at random.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    deck = init_deck()
    hands = list(deal_hands(deck))
    board, samples = {}, []
    skipped = [False, False]
    turn = 0
    moves_played = 0

    while not all(skipped):
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []
        if not legal:
            # Nothing to play: flag this seat and pass the turn.
            skipped[turn] = True
            turn = 1 - turn
            continue
        skipped[turn] = False

        if moves_played == 0:
            # Opening move: fixed position, random card.
            chosen = ('1-7', random.choice(hand))
        else:
            pool = legal
            if moves_played == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            chosen = hybrid_move(board, hand, pool)

        played_pos, played_color = chosen
        samples.extend(
            {
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int((pos, color) == (played_pos, played_color)),
            }
            for pos, color in legal
        )
        board[played_pos] = played_color
        hand.remove(played_color)
        turn = 1 - turn
        moves_played += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    left0, left1 = len(hands[0]), len(hands[1])
    if left0 < left1:
        winner = 0
    elif left1 < left0:
        winner = 1
    else:
        winner = random.choice([0, 1])
    for entry in samples:
        entry['winner'] = winner
    return samples

In [8]:
import joblib
import numpy as np
import random
from copy import deepcopy

# Color order used by the encoders below: a color's index in this list is its
# slot in the count vectors and in the one-hot vector. Same list as the earlier
# `colors` definitions in this notebook.
colors = ["green", "red", "blue", "yellow", "purple"]

def encode_board(board):
    """Count the cards of each color currently on the board.

    Args:
        board: dict mapping position strings to colors.

    Returns:
        list[int]: five counts, indexed by each color's position in ``colors``;
        values not listed in ``colors`` are ignored.
    """
    tally = [0 for _ in range(5)]
    for placed in board.values():
        try:
            slot = colors.index(placed)
        except ValueError:
            # Not a known color: skip it.
            continue
        tally[slot] += 1
    return tally

def encode_hand(hand):
    """Return how many cards of each color ``hand`` holds, in ``colors`` order."""
    per_color = []
    for name in colors:
        per_color.append(hand.count(name))
    return per_color

def encode_sample(board, hand, pos, color):
    """Turn one (state, candidate move) pair into a flat feature list.

    Layout: hand color counts, board color counts, row index, column index,
    then a 5-slot one-hot vector for the move's color.

    Args:
        board: dict mapping position strings to colors.
        hand: list of colors held by the player.
        pos: ``"row-col"`` position string of the candidate move.
        color: color of the candidate move; must be listed in ``colors``.

    Returns:
        list[int]: the concatenated features.
    """
    board_counts = encode_board(board)
    hand_counts = encode_hand(hand)
    row_idx, col_idx = (int(part) for part in pos.split('-'))
    one_hot = [0] * 5
    hot_slot = colors.index(color)
    one_hot[hot_slot] = 1
    return [*hand_counts, *board_counts, row_idx, col_idx, *one_hot]

def model_best_move(board, hand, legal_moves, model):
    """Return the legal move the model scores highest.

    Each candidate is encoded with ``encode_sample``; the score is column 1 of
    ``model.predict_proba`` and the first move with the maximum score wins.

    Args:
        board: dict mapping position strings to colors.
        hand: list of colors held by the player.
        legal_moves: list of ``(position, color)`` candidates (this parameter
            shadows the module-level ``legal_moves`` function inside the body).
        model: object exposing ``predict_proba``.
            # NOTE(review): assumes column 1 is the "played" class -- confirm
            # against the trained model's class order.

    Returns:
        The best ``(position, color)`` tuple, or ``None`` when ``legal_moves``
        is empty.
    """
    if not legal_moves:
        return None
    feature_rows = []
    for pos, color in legal_moves:
        feature_rows.append(encode_sample(board, hand, pos, color))
    positive_scores = model.predict_proba(np.array(feature_rows))[:, 1]
    return legal_moves[np.argmax(positive_scores)]

# Models already loaded by simulate_one_game_model_samples, keyed by path.
_MODEL_CACHE = {}


def _load_model_cached(model_path):
    """Load the model stored at ``model_path`` once and reuse it afterwards."""
    if model_path not in _MODEL_CACHE:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only point this at model files you trust.
        _MODEL_CACHE[model_path] = joblib.load(model_path)
    return _MODEL_CACHE[model_path]


def simulate_one_game_model_samples(model_path):
    """Play one two-player game where both seats follow a trained model.

    The model is loaded through a per-path cache, so generating many games
    with the same ``model_path`` reads the file only once instead of once per
    game.

    Every turn in which the current player can move contributes one sample per
    legal move (keys ``turn``, ``board``, ``hand``, ``pos``, ``color``,
    ``label``, ``bot``); ``label`` is 1 only for the move actually played and
    ``bot`` is always ``"model"``. The opening move is always position ``1-7``
    with the color the model scores best, and the second move is limited to
    positions ``1-8``/``1-9`` whenever such a move is legal. The game ends when
    both players' most recent turns were skips. Each sample is finally tagged
    with ``winner``: the seat holding fewer cards, ties being settled at
    random.

    Args:
        model_path: path of a joblib-serialized model exposing
            ``predict_proba``.

    Returns:
        list[dict]: the samples collected over the whole game.
    """
    model = _load_model_cached(model_path)
    deck = init_deck()
    hand0, hand1 = deal_hands(deck)
    hands = [hand0, hand1]
    board = {}
    turn = 0
    samples = []
    move_count = 0
    skip = [False, False]
    while True:
        hand = hands[turn]
        legal = legal_moves(board, hand) if hand else []

        if not legal:
            # Nothing to play: flag this seat; stop once both seats are flagged.
            skip[turn] = True
            if all(skip):
                break
            turn = 1 - turn
            continue
        skip[turn] = False

        if move_count == 0:
            # Opening move: fixed position, color chosen by the model.
            opening = [('1-7', c) for c in set(hand)]
            move = ('1-7', model_best_move(board, hand, opening, model)[1])
        else:
            pool = legal
            if move_count == 1:
                # NOTE(review): after an opening at 1-7 only 1-6 and 1-8 are
                # legal, so '1-9' never matches here -- confirm the intent.
                restricted = [m for m in legal if m[0] in ('1-8', '1-9')]
                pool = restricted or legal
            move = model_best_move(board, hand, pool, model)

        pos_played, color_played = move
        for (pos, color) in legal:
            samples.append({
                "turn": turn,
                "board": deepcopy(board),
                "hand": deepcopy(hand),
                "pos": pos,
                "color": color,
                "label": int(pos == pos_played and color == color_played),
                "bot": "model"
            })
        board[pos_played] = color_played
        hand.remove(color_played)
        turn = 1 - turn
        move_count += 1

    # Fewer cards left in hand wins; a tie is broken at random.
    score0 = len(hands[0])
    score1 = len(hands[1])
    winner = 0 if score0 < score1 else 1 if score1 < score0 else random.choice([0, 1])
    for s in samples:
        s['winner'] = winner
    return samples


In [10]:
# Seed the global RNG so deck shuffles and random tie-breaks repeat between
# runs. (Move ordering that comes from iterating a set of color strings may
# still vary with the interpreter's hash seed.)
SEED = 42
random.seed(SEED)

# One entry per generation pass, written to the file in this order:
# (progress-bar label, "bot" tag stored in each sample, number of games,
#  simulator function, positional arguments for the simulator)
GENERATION_PLAN = [
    ("Greedy", "greedy", 6000, simulate_one_game_greedy_samples, ()),
    ("Versatility", "versatility", 4000, simulate_one_game_versatility_samples, ()),
    ("Safe First", "safe_first", 4000, simulate_one_game_safe_first_samples, ()),
    # Match and Block bots are defined above but excluded from this mix; to
    # include them add entries such as:
    # ("Match", "match", <n_games>, simulate_one_game_match_samples, ()),
    # ("Block", "block", <n_games>, simulate_one_game_block_samples, ()),
    ("Hybrid", "hybrid", 2000, simulate_one_game_hybrid_samples, ()),
    ("Model", "model", 2000, simulate_one_game_model_samples, ("penguinparty_DecisionTree.pkl",)),
    ("Model", "model", 2000, simulate_one_game_model_samples, ("penguinparty_XGBoost.pkl",)),
]

# Write every sample as one JSON object per line (JSON Lines).
with open("penguin_party_mix_dataset.jsonl", "w", encoding="utf-8") as f:
    for desc, bot, n_games, simulate, sim_args in GENERATION_PLAN:
        for _ in tqdm(range(n_games), desc=desc):
            samples = simulate(*sim_args)
            for sample in samples:
                sample["bot"] = bot
                f.write(json.dumps(sample, ensure_ascii=False) + "\n")

Greedy: 100%|██████████| 6000/6000 [00:21<00:00, 273.57it/s]
Versatility: 100%|██████████| 4000/4000 [01:02<00:00, 64.20it/s]
Safe First: 100%|██████████| 4000/4000 [00:11<00:00, 344.36it/s]
Hybrid: 100%|██████████| 2000/2000 [00:08<00:00, 239.96it/s]
Model: 100%|██████████| 2000/2000 [01:02<00:00, 31.80it/s]
Model: 100%|██████████| 2000/2000 [00:34<00:00, 58.52it/s]
