<a href="https://colab.research.google.com/github/Pt-home/Dodgem/blob/main/Dodgem_3D_%E2%80%94_Python_Colab_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 🧊 Dodgem 3D — 3 Players, Q-Learning Simulation

import random
import json
from collections import defaultdict, Counter
import matplotlib.pyplot as plt

In [2]:
# ⚙️ Parameters
board_size = 3                        # valid coordinates on every axis are 0 .. board_size - 1
pieces_per_player = board_size - 1    # number of pieces each player starts with
games_to_play = 500_000               # number of self-play games passed to train()
learning_rate = 0.1                   # step size of the weight update in train()
epsilon = 0.1                         # probability that choose_move() picks a uniformly random move
players = ['X', 'Y', 'Z']             # turn order

# own_axis[p]  – the axis along which player p may step in both directions.
# non_axes[p]  – the two remaining axes, along which p may only step forward (+1).
own_axis = dict(zip(players, range(3)))
non_axes = {
    name: [axis for axis in range(3) if axis != own_axis[name]]
    for name in players
}

In [3]:
# 🎯 Initial position
def initial_state():
    """Build the starting state of a game.

    Every player receives ``pieces_per_player`` pieces lined up along that
    player's own axis (``own_axis[player]``), occupying the highest
    coordinates on that axis; the other two coordinates are 0.

    Returns:
        A ``(positions, player_to_move)`` tuple. ``positions`` maps each
        player to a list of ``(x, y, z)`` tuples; the player to move is
        ``'X'``.
    """
    first_coord = board_size - pieces_per_player
    positions = {}
    for player in players:
        # Take the axis from own_axis (the same table get_legal_moves uses)
        # instead of hard-coding it per player name, so the start position
        # can never disagree with the movement rules.
        axis = own_axis[player]
        pieces = []
        for offset in range(pieces_per_player):
            coords = [0, 0, 0]
            coords[axis] = first_coord + offset
            pieces.append(tuple(coords))
        positions[player] = pieces
    return (positions, 'X')

# 🧠 Legal moves
def get_legal_moves(state, player):
    """List every move ``player`` may make in ``state``.

    Args:
        state: ``(positions, player_to_move)``; only ``positions`` is read.
            ``positions`` maps each player to a list whose entries are
            coordinate tuples or the string ``'out'``.
        player: key into ``positions`` / ``own_axis`` / ``non_axes``.

    Returns:
        A list of ``(piece_index, target)`` pairs, where ``target`` is the
        destination coordinate tuple or the string ``'exit'``. Pieces are
        handled in index order; for each piece the moves appear as:
        own-axis -1, own-axis +1, forward steps along the non-own axes,
        then ``'exit'``.
    """
    positions, _ = state
    # Cells currently holding a piece of any player (pieces that left are skipped).
    taken = {cell for cells in positions.values() for cell in cells if cell != 'out'}
    mine = own_axis[player]
    forward_dims = non_axes[player]

    def shifted(cell, dim, value):
        # Copy of `cell` with coordinate `dim` replaced by `value`.
        return tuple(value if d == dim else c for d, c in enumerate(cell))

    moves = []
    for index, cell in enumerate(positions[player]):
        if cell == 'out':
            continue

        # Own axis: one step in either direction, staying on the board.
        for delta in (-1, 1):
            moved = cell[mine] + delta
            if moved < 0 or moved >= board_size:
                continue
            target = shifted(cell, mine, moved)
            if target not in taken:
                moves.append((index, target))

        # Non-own axes: forward (+1) only.
        for dim in forward_dims:
            moved = cell[dim] + 1
            if moved >= board_size:
                continue
            target = shifted(cell, dim, moved)
            if target not in taken:
                moves.append((index, target))

        # Special move: a piece at the far edge of both non-own axes may exit.
        if all(cell[dim] == board_size - 1 for dim in forward_dims):
            moves.append((index, 'exit'))
    return moves

# 🚪 Apply a move
def apply_move(state, move, player):
    """Return the state that results from ``player`` making ``move``.

    The input state is left untouched: each player's piece list is copied
    before the moved piece is updated.

    Args:
        state: ``(positions, player_to_move)``; only ``positions`` is read.
        move: ``(piece_index, target)`` where ``target`` is a coordinate
            tuple or the string ``'exit'``.
        player: the player whose piece is moved.

    Returns:
        ``(new_positions, next_player)``. An ``'exit'`` move marks the piece
        as ``'out'``; ``next_player`` is the entry after ``player`` in
        ``players``, wrapping around to the first.
    """
    positions, _ = state
    piece_index, target = move
    updated = {name: list(positions[name]) for name in players}
    updated[player][piece_index] = 'out' if target == 'exit' else target
    # Wrap by the real number of players (not a literal 3) so that changing
    # `players` cannot index past the end of the list.
    following = players[(players.index(player) + 1) % len(players)]
    return (updated, following)

# 🏆 Winner
def check_winner(state):
    """Return the first player (in ``players`` order) whose pieces are all ``'out'``.

    Args:
        state: ``(positions, player_to_move)``; only ``positions`` is read.

    Returns:
        The winning player's name, or ``None`` when nobody has finished.
        A player with an empty piece list counts as finished.
    """
    positions, _ = state
    finished = (
        name for name in players
        if all(piece == 'out' for piece in positions[name])
    )
    return next(finished, None)

In [4]:
# Q-table: one independent table per player, laid out as
#   q_table[player][state_string][move] -> weight
# Both inner levels are defaultdicts, so merely reading an unseen
# (state, move) pair creates it with the initial weight 1.0.
q_table = {
    name: defaultdict(lambda: defaultdict(lambda: 1.0))
    for name in players
}


In [6]:
# 🤖 Move selection
def choose_move(player, state):
    """Pick a move for ``player`` in ``state``.

    With probability ``epsilon`` a uniformly random legal move is returned.
    Otherwise a move is sampled with probability proportional to its weight
    in ``q_table[player][str(state)]``; if the weights do not sum to a
    positive number the choice falls back to uniform.

    Looking up the weights creates any missing ``q_table`` entries with
    their default value.

    Returns:
        A ``(piece_index, target)`` move, or ``None`` when ``player`` has no
        legal move.
    """
    options = get_legal_moves(state, player)
    if not options:
        return None

    # Exploration branch.
    if random.random() < epsilon:
        return random.choice(options)

    table = q_table[player][str(state)]
    scores = [table[move] for move in options]
    score_sum = sum(scores)
    if score_sum <= 0:
        return random.choice(options)

    # Exploitation branch: sample proportionally to the normalised weights.
    probabilities = [score / score_sum for score in scores]
    return random.choices(list(options), probabilities)[0]

# 🎮 Game
def play_game():
    """Play one complete game starting from ``initial_state()``.

    Players move in turn, each move chosen by ``choose_move``. A player with
    no legal move passes and the turn goes to the next player. The loop ends
    as soon as ``check_winner`` reports a winner.

    Returns:
        ``(winner, trajectory)`` where ``trajectory`` is a list of
        ``(player, state_string, move)`` triples, one per move actually made
        (passes are not recorded). ``state_string`` is ``str(state)`` taken
        before the move was applied.
    """
    state = initial_state()
    trajectory = []
    while True:
        winner = check_winner(state)
        if winner is not None:
            return winner, trajectory

        player = state[1]
        move = choose_move(player, state)
        if move is None:
            # No legal move: pass. Wrap by len(players) rather than a literal
            # 3 so the rotation stays valid if `players` is changed.
            upcoming = players[(players.index(player) + 1) % len(players)]
            state = (state[0], upcoming)
            continue

        trajectory.append((player, str(state), move))
        state = apply_move(state, move, player)

# 🧪 Training
def train(n):
    """Play ``n`` self-play games and update ``q_table`` after each one.

    After a game, every recorded ``(player, state, move)`` has its weight
    moved towards the reward by a fraction ``learning_rate``; the reward is
    1 for moves made by the winner and 0 for everyone else's moves.
    A progress line is printed every 10000 games.

    Args:
        n: number of games to play.

    Returns:
        A ``Counter`` mapping each winner to the number of games won.
    """
    tally = Counter()
    for game_no in range(1, n + 1):
        winner, history = play_game()
        tally[winner] += 1

        for who, state_key, move in history:
            target = 1 if winner == who else 0
            weights = q_table[who][state_key]
            weights[move] += learning_rate * (target - weights[move])

        if game_no % 10000 == 0:
            total = sum(tally.values())
            summary = ', '.join(
                f"{p} = {tally[p]} ({100 * tally[p]/total:.1f}%)" for p in players
            )
            print(f"After {game_no} games:", summary)
    return tally

In [None]:
# 🚀 Run training
# Plays `games_to_play` self-play games; `results` is the Counter of wins per
# player returned by train(), which the plotting cell below reads.
results = train(games_to_play)
print("✅ Final results:", results)

After 10000 games: X = 3778 (37.8%), Y = 3352 (33.5%), Z = 2870 (28.7%)
After 20000 games: X = 7577 (37.9%), Y = 6600 (33.0%), Z = 5823 (29.1%)
After 30000 games: X = 11316 (37.7%), Y = 9902 (33.0%), Z = 8782 (29.3%)
After 40000 games: X = 15076 (37.7%), Y = 13239 (33.1%), Z = 11685 (29.2%)
After 50000 games: X = 18912 (37.8%), Y = 16550 (33.1%), Z = 14538 (29.1%)
After 60000 games: X = 22667 (37.8%), Y = 19938 (33.2%), Z = 17395 (29.0%)
After 70000 games: X = 26527 (37.9%), Y = 23273 (33.2%), Z = 20200 (28.9%)
After 80000 games: X = 30269 (37.8%), Y = 26608 (33.3%), Z = 23123 (28.9%)
After 90000 games: X = 33997 (37.8%), Y = 29953 (33.3%), Z = 26050 (28.9%)
After 100000 games: X = 37803 (37.8%), Y = 33289 (33.3%), Z = 28908 (28.9%)
After 110000 games: X = 41611 (37.8%), Y = 36586 (33.3%), Z = 31803 (28.9%)
After 120000 games: X = 45406 (37.8%), Y = 39954 (33.3%), Z = 34640 (28.9%)
After 130000 games: X = 49171 (37.8%), Y = 43251 (33.3%), Z = 37578 (28.9%)
After 140000 games: X = 53018

In [None]:
# 💾 Save the Q-table
# The file name records the run parameters. JSON object keys must be strings,
# so every move (a tuple) is written as its str() form; player names and
# state keys are strings already.
filename = f"Q3D_Final_{board_size}x{board_size}_G{games_to_play}_lr{learning_rate}_eps{epsilon}.json"
with open(filename, "w") as f:
    json.dump(
        {
            player: {
                state_key: {str(move): weight for move, weight in moves.items()}
                for state_key, moves in table.items()
            }
            for player, table in q_table.items()
        },
        f,
    )
print(f"💾 Q-table saved to: {filename}")

NameError: name 'board_size' is not defined

In [None]:
# 📈 Plot the results
# Draw the bars in the fixed `players` order rather than in the Counter's
# insertion order (which depends on who happened to win first), so each
# player always gets the same position and colour. A player who never won is
# still shown, with a bar of height 0, because a Counter returns 0 for
# missing keys.
plt.bar(players, [results[name] for name in players], color=['red', 'blue', 'green'])
plt.title(f"Dodgem 3D ({board_size}³): Win Distribution")
plt.ylabel("Wins")
plt.show()