**KABADDI GAME**
Creating Environment


 Define the state, the rules for moving, capturing, and winning.

In [None]:
%%writefile kabaddi_env.py
import random
import copy

class GameState:
    """
    Mutable snapshot of a Kabaddi match.

    Holds the board geometry (``grid_size``, ``width``, ``height``,
    ``mid_line``), the fixed gold squares of both teams, every player's
    current square, the set of captured player ids, the id of the player
    holding gold (or ``None``), whose turn it is, the turn counter with its
    limit, and the winner (``None`` while the game is running).
    """
    def __init__(self, grid_size=(8, 5)):
        self.grid_size = grid_size
        self.width, self.height = grid_size
        # Columns left of mid_line are team A's half; the rest is team B's.
        self.mid_line = self.width // 2

        last_col = self.width - 1
        centre_row = self.height // 2
        # Each team's gold sits in the middle row of its own back column.
        self.gold_positions = {'A': (0, centre_row), 'B': (last_col, centre_row)}

        # Starting squares: one column in from each back edge, on the
        # second row from the top and from the bottom.
        upper_row = 1
        lower_row = self.height - 2
        a_col = 1
        b_col = self.width - 2
        self.player_positions = {
            'A1': (a_col, upper_row), 'A2': (a_col, lower_row),
            'B1': (b_col, upper_row), 'B2': (b_col, lower_row),
        }
        self.captured_players = set()
        self.player_with_gold = None  # id of the carrier, e.g. 'A1'

        self.current_turn = 'A'
        self.turn_count = 0
        self.max_turns = 100
        self.winner = None

    def get_player_team(self, player_id):
        """Returns the team letter, i.e. the first character of the player id."""
        return player_id[0]

    def get_opponent_team(self, team):
        """Returns 'B' for team 'A' and 'A' for anything else."""
        if team == 'A':
            return 'B'
        return 'A'

    def is_in_enemy_territory(self, player_id):
        """Returns True when the player's column lies in the other team's half."""
        column = self.player_positions[player_id][0]
        on_b_side = column >= self.mid_line
        if self.get_player_team(player_id) == 'A':
            return on_b_side
        # Team B: enemy territory is the half left of mid_line.
        return not on_b_side

class KabaddiGame:
    """
    Manages the game logic, rules, and state transitions.

    All rules operate on ``self.state`` (a ``GameState``) and mutate it in
    place. A team move is a dict mapping each of the team's two player ids to
    a target ``(x, y)`` square, as produced by ``get_all_moves_for_team``.
    """
    def __init__(self):
        self.state = GameState()

    def clone(self):
        """Creates a deep copy of the game instance for simulations."""
        new_game = KabaddiGame()
        new_game.state = copy.deepcopy(self.state)
        return new_game

    def get_legal_moves_for_player(self, player_id):
        """Returns a list of valid (x, y) positions for a single player.

        A captured player has no legal moves (empty list). Otherwise the
        candidates are staying put or one orthogonal step; a candidate is
        kept when it is on the grid and is not the square currently occupied
        by an uncaptured teammate.
        """
        if player_id in self.state.captured_players:
            return []

        x, y = self.state.player_positions[player_id]
        possible_moves = [(x, y), (x+1, y), (x-1, y), (x, y+1), (x, y-1)]  # includes staying put

        team = self.state.get_player_team(player_id)
        teammate_id = f"{team}{2 if player_id.endswith('1') else 1}"
        # A captured teammate no longer blocks any square.
        teammate_pos = None
        if teammate_id not in self.state.captured_players:
            teammate_pos = self.state.player_positions.get(teammate_id)

        legal_moves = []
        for move in possible_moves:
            mx, my = move
            on_grid = 0 <= mx < self.state.width and 0 <= my < self.state.height
            if on_grid and move != teammate_pos:
                legal_moves.append(move)
        return legal_moves

    def get_all_moves_for_team(self, team):
        """Generates all possible move combinations for a team.

        Returns a list of dicts ``{"<team>1": pos, "<team>2": pos}``. A
        captured player contributes only its stored position. Combinations
        that would put two uncaptured teammates on the same square are
        skipped.
        """
        player1_id = f"{team}1"
        player2_id = f"{team}2"

        p1_moves = self.get_legal_moves_for_player(player1_id)
        p2_moves = self.get_legal_moves_for_player(player2_id)

        # No legal moves (e.g. captured): keep the player where it is stored.
        if not p1_moves:
            p1_moves = [self.state.player_positions[player1_id]]
        if not p2_moves:
            p2_moves = [self.state.player_positions[player2_id]]

        captured = self.state.captured_players
        both_active = player1_id not in captured and player2_id not in captured

        combined_moves = []
        for move1 in p1_moves:
            for move2 in p2_moves:
                if move1 == move2 and both_active:
                    continue
                combined_moves.append({player1_id: move1, player2_id: move2})
        return combined_moves

    def _update_state(self, team_a_move, team_b_move):
        """Internal logic to update state after moves are chosen (for simultaneous).

        Steps, in order: apply positions, resolve captures, resolve gold
        pickup, check the win condition. If both dicts contain the same
        player id, the entry from ``team_b_move`` takes precedence.
        """
        # Update positions (captured players never move)
        new_positions = {**team_a_move, **team_b_move}
        for player_id, pos in new_positions.items():
            if player_id not in self.state.captured_players:
                self.state.player_positions[player_id] = pos

        # Check for captures
        players_to_capture = set()
        for defender_team in ['A', 'B']:
            attacker_team = self.state.get_opponent_team(defender_team)
            for d_id in [f"{defender_team}1", f"{defender_team}2"]:
                if d_id in self.state.captured_players:
                    continue
                for a_id in [f"{attacker_team}1", f"{attacker_team}2"]:
                    if a_id in self.state.captured_players:
                        continue

                    # An attacker standing in enemy territory on the same
                    # square as a defender is captured.
                    if self.state.is_in_enemy_territory(a_id) and self.state.player_positions[d_id] == self.state.player_positions[a_id]:
                        players_to_capture.add(a_id)

        for p_id in players_to_capture:
            self.state.captured_players.add(p_id)
            # A captured carrier drops the gold.
            if self.state.player_with_gold == p_id:
                self.state.player_with_gold = None

        # Check for gold pickup
        # NOTE(review): there is a single player_with_gold slot, so a later
        # pickup in this loop replaces an earlier carrier.
        for p_id, pos in self.state.player_positions.items():
            if p_id in self.state.captured_players:
                continue
            team = self.state.get_player_team(p_id)
            opponent_team = self.state.get_opponent_team(team)
            if pos == self.state.gold_positions[opponent_team] and self.state.is_in_enemy_territory(p_id):
                self.state.player_with_gold = p_id

        # Check for win condition: the carrier is back in its own half
        if self.state.player_with_gold:
            winner_id = self.state.player_with_gold
            winner_team = self.state.get_player_team(winner_id)
            if not self.state.is_in_enemy_territory(winner_id):
                self.state.winner = winner_team

    def _count_turn(self):
        """Advances the turn counter and records a draw at the turn limit.

        The draw is only recorded when nobody has won yet, so a win achieved
        on the final turn is not overwritten.
        """
        self.state.turn_count += 1
        if self.state.winner is None and self.state.turn_count >= self.state.max_turns:
            self.state.winner = 'DRAW'

    def play_turn_based(self, team_move):
        """Applies a move for the current team in turn-by-turn mode.

        ``team_move`` is applied for ``state.current_turn``; the other team
        stays where it is. Does nothing when the game is already over.
        """
        if self.is_game_over():
            return

        current_team = self.state.current_turn
        opponent_team = self.state.get_opponent_team(current_team)

        # Create static move for the opponent team (they don't move)
        opponent_move = {}
        for p_id in [f"{opponent_team}1", f"{opponent_team}2"]:
            opponent_move[p_id] = self.state.player_positions[p_id]

        if current_team == 'A':
            self._update_state(team_move, opponent_move)
        else:
            self._update_state(opponent_move, team_move)

        # Switch turns and check for draw
        self.state.current_turn = opponent_team
        self._count_turn()

    def play_simultaneous(self, team_a_move, team_b_move):
        """Applies moves for both teams at once.

        ``state.current_turn`` is not changed in this mode. Does nothing when
        the game is already over.
        """
        if self.is_game_over():
            return
        self._update_state(team_a_move, team_b_move)
        self._count_turn()

    def is_game_over(self):
        """Returns True once a winner (or 'DRAW') has been recorded."""
        return self.state.winner is not None

    def render(self):
        """Prints a text-based representation of the game board."""
        grid = [['.' for _ in range(self.state.width)] for _ in range(self.state.height)]
        mid = self.state.mid_line
        # The divider is drawn in the last column of team A's half.
        for y in range(self.state.height):
            grid[y][mid-1] = '|'

        # Place Gold
        ga_x, ga_y = self.state.gold_positions['A']
        gb_x, gb_y = self.state.gold_positions['B']
        grid[ga_y][ga_x] = 'GA'
        grid[gb_y][gb_x] = 'GB'

        # Place Players (drawn last, so they hide the divider or gold beneath them)
        for p_id, (x, y) in self.state.player_positions.items():
            if p_id in self.state.captured_players:
                continue

            display_id = p_id
            if self.state.player_with_gold == p_id:
                display_id += '*'  # Mark player with gold
            grid[y][x] = display_id

        print("-" * (self.state.width * 4))
        for row in grid:
            print(' '.join(f'{str(cell):<3}' for cell in row))
        print(f"Turn: {self.state.turn_count}, To Play: {self.state.current_turn}, Player with Gold: {self.state.player_with_gold}, Captured: {self.state.captured_players}, Winner: {self.state.winner}")

Overwriting kabaddi_env.py


**Creating Agents** — four agents are defined: Random, Greedy, Alpha-Beta, and MCTS.

In [None]:
%%writefile agents.py

import random
import math
import time
import copy

from kabaddi_env import KabaddiGame

class Agent:
    """
    Base class for all players: remembers which team ('A' or 'B') it controls.
    """
    def __init__(self, team):
        self.team = team

    def get_move(self, game):
        """Chooses a team move for the given game; subclasses must override."""
        raise NotImplementedError()

class RandomAgent(Agent):
    """
    Baseline agent that picks uniformly among its team's move combinations.
    """
    def get_move(self, game):
        """Returns a random team move, or None when the game offers none."""
        candidates = game.get_all_moves_for_team(self.team)
        if not candidates:
            return None
        return random.choice(candidates)

def evaluate_state(game_state, team):
    """Heuristic value of ``game_state`` from ``team``'s point of view.

    Higher is better for ``team``. Terminal states score 10000 (win),
    -10000 (loss) or 0 (draw). Otherwise the score is the sum of:

    * 500 per player of material advantage (uncaptured players only);
    * if ``team`` carries the gold: ``(20 - d) * 100`` where ``d`` is the
      carrier's column distance to the edge column of its own half;
    * otherwise: minus 10 per square of Manhattan distance from the team's
      closest uncaptured player to the enemy gold.

    When the team has no uncaptured players the distance term is skipped, so
    the result stays finite and never ranks below an actual loss.
    """
    opponent_team = 'B' if team == 'A' else 'A'

    # Terminal positions dominate every heuristic term.
    if game_state.winner == team:
        return 10000
    if game_state.winner == opponent_team:
        return -10000
    if game_state.winner == 'DRAW':
        return 0

    captured = game_state.captured_players
    positions = game_state.player_positions
    my_players = [p for p in positions if p.startswith(team) and p not in captured]
    opp_players = [p for p in positions if p.startswith(opponent_team) and p not in captured]

    score = (len(my_players) - len(opp_players)) * 500

    carrier = game_state.player_with_gold
    if carrier and carrier.startswith(team):
        # Reward bringing the gold back towards the home half.
        carrier_x = positions[carrier][0]
        home_territory_line = game_state.mid_line if team == 'B' else game_state.mid_line - 1
        dist_to_home = abs(carrier_x - home_territory_line)
        score += (20 - dist_to_home) * 100
    elif my_players:
        # Reward closing in on the enemy gold.
        gold_x, gold_y = game_state.gold_positions[opponent_team]
        min_dist = min(
            abs(positions[p_id][0] - gold_x) + abs(positions[p_id][1] - gold_y)
            for p_id in my_players
        )
        score -= min_dist * 10
    return score

class GreedyAgent(Agent):
    """
    One-ply lookahead agent: tries every team move on a cloned game and keeps
    the one whose resulting state scores highest under ``evaluate_state``.
    """
    def get_move(self, game):
        """Returns the best-scoring team move, or None when there are no moves."""
        legal_moves = game.get_all_moves_for_team(self.team)
        if not legal_moves:
            return None

        best_score = -float('inf')
        best_move = None
        for move in legal_moves:
            temp_game = game.clone()
            # play_turn_based applies the move for state.current_turn, which
            # stays at 'A' in simultaneous mode. Without forcing it to this
            # agent's team, team B's candidate would be overwritten by its
            # own static positions and every move would score the same.
            temp_game.state.current_turn = self.team
            temp_game.play_turn_based(move)
            score = evaluate_state(temp_game.state, self.team)
            if score > best_score:
                best_score = score
                best_move = move
        # Falls back to a random move if no candidate beat -inf.
        return best_move or random.choice(legal_moves)

class AlphaBetaAgent(Agent):
    """
    Depth-limited minimax with alpha-beta pruning. The search models the game
    as alternating team moves (``play_turn_based``) and scores leaves with
    ``evaluate_state`` from this agent's point of view.
    """
    def __init__(self, team, depth=3):
        super().__init__(team)
        self.depth = depth

    def get_move(self, game):
        """Returns the move chosen by the search, always for this agent's team."""
        # The search decides whose node it is from state.current_turn, which
        # stays at 'A' in simultaneous mode. Searching a clone with
        # current_turn set to our team keeps the root a maximizing node, so
        # team B gets a move instead of None.
        search_root = game.clone()
        search_root.state.current_turn = self.team
        _, move = self.alpha_beta(search_root, self.depth, -float('inf'), float('inf'), True)
        return move

    def alpha_beta(self, game, depth, alpha, beta, is_maximizing_player):
        """Returns ``(value, best_move)`` for the subtree rooted at ``game``.

        ``best_move`` is only set at nodes where this agent moves; it is None
        at opponent nodes and at leaves. ``is_maximizing_player`` is kept for
        interface compatibility; the side to move is read from
        ``game.state.current_turn``.
        """
        if depth == 0 or game.is_game_over():
            return evaluate_state(game.state, self.team), None

        is_my_turn = (self.team == game.state.current_turn)
        if is_my_turn:
            moves = game.get_all_moves_for_team(self.team)
            if not moves:
                return -float('inf'), None
            max_eval = -float('inf')
            # Start from the first move so a legal move is returned even if
            # every child evaluates to -inf.
            best_move = moves[0]
            for move in moves:
                sim_game = game.clone()
                sim_game.play_turn_based(move)
                evaluation, _ = self.alpha_beta(sim_game, depth - 1, alpha, beta, False)
                if evaluation > max_eval:
                    max_eval = evaluation
                    best_move = move
                alpha = max(alpha, evaluation)
                if beta <= alpha:
                    break  # the minimizing parent will never allow this line
            return max_eval, best_move

        opponent_team = game.state.get_opponent_team(self.team)
        moves = game.get_all_moves_for_team(opponent_team)
        if not moves:
            return float('inf'), None
        min_eval = float('inf')
        for move in moves:
            sim_game = game.clone()
            sim_game.play_turn_based(move)
            evaluation, _ = self.alpha_beta(sim_game, depth - 1, alpha, beta, True)
            if evaluation < min_eval:
                min_eval = evaluation
            beta = min(beta, evaluation)
            if beta <= alpha:
                break  # the maximizing parent already has a better option
        return min_eval, None

class MCTSNode:
    """
    One node of the Monte Carlo search tree: a game state, the move that led
    to it, its parent/children links and the win/visit statistics.
    """
    def __init__(self, game_state, parent=None, move=None):
        self.game_state, self.parent, self.move = game_state, parent, move
        self.children = []
        # None means "moves not generated yet"; an empty list means fully expanded.
        self.untried_moves = None
        self.visits = 0
        self.wins = 0

    def select_child(self):
        """Returns the child with the highest UCB score (last one wins ties)."""
        exploration = 1.41
        log_term = 2 * math.log(self.visits)
        ranking = []
        for index, child in enumerate(self.children):
            exploit = child.wins / child.visits
            explore = exploration * math.sqrt(log_term / child.visits)
            # Pairing the score with the index breaks ties towards later children.
            ranking.append((exploit + explore, index))
        ranking.sort()
        return self.children[ranking[-1][1]]

class MCTSAgent(Agent):
    """
    Monte Carlo Tree Search agent: runs ``iterations`` rounds of selection,
    expansion, random playout and backpropagation, then plays the move of
    the most visited root child.
    """
    def __init__(self, team, iterations=100):
        super().__init__(team)
        self.iterations = iterations

    def get_move(self, game):
        """Returns a team move for this agent's team, or None if none exists."""
        root_state = copy.deepcopy(game.state)
        # The tree takes the side to move from state.current_turn, which stays
        # at 'A' in simultaneous mode. Forcing it to our team makes the root's
        # children moves for OUR players; otherwise team B would return a move
        # dict keyed by team A's player ids.
        root_state.current_turn = self.team
        root = MCTSNode(game_state=root_state)

        for _ in range(self.iterations):
            node = root
            # Fresh working copy of the root position for this iteration.
            temp_game = game.clone()
            temp_game.state.current_turn = self.team

            # 1. Selection
            while node.untried_moves is not None and not node.untried_moves:  # Node is fully expanded
                if not node.children:
                    break  # Reached a leaf node
                node = node.select_child()
                temp_game.play_turn_based(node.move)

            # 2. Expansion
            if not temp_game.is_game_over():
                if node.untried_moves is None:  # First time visiting node, get its moves
                    node.untried_moves = temp_game.get_all_moves_for_team(temp_game.state.current_turn)
                    random.shuffle(node.untried_moves)

                if node.untried_moves:  # If there are moves to try, expand one
                    move = node.untried_moves.pop()
                    temp_game.play_turn_based(move)
                    child_node = MCTSNode(game_state=copy.deepcopy(temp_game.state), parent=node, move=move)
                    node.children.append(child_node)
                    node = child_node  # Move to new node for simulation

            # 3. Simulation: random playout until the game ends
            while not temp_game.is_game_over():
                moves = temp_game.get_all_moves_for_team(temp_game.state.current_turn)
                if moves:
                    temp_game.play_turn_based(random.choice(moves))
                else:
                    temp_game.state.winner = 'DRAW'
                    break

            # 4. Backpropagation
            result = temp_game.state.winner
            while node is not None:
                node.visits += 1
                # A node's statistics belong to the team that moved INTO it,
                # i.e. the team to play at its parent.
                if node.parent:
                    parent_team_turn = node.parent.game_state.current_turn
                    if result == parent_team_turn:
                        node.wins += 1
                    elif result == 'DRAW':
                        node.wins += 0.5
                node = node.parent

        # Failsafe: if no children were created (e.g. the game is already over), pick a random move.
        if not root.children:
            moves = game.get_all_moves_for_team(self.team)
            return random.choice(moves) if moves else None

        # Select the move of the most visited child
        best_child = sorted(root.children, key=lambda c: c.visits)[-1]
        return best_child.move

Overwriting agents.py


**Simulating Each Agent and Comparing Results**
**Turn-by-Turn Mode**

In [None]:
import importlib
import kabaddi_env
import agents
importlib.reload(kabaddi_env)
importlib.reload(agents)

from kabaddi_env import KabaddiGame
from agents import RandomAgent, GreedyAgent, AlphaBetaAgent, MCTSAgent

def run_game(agent_a, agent_b, game_mode='turn'):
    """Runs a single game between two agents.

    Args:
        agent_a: agent controlling team 'A'.
        agent_b: agent controlling team 'B'.
        game_mode: 'turn' (teams alternate) or 'simultaneous' (both teams
            move at once).

    Returns:
        The recorded winner: 'A', 'B' or 'DRAW'.

    Raises:
        ValueError: if ``game_mode`` is not one of the two supported modes.
    """
    # Checked up front: an unknown mode would otherwise loop forever.
    if game_mode not in ('turn', 'simultaneous'):
        raise ValueError(f"Unknown game_mode: {game_mode!r}")

    game = KabaddiGame()
    agents_by_team = {'A': agent_a, 'B': agent_b}

    while not game.is_game_over():
        if game_mode == 'turn':
            current_team = game.state.current_turn
            move = agents_by_team[current_team].get_move(game)
            if move:
                game.play_turn_based(move)
            else:
                # No move means the state can never advance; end in a draw
                # instead of looping forever.
                game.state.winner = 'DRAW'
        else:
            move_a = agents_by_team['A'].get_move(game)
            move_b = agents_by_team['B'].get_move(game)
            if move_a and move_b:
                game.play_simultaneous(move_a, move_b)
            else:  # If any agent fails to move, end in a draw
                game.state.winner = 'DRAW'

    return game.state.winner

def run_tournament(agent_pairs, num_games=100, game_mode='turn'):
    """Plays ``num_games`` games per agent pairing and prints a summary.

    Sides are swapped every other game. In the tally, key 'A' counts wins of
    the first agent of the pair and key 'B' wins of the second, whichever
    team they played in a given game.
    """
    print(f"\n--- Running Tournament: {game_mode.upper()} Mode ({num_games} games each) ---")

    # Maps the winning team of a side-swapped game back to the tally key.
    swapped_key = {'A': 'B', 'B': 'A'}

    for first_cls, second_cls in agent_pairs:
        tally = {'A': 0, 'B': 0, 'DRAW': 0}
        first_name = first_cls.__name__
        second_name = second_cls.__name__

        for game_no in range(num_games):
            if game_no % 2 == 0:
                # Even games: first agent plays team A.
                outcome = run_game(first_cls('A'), second_cls('B'), game_mode)
                tally[outcome] += 1
            else:
                # Odd games: sides swapped, so flip the result back.
                outcome = run_game(second_cls('A'), first_cls('B'), game_mode)
                tally[swapped_key.get(outcome, 'DRAW')] += 1

        print(f"Matchup: {first_name} (A) vs {second_name} (B)")
        summary_rows = (
            (f"{first_name} Wins", tally['A']),
            (f"{second_name} Wins", tally['B']),
            ("Draws", tally['DRAW']),
        )
        for label, count in summary_rows:
            print(f"  {label}: {count} ({count/num_games:.0%})")
        print("-" * 30)

if __name__ == "__main__":
    # Agent pairings to evaluate against each other
    matchups = [
        # Random Agent against every other agent
        (RandomAgent, GreedyAgent),
        (RandomAgent, AlphaBetaAgent),
        (RandomAgent, MCTSAgent),

        # Greedy Agent against the search-based agents
        (GreedyAgent, AlphaBetaAgent),
        (GreedyAgent, MCTSAgent),

        # The final "heavyweight" matchup
        (AlphaBetaAgent, MCTSAgent),
    ]

    # This cell runs the turn-by-turn tournament only
    run_tournament(matchups, num_games=50, game_mode='turn')


--- Running Tournament: TURN Mode (50 games each) ---
Matchup: RandomAgent (A) vs GreedyAgent (B)
  RandomAgent Wins: 0 (0%)
  GreedyAgent Wins: 38 (76%)
  Draws: 12 (24%)
------------------------------
Matchup: RandomAgent (A) vs AlphaBetaAgent (B)
  RandomAgent Wins: 0 (0%)
  AlphaBetaAgent Wins: 50 (100%)
  Draws: 0 (0%)
------------------------------
Matchup: RandomAgent (A) vs MCTSAgent (B)
  RandomAgent Wins: 0 (0%)
  MCTSAgent Wins: 7 (14%)
  Draws: 43 (86%)
------------------------------
Matchup: GreedyAgent (A) vs AlphaBetaAgent (B)
  GreedyAgent Wins: 0 (0%)
  AlphaBetaAgent Wins: 50 (100%)
  Draws: 0 (0%)
------------------------------
Matchup: GreedyAgent (A) vs MCTSAgent (B)
  GreedyAgent Wins: 20 (40%)
  MCTSAgent Wins: 11 (22%)
  Draws: 19 (38%)
------------------------------
Matchup: AlphaBetaAgent (A) vs MCTSAgent (B)
  AlphaBetaAgent Wins: 50 (100%)
  MCTSAgent Wins: 0 (0%)
  Draws: 0 (0%)
------------------------------


**SIMULTANEOUS MODE**

In [None]:
# make sure the new files are recognized
import importlib
import kabaddi_env
import agents
importlib.reload(kabaddi_env)
importlib.reload(agents)


from kabaddi_env import KabaddiGame
from agents import RandomAgent, GreedyAgent, AlphaBetaAgent, MCTSAgent

def run_game(agent_a, agent_b, game_mode='turn'):
    """Runs a single game between two agents and returns the winner.

    Args:
        agent_a: agent controlling team 'A'.
        agent_b: agent controlling team 'B'.
        game_mode: 'turn' (teams alternate) or 'simultaneous' (both teams
            move at once).

    Returns:
        The recorded winner: 'A', 'B' or 'DRAW'.

    Raises:
        ValueError: if ``game_mode`` is not one of the two supported modes.
    """
    # Checked up front: an unknown mode would otherwise loop forever.
    if game_mode not in ('turn', 'simultaneous'):
        raise ValueError(f"Unknown game_mode: {game_mode!r}")

    game = KabaddiGame()
    agents_map = {'A': agent_a, 'B': agent_b}
    while not game.is_game_over():
        if game_mode == 'turn':
            current_team = game.state.current_turn
            move = agents_map[current_team].get_move(game)
            if move:
                game.play_turn_based(move)
            else:  # No moves possible, end in a draw
                game.state.winner = 'DRAW'
        else:
            # NOTE: For simultaneous moves, the agents still look at the *current* board
            # state to make their decision. They don't know what the opponent will do.
            move_a = agents_map['A'].get_move(game)
            move_b = agents_map['B'].get_move(game)
            if move_a and move_b:
                game.play_simultaneous(move_a, move_b)
            else:  # If any agent fails to move, end in a draw
                game.state.winner = 'DRAW'

    return game.state.winner

def run_tournament(agent_pairs, num_games=50, game_mode='turn'):
    """Plays ``num_games`` games per agent pairing and prints a summary.

    Sides are swapped every other game. In the tally, key 'A' counts wins of
    the first agent of the pair and key 'B' wins of the second. A ``None``
    winner is counted as a draw.
    """
    print(f"\n--- Running Tournament: {game_mode.upper()} Mode ({num_games} games each) ---")
    for first_cls, second_cls in agent_pairs:
        # The None bucket catches a missing winner in non-swapped games.
        tally = dict.fromkeys(('A', 'B', 'DRAW', None), 0)
        first_name = first_cls.__name__
        second_name = second_cls.__name__
        print(f"Starting Matchup: {first_name} vs {second_name}...")

        for round_no in range(num_games):
            sides_swapped = round_no % 2 != 0
            if sides_swapped:
                team_a_cls, team_b_cls = second_cls, first_cls
            else:
                team_a_cls, team_b_cls = first_cls, second_cls
            winner = run_game(team_a_cls('A'), team_b_cls('B'), game_mode)

            if not sides_swapped:
                tally[winner] += 1
            elif winner == 'A':
                # Team A was the second agent in this game.
                tally['B'] += 1
            elif winner == 'B':
                tally['A'] += 1
            else:
                tally['DRAW'] += 1

        # Fold the None bucket into the draws
        tally['DRAW'] += tally.pop(None, 0)

        print(f"Matchup: {first_name} (A) vs {second_name} (B)")
        for label, key in ((f"{first_name} Wins", 'A'), (f"{second_name} Wins", 'B'), ("Draws", 'DRAW')):
            print(f"  {label}: {tally[key]} ({tally[key]/num_games:.0%})")
        print("-" * 30)

if __name__ == "__main__":
    # Agent pairings to evaluate against each other
    matchups = [
        # Random Agent against every other agent
        (RandomAgent, GreedyAgent),
        (RandomAgent, AlphaBetaAgent),
        (RandomAgent, MCTSAgent),

        # Greedy Agent against the search-based agents
        (GreedyAgent, AlphaBetaAgent),
        (GreedyAgent, MCTSAgent),

        # The final "heavyweight" matchup
        (AlphaBetaAgent, MCTSAgent),
    ]

    # --- Part (b) of the assignment: simultaneous mode only ---
    print("Running SIMULTANEOUS mode tournament as requested.")
    run_tournament(matchups, num_games=50, game_mode='simultaneous')

Running SIMULTANEOUS mode tournament as requested.

--- Running Tournament: SIMULTANEOUS Mode (50 games each) ---
Starting Matchup: RandomAgent vs GreedyAgent...
Matchup: RandomAgent (A) vs GreedyAgent (B)
  RandomAgent Wins: 5 (10%)
  GreedyAgent Wins: 16 (32%)
  Draws: 29 (58%)
------------------------------
Starting Matchup: RandomAgent vs AlphaBetaAgent...
Matchup: RandomAgent (A) vs AlphaBetaAgent (B)
  RandomAgent Wins: 0 (0%)
  AlphaBetaAgent Wins: 25 (50%)
  Draws: 25 (50%)
------------------------------
Starting Matchup: RandomAgent vs MCTSAgent...
Matchup: RandomAgent (A) vs MCTSAgent (B)
  RandomAgent Wins: 8 (16%)
  MCTSAgent Wins: 15 (30%)
  Draws: 27 (54%)
------------------------------
Starting Matchup: GreedyAgent vs AlphaBetaAgent...
Matchup: GreedyAgent (A) vs AlphaBetaAgent (B)
  GreedyAgent Wins: 0 (0%)
  AlphaBetaAgent Wins: 0 (0%)
  Draws: 50 (100%)
------------------------------
Starting Matchup: GreedyAgent vs MCTSAgent...
Matchup: GreedyAgent (A) vs MCTSAgent (