# tAIrritory

#### tAIrritory is a 7x3 turn-based strategy game where two players (P1 and P2) compete to dominate the middle row. Each player controls 6 pieces (3 Type-A and 3 Type-B), with unique movement and interaction rules. The game challenges players to outmaneuver their opponent through tactical decisions.
#### Key Features
#### Victory Condition: Control the middle row (Row 4) when the game ends, or face a draw.
#### Dynamic Rules: Pieces can block or interchange with opponents, and players must always make a valid move.
#### AI Integration: Train a reinforcement learning agent (DQN/PPO) to compete with human players.
#### GUI Frontend: Play the game through an intuitive, interactive interface.
#### tAIrritory combines strategy and artificial intelligence, offering fun gameplay and a platform to explore AI learning.

### Import neccessary packages

In [1]:
import numpy as np
import gymnasium as gym
import pygame
import torch

#### Create the GYMnasium environment and implement reset() and step() functions

In [31]:
class TAIrritoryEnv(gym.Env):

    metadata = {"render.modes": ["human"]}

    def __init__(self):
        super(TAIrritoryEnv, self).__init__()
        self.board = np.zeros((7, 3), dtype=int)
        self._initialize_board()
        self.action_space = gym.spaces.Box(low=0, high=np.array([6, 2, 6, 2]), shape=(4,), dtype=int)
        self.observation_space = gym.spaces.Box(low=-2, high=2, shape=(7, 3), dtype=np.int8)
        self.current_player = 1
        self.done = False
        self.winner = None
    
    def _initialize_board(self):
        self.board[0] = [2, 1, 2]
        self.board[1] = [2, 1, 2]
        self.board[5] = [-2, -1, -2]
        self.board[6] = [-2, -1, -2]

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.board = np.zeros((7, 3), dtype=int)
        self._initialize_board()
        self.current_player = 1
        self.done, self.winner = False, None
        return self.board, {}
    
    def _possible_move_for_piece(self, row, col) -> list:
        piece = self.board[(row, col)]
        possible_moves = []
        if self.current_player == 1:
            if row > 3:
                if piece == -1: 
                    if col-1 >= 0 and self.board[(col-1, row - 1)] not in [-2, -1, 1]:
                        possible_moves.append([col - 1, row - 1])
                
                    elif self.board[(col, row - 1)] not in [-2, -1, 1]:
                        possible_moves.append([col, row - 1])
                
                    elif col+1 < 2 and self.board[(col+1, row - 1)] not in [-2, -1, 1]:
                        possible_moves.append([col + 1, row - 1])
                
                else: 
                    if col-1 >= 0 and self.board[(col-1, row - 1)] not in [-2, -1, 2]:
                        possible_moves.append([col - 1, row - 1])
                
                    elif self.board[(col, row - 1)] not in [-2, -1, 2]:
                        possible_moves.append([col, row - 1])
                    
                    elif col+1 < 2 and self.board[(col+1, row - 1)] not in [-2, -1, 2]:
                        possible_moves.append([col + 1, row - 1])
        
        else:
            if row < 3:
                if piece == 1:
                    if col-1 >= 0 and self.board[(col-1, row + 1)] not in [2, 1, -1]:
                        possible_moves.append([col - 1, row + 1])
                
                    elif self.board[(col, row + 1)] not in [2, 1, -1]:
                        possible_moves.append([col, row + 1])
                    
                    elif col+1 < 2 and self.board[(col+1, row + 1)] not in [2, 1, -1]:
                        possible_moves.append([col + 1, row + 1])
                
                else:
                    if col-1 >= 0 and self.board[(col-1, row + 1)] not in [2, 1, -2]:
                        possible_moves.append([col - 1, row + 1])
                
                    elif self.board[(col, row + 1)] not in [2, 1, -2]:
                        possible_moves.append([col, row + 1])
                    
                    elif col+1 < 2 and self.board[(col+1, row + 1)] not in [2, 1, -2]:
                        possible_moves.append([col + 1, row + 1])
        
        return possible_moves
    
    def _is_game_over(self):
        game_over = False
        for row in range(7):
            for col in range(3):
                game_over = True if len(self._possible_move_for_piece(row, col)) > 0 else game_over
        return game_over
    
    def step(self, action):
        self.board[(action[0], action[1])], self.board[(action[2], action[3])] = \
        self.board[(action[2], action[3])], self.board[(action[0], action[1])]
        reward = 0 if not self._is_game_over() else reward = 1 if self.current_player == 2 else reward = -1
        terminated = False if reward == 0 else True
        truncated = False
        info = {}
        return self.board, reward, terminated, truncated, info
    


#### Register the new env to call it using gym.make()

In [32]:
gym.register(id="gymnasium_env/GridWorld-v0", entry_point=GridWorldEnv)

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


#### Now build a simple tabular model based AI to play the game

In [39]:
class Agent:
    def __init__(self, env: gym.Env, gamma=0.95, eps = 0.1):
        self.env = env
        self.gamma = gamma
        self.eps = eps
        self.state_values = np.zeros(
            shape=(self.env.unwrapped.size, self.env.action_space.n),
            dtype=np.float32,
        )
    
    def choose_action(self, obs): # simple Value iteration technoque
        next_state_vals = []
        for action in range(self.env.action_space.n):
            if action == 0:
                next_state_vals.append(self.state_values[np.clip(obs + np.array([1, 0]), 0, self.env.size - 1)])
            elif action == 1:
                next_state_vals.append(self.state_values[np.clip(obs + np.array([0, 1]), 0, self.env.size - 1)])
            elif action == 2:
                next_state_vals.append(self.state_values[np.clip(obs + np.array([-1, 0]), 0, self.env.size - 1)])
            else:
                next_state_vals.append(self.state_values[np.clip(obs + np.array([0, -1]), 0, self.env.size - 1)])
        return np.argmax(next_state_vals) if np.random.random() > self.eps else np.random.choice([0, 1, 2, 3])
    
    def update_value(self, obs, reward, next_obs):
        self.state_values[obs] = reward + self.gamma * self.state_values[next_obs]


In [40]:
env = gym.make("gymnasium_env/GridWorld-v0", size=4)
agent = Agent(env)
NUM_EPISODES = 10

In [41]:
for episode in range(NUM_EPISODES):
    done = False
    obs, _ = env.reset()
    step_counter = 0
    while not done:
        step_counter += 1
        action = agent.choose_action(obs['agent'])
        next_obs, reward, term, trunc, _ = env.step(action)
        agent.update_value(obs['agent'], reward, next_obs['agent'])
        done = term or trunc
    print(f"Episode {episode + 1} finished in {step_counter} steps!")
    env.reset()


Episode 1 finished in 1 steps!
Episode 2 finished in 2 steps!


  logger.warn(


KeyError: 8