# In [None]:  (Jupyter notebook cell marker left over from export)
import gym
import gym_2048
import numpy as np
import math
import copy  # For deep copying the environment
import matplotlib.pyplot as plt

# MCTS Node definition
class MCTSNode:
    """A single node of the search tree.

    Each node stores the state it represents, a link to its parent, the list
    of child nodes created from it, and two running statistics (``visits`` and
    ``value``) that are accumulated through ``update``.

    Args:
        state: State represented by this node (stored as given, not copied).
        action: Action that led from the parent to this node; ``None`` when
            not supplied (e.g. for a root).
        parent: Parent node, or ``None`` when not supplied.
    """

    def __init__(self, state, action=None, parent=None):
        # Position in the tree.
        self.state, self.parent = state, parent
        self.children = []
        # Statistics: number of updates and the sum of the rewards received.
        self.visits, self.value = 0, 0
        # Action taken to reach this state.
        self.action = action

    def add_child(self, child_state, action):
        """Create a node for ``child_state``, attach it below this node, and return it."""
        self.children.append(MCTSNode(child_state, parent=self, action=action))
        return self.children[-1]

    def update(self, reward):
        """Record one more visit and add ``reward`` to the accumulated value."""
        self.visits = self.visits + 1
        self.value = self.value + reward

# MCTS Class definition
class MCTS:
    """Monte Carlo Tree Search over the board states of a Gym-style environment.

    The environment passed in is only used as a template: every look-ahead
    runs on a deep copy whose ``unwrapped.board`` is overwritten with the
    state of interest, so ``self.env`` itself is never stepped by this class.

    Args:
        env: Template environment. The code relies on ``action_space.n``,
            ``action_space.sample()``, ``reset()``, ``step(action)`` returning
            a 4-tuple ``(state, reward, done, info)``, and a writable
            ``unwrapped.board`` attribute.
        exploration_weight: Multiplier applied to the exploration term of the
            UCB score used in ``select``.
    """

    def __init__(self, env, exploration_weight=1.4):
        self.env = env
        self.exploration_weight = exploration_weight

    def _env_at(self, state):
        """Return a deep copy of the template environment positioned at ``state``."""
        env_copy = copy.deepcopy(self.env)
        # reset() runs before the board is overwritten, so whatever it
        # initialises is in place and only the board is replaced afterwards.
        env_copy.reset()
        env_copy.unwrapped.board = copy.deepcopy(state)
        return env_copy

    def select(self, node):
        """Return the child of ``node`` with the highest UCB score.

        Unvisited children score infinity, so they are always tried first.
        Raises ``ValueError`` (from ``max``) if ``node`` has no children.
        """
        # The log term is the same for every child, so compute it once. A
        # parent with no recorded visits contributes no exploration bonus
        # instead of triggering a math domain error on log(0).
        log_parent_visits = math.log(node.visits) if node.visits > 0 else 0.0

        def ucb_score(child):
            if child.visits == 0:
                return float('inf')
            exploitation = child.value / child.visits
            exploration = math.sqrt(log_parent_visits / child.visits)
            return exploitation + self.exploration_weight * exploration

        return max(node.children, key=ucb_score)

    def expand(self, node):
        """Add one child to ``node`` per action whose outcome is not terminal.

        Each action is tried on its own fresh copy of the environment placed
        at ``node.state``; the resulting board is stored as the child's state.
        """
        for action in range(self.env.action_space.n):
            env_copy = self._env_at(node.state)
            _, _, done, _ = env_copy.step(action)
            if not done:
                node.add_child(copy.deepcopy(env_copy.unwrapped.board), action)

    def simulate(self, node):
        """Play uniformly random actions from ``node.state`` until the episode ends.

        Returns:
            The sum of the rewards returned by ``step`` during the rollout.
        """
        env_copy = self._env_at(node.state)

        done = False
        total_reward = 0
        while not done:
            action = env_copy.action_space.sample()
            _, reward, done, _ = env_copy.step(action)
            total_reward += reward

        return total_reward

    def backpropagate(self, node, reward):
        """Apply ``reward`` to ``node`` and to every ancestor up to the root."""
        current_node = node
        while current_node is not None:
            current_node.update(reward)
            current_node = current_node.parent

    def search(self, root_state, iterations=100):
        """Run MCTS from ``root_state`` and return the most visited root child.

        Args:
            root_state: Board state to plan from.
            iterations: Number of select/expand/simulate/backpropagate rounds.

        Returns:
            The child node of the root with the most visits (its ``action``
            attribute is the move to play), or ``None`` if no child was ever
            created — i.e. every action from ``root_state`` ended the episode,
            or ``iterations`` is 0.
        """
        root_node = MCTSNode(root_state)

        for _ in range(iterations):
            # Selection: descend to a leaf following the best UCB score.
            node = root_node
            while node.children:
                node = self.select(node)

            # Expansion: expand() tests every action from the leaf's own
            # state and skips terminal outcomes, so no separate terminal
            # probe is needed here. (Probing from the root state instead of
            # the leaf could suppress expansion whenever the first action
            # from the root happened to end the game.)
            self.expand(node)

            # Simulation: random rollout from the selected leaf.
            reward = self.simulate(node)

            # Backpropagation: push the rollout reward up to the root.
            self.backpropagate(node, reward)

        # Choose the best action based on the most visits.
        if root_node.children:
            return max(root_node.children, key=lambda n: n.visits)
        return None  # No children were generated, likely a terminal state

# Initialize the environment and MCTS agent.
# NOTE(review): the '2048-v0' id is presumably registered with gym as a side
# effect of `import gym_2048` above — confirm against that package.
# `mcts` keeps a reference to `env` and runs its look-ahead on deep copies of
# it, so `env` remains the single "real" game that the evaluator steps.
env = gym.make('2048-v0')
mcts = MCTS(env)

# Evaluator class definition
class Evaluator:
    """Plays full episodes with an MCTS planner and summarises the results.

    Args:
        env: Environment that is actually played. ``evaluate`` calls
            ``reset()`` and ``step(action)`` on it and reads
            ``unwrapped.board`` at the end of each episode.
        mcts: Planner whose ``search(state, iterations=...)`` returns a node
            carrying the chosen ``action``, or ``None`` when no move is found.
        episodes: Number of episodes played by ``evaluate``.
    """

    def __init__(self, env, mcts, episodes=10):
        self.env, self.mcts, self.episodes = env, mcts, episodes

    def evaluate(self):
        """Play ``self.episodes`` episodes, print a summary, and return the metrics.

        Returns:
            A dict with the per-episode ``scores`` and ``highest_tiles``, the
            ``average_score``, the fraction of episodes whose highest tile
            reached 2048 / 4096 / 8192 (``win_rate_*``), and the overall
            ``highest_tile``. All aggregate values are 0 when no episode ran.
        """
        scores, highest_tiles = [], []
        tile_achievements = dict.fromkeys((2048, 4096, 8192), 0)

        for _episode in range(self.episodes):
            state, done, total_score = self.env.reset(), False, 0

            # Ask the planner for a move, play it in the real environment,
            # and stop when the game ends or the planner has nothing to offer.
            while not done:
                chosen = self.mcts.search(state, iterations=100)
                if chosen is None:
                    break
                state, reward, done, _info = self.env.step(chosen.action)
                total_score += reward

            # Episode bookkeeping: final score and the largest tile on the board.
            scores.append(total_score)
            highest_tile = np.max(self.env.unwrapped.board)
            highest_tiles.append(highest_tile)

            # An episode counts towards every threshold its best tile reaches.
            for tile in tile_achievements:
                tile_achievements[tile] += 1 if highest_tile >= tile else 0

        # Aggregate metrics (guarded so that zero episodes yields zeros).
        average_score = np.mean(scores) if scores else 0
        win_rate_2048, win_rate_4096, win_rate_8192 = (
            tile_achievements[tile] / self.episodes if self.episodes > 0 else 0
            for tile in (2048, 4096, 8192)
        )
        highest_tile_achieved = max(highest_tiles, default=0)

        print(f"Average Score: {average_score}")
        print(f"Win Rate for 2048 Tile: {win_rate_2048 * 100:.2f}%")
        print(f"Win Rate for 4096 Tile: {win_rate_4096 * 100:.2f}%")
        print(f"Win Rate for 8192 Tile: {win_rate_8192 * 100:.2f}%")
        print(f"Highest Tile Achieved in All Episodes: {highest_tile_achieved}")

        return {
            "scores": scores,
            "highest_tiles": highest_tiles,
            "average_score": average_score,
            "win_rate_2048": win_rate_2048,
            "win_rate_4096": win_rate_4096,
            "win_rate_8192": win_rate_8192,
            "highest_tile": highest_tile_achieved,
        }

# Run the evaluation: 10 complete games, every move chosen by the MCTS agent
evaluator = Evaluator(env, mcts, episodes=10)
evaluation_results = evaluator.evaluate()

# Per-episode series used by the two figures below
scores, highest_tiles = (
    evaluation_results[key] for key in ('scores', 'highest_tiles')
)

# Figure 1: score obtained in each episode, in the order the episodes were played
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(len(scores)), scores, label="Score per Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Score")
ax.set_title("Score Progression Over Episodes")
ax.legend()
plt.show()

# Figure 2: how often each highest-tile bucket was reached across episodes
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(highest_tiles, bins=[0, 512, 1024, 2048, 4096, 8192, 16384], edgecolor='black')
ax.set_xlabel("Highest Tile")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Highest Tile Achieved")
plt.show()
