In [None]:
import copy
import datetime
import json
import os
import random
import time
import typing
from collections import deque

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pygame
import torch
from torch import nn

from Minesweeper import Minesweeper
from MinesweeperGUI import MinesweeperGUI

    This cell performs a safety check on the user's directory structure.
    The notebook should not be executed outside of its expected location. Modify the cell below at your own risk.

In [26]:
# Guard cell: refuse to run unless the repository marker file
# "../execution.key" exists and contains the expected key, then make sure the
# "../outputs" directory is available for later cells.
root_files = os.listdir("..")

auth = False

if "execution.key" in root_files:
    with open("../execution.key", "r") as f:
        for line in f:
            # Lines starting with "#" are comments inside the key file
            if line.startswith("#"):
                continue
            # Lines read from a file keep their trailing newline, so compare
            # the stripped text; otherwise only a final line without a newline
            # could ever match.
            if line.strip() == "ff7f6519d0f9fb3d1eef5fac3fec7e83":
                auth = True
                break

if not auth:
    raise RuntimeError(
        "Execution is not occuring in the correct directory structure."
        + "\nPlease re-pull from GitHub:"
        + " https://github.com/EkinsMatthew/MinesweeperDQN"
    )

# Only reached when the key check passed. exist_ok keeps a re-run of this
# cell from failing if the directory was created in the meantime.
os.makedirs("../outputs", exist_ok=True)

In [2]:
T = typing.TypeVar("T")


class Deck(typing.Generic[T]):
    """A bounded first-in-first-out buffer.

    Once ``max_size`` items are stored, appending a new item evicts the oldest
    one. The underlying ``collections.deque`` is exposed as ``queue``.

    Parameters
    ----------
    max_size : int
        Maximum number of items kept. A size of 0 yields a buffer that
        discards everything appended to it.

    Raises
    ------
    ValueError
        If ``max_size`` is negative.
    """

    def __init__(self, max_size: int):
        if max_size < 0:
            raise ValueError(f"max_size must be non-negative: {max_size}")
        # maxlen lets the deque evict the oldest entry itself, which also
        # covers max_size == 0 (a manual popleft on an empty deque would raise)
        self.queue: deque[T] = deque(maxlen=max_size)
        self.max_size = max_size

    def append(self, obs: T) -> None:
        """Store ``obs``, evicting the oldest item if the buffer is full."""
        self.queue.append(obs)

    def sample(self, n: int) -> list[T]:
        """Return ``n`` distinct items chosen uniformly at random.

        ``random.sample`` raises ``ValueError`` when ``n`` exceeds the number
        of stored items.
        """
        return random.sample(self.queue, n)

    def mean(self) -> float:
        """Return the arithmetic mean of the stored items.

        Raises ``ZeroDivisionError`` when the buffer is empty.
        """
        return sum(self.queue) / len(self.queue)

In [3]:
class BasicAgent(nn.Module):
    """Fully connected network: input -> 128 -> 64 -> output.

    Parameters
    ----------
    input_shape : int
        Number of input features (the flattened input length).
    output_shape : int
        Number of output values.
    relu_slope : float
        Negative slope passed to both ``nn.LeakyReLU`` activations.
    """

    def __init__(
        self,
        input_shape: int,
        output_shape: int,
        relu_slope: float,
    ):
        super().__init__()
        # Layers are created in forward order so that parameter
        # initialisation consumes the RNG in the same sequence.
        layers = [
            nn.Linear(input_shape, 128),
            nn.LeakyReLU(relu_slope),
            nn.Linear(128, 64),
            nn.LeakyReLU(relu_slope),
            nn.Linear(64, output_shape),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x) -> torch.Tensor:
        """Flatten ``x`` to one dimension and run it through the stack."""
        flat = torch.flatten(x)
        return self.model(flat)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class MinesweeperDQN(nn.Module):
    """Convolutional Q-network with two 3x3 convolutions and three linear layers.

    The convolutions use stride 1 and padding 1, so the spatial size of the
    input is preserved and the flattened feature count is
    ``32 * input_shape[0] * input_shape[1]``.

    NOTE: a later cell in this notebook defines another class with the same
    name; once that cell has run, ``MinesweeperDQN`` refers to that definition.

    Parameters
    ----------
    input_shape : sequence of two ints
        The two spatial dimensions of the input.
    num_actions : int
        Number of values produced by the output layer.
    """

    def __init__(self, input_shape, num_actions):
        super().__init__()

        rows, cols = input_shape[0], input_shape[1]

        # Feature extractor: 1 -> 16 -> 32 channels
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)

        # Spatial size is unchanged by the convolutions, only channels grow
        flat_features = 32 * rows * cols

        # Fully connected head
        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_actions)

    def forward(self, x):
        """Return a ``(1, num_actions)`` tensor for a single 2-D input."""
        # Prepend batch and channel dimensions
        x = x[None, None]

        for conv in (self.conv1, self.conv2):
            x = F.leaky_relu(conv(x))

        # Collapse everything except the batch dimension
        x = x.flatten(start_dim=1)

        for hidden in (self.fc1, self.fc2):
            x = F.leaky_relu(hidden(x))

        # Output layer has no activation
        return self.fc3(x)

In [5]:
class MinesweeperDQN(nn.Module):
    """Convolutional Q-network with two 4x4 convolutions and three linear layers.

    Each convolution uses stride 1 and padding 1 with a 4x4 kernel, so it
    shrinks each spatial dimension by one. The flattened feature count is
    therefore ``32 * (input_shape[0] - 2) * (input_shape[1] - 2)`` (which is
    ``4 * 4 * 32`` for a 6x6 input). The output layer produces one value per
    tile per action.

    Parameters
    ----------
    input_shape : sequence of two ints
        The two spatial dimensions of the input.
    num_actions : int
        Number of actions available per tile.

    Raises
    ------
    ValueError
        If either dimension is too small to survive both convolutions.
    """

    KERNEL_SIZE = 4
    PADDING = 1

    def __init__(self, input_shape, num_actions):
        super(MinesweeperDQN, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=16,
            kernel_size=self.KERNEL_SIZE,
            stride=1,
            padding=self.PADDING,
        )
        self.conv2 = nn.Conv2d(
            in_channels=16,
            out_channels=32,
            kernel_size=self.KERNEL_SIZE,
            stride=1,
            padding=self.PADDING,
        )

        # Size of one spatial dimension after a stride-1 convolution:
        # out = in + 2 * padding - kernel + 1. Applied twice, once per layer.
        shrink_per_conv = self.KERNEL_SIZE - 2 * self.PADDING - 1
        out_dims = [dim - 2 * shrink_per_conv for dim in input_shape[:2]]
        if min(out_dims) < 1:
            raise ValueError(
                f"input_shape {tuple(input_shape)} is too small for two "
                f"{self.KERNEL_SIZE}x{self.KERNEL_SIZE} convolutions"
            )
        conv_output_size = out_dims[0] * out_dims[1] * 32

        # Fully connected layers
        self.fc1 = nn.Linear(conv_output_size, 128)
        self.fc2 = nn.Linear(128, 64)
        # One Q value per tile per action. Uses both dimensions so that
        # non-square inputs get the right number of outputs.
        self.fc3 = nn.Linear(64, input_shape[0] * input_shape[1] * num_actions)

    def forward(self, x):
        """Return a flat 1-D tensor of Q values for a single 2-D input."""
        # Add batch and channel dimensions
        x = x.unsqueeze(0).unsqueeze(0)

        # Convolutional layers
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))

        # Flatten the tensor before fully connected layers
        x = x.view(x.size(0), -1)

        # Fully connected layers
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))

        # Output layer (action space), flattened to drop the batch dimension
        x = self.fc3(x).flatten()

        return x

In [None]:
4*4*16

In [9]:
class Manager:
    def __init__(
        self,
        difficulty: typing.Literal["EZ", "B", "I", "E"],
        action_set_size: typing.Literal[1, 2, 3],
        gamma: float,
        memory_length: int,
        learning_rate: float,
        momentum: float,
        use_gui: bool,
        output_dir: str,
    ):
        """An object that monitors and manages the training of a MinesweeperAI.

        Parameters
        ----------
        difficulty : {"B", "I", "E"}
            Which classic Minesweeper difficulty rules (board size and mine
            count) should be used to create the backend game board? "B" for
            Beginner, "I" for Intermediate, and "E" for Expert.
        action_set_size : {1, 2, 3}
            What is the action set size for the player? There are three options:
            1 for a game where the player can only left click; 2 for a game
            where the player can left click to expose squares and also flag
            squares as mines; 3 for a game where the player can expose, flag,
            and also use the test flag function (traditionaly left and right
            click simultaneously)
        gamma : float
            _description_
        memory_length : int
            How long should the memory set of the experience replayer be before
            past examples are deleted from the set?

        Raises
        ------
        ValueError
            If the user passes an illegal value for the difficulty argument
        """
        self.difficulty = difficulty

        if self.difficulty == "EZ":
            x = 6
            y = 6
            mines = 4
        elif self.difficulty == "B":
            x = 9
            y = 9
            mines = 10
        elif self.difficulty == "I":
            x = 16
            y = 16
            mines = 40
        elif self.difficulty == "E":
            x = 30
            y = 16
            mines = 99
        else:
            raise ValueError(
                f"Difficulty must be one of three values (EZ, B, I, E): {difficulty}"
            )

        self.game = Minesweeper()
        self.game.initialize_game_state(x, y, mines)

        self.use_gui = use_gui

        if self.use_gui:
            self.gui = MinesweeperGUI(
                self.game,
                zoom_factor=10,
                tile_set_number=2,
            )

        # The number of actions that the deep learning player can make
        self.ACTION_SET_SIZE = action_set_size
        # The action vector is the size of the board multiplied by the number of
        # actions that are possible to take
        self.ACTION_VECTOR_SIZE = x * y * self.ACTION_SET_SIZE
        # The index numbers of the various actions our model can take
        self.ACTION_FUNCTIONS = {
            0: self.game.discover_tile,
            1: self.game.flag_tile,
            2: self.game.test_number_tile,
        }

        self.gamma = gamma

        self.start_game(x, y, mines)

        self.online = MinesweeperDQN(
            input_shape=(self.game.x, self.game.y),
            num_actions=action_set_size,
        ).to(self.game.device)

        self.target = MinesweeperDQN(
            input_shape=(self.game.x, self.game.y),
            num_actions=action_set_size,
        ).to(self.game.device)

        # Initialize the models together in an identical form
        self.target.load_state_dict(self.online.state_dict())

        # self.online = BasicAgent(
        #     input_shape=self.game.x * self.game.y,
        #     output_shape=self.game.x * self.game.y * self.ACTION_SET_SIZE,
        #     relu_slope=0.2,
        # ).to(self.game.device)

        # self.target = copy.deepcopy(self.online)

        self.learning_rate = learning_rate
        self.momentum = momentum

        self.optimizer = torch.optim.RMSprop(
            self.online.parameters(),
            lr=self.learning_rate,
            momentum=self.momentum,
        )

        self.total_loss: list[torch.Tensor] = []
        self.running_reward: list[float] = []
        self.rewards: list[float] = []

        self.q_preds: list[float] = []
        self.q_targets: list[float] = []

        self.q_spread: list[tuple[torch.Tensor, torch.Tensor]] = []

        # Queue to store all of the state action pairs, the reward of that
        # action, as well as the next resultant state and next action
        self.experience_replayer: Deck[
            tuple[
                # State
                torch.Tensor,
                # Action
                int,
                # Reward
                float,
                # Last state terminality
                bool,
                # Result State
                torch.Tensor,
            ]
        ] = Deck(max_size=memory_length)

        # Some pointers to assist with constructing the experience replayer
        self.last_state: torch.Tensor = torch.empty(0, device=self.game.device)
        self.last_action: int
        self.last_action_reward: float

        # Counter for the number of actions taken
        self.steps: int = 0

        # The location where general outputs will be written to
        self.output_dir = output_dir

    def start_game(
        self,
        x: int,
        y: int,
        mines: int | None = None,
    ):
        """Initialize the internal minesweeper game that will be trained on.

        Parameters
        ----------
        x : int
            The horizontal size of the game board.
        y : int
            The vertical size of the game board.
        mines : int | None
            The number of mines to place on the game board.
        """
        self.game.initialize_game_state(x, y, mines)

    def restart_game(self):
        """Reset the internal game when necessary."""
        self.game.reinitialize_game_state()

    def process_move(
        self,
        action_number: int,
        action_x: int,
        action_y: int,
    ) -> tuple[float, bool]:
        """Take an action coded as an action type and location, perform that
        action on the game board, and determine the reward to give to our
        internal model.

        Parameters
        ----------
        action_number : int
            The number that corresponds to the action type that should be
            executed; see action functions
        action_x : int
            _description_
        action_y : int
            _description_

        Returns
        -------
        tuple[float, bool]
            _description_
        """
        # Given our action and coordinate
        result = self.ACTION_FUNCTIONS[action_number](action_x, action_y)

        # If the move was illegal
        if not result:
            reward = -5
            terminal = False
        #  or if flagging, no explicit reward
        if action_number == 1:
            reward = 0
            terminal = False

        # If the previous was a terminal state
        if self.game.over:
            if self.game.lost:
                reward = -2 * self.game.num_tiles
            else:
                reward = 2 * self.game.num_tiles
            terminal = True

        else:
            # Else, there was a legal discovery performed that did not lose the game
            discovery_rate = self.game.num_discovered / (
                self.game.num_tiles - self.game.num_mines
            )

            # Linearly increase the reward based on the number of tiles the online
            # agent has discovered
            reward = 1 + discovery_rate * 1
            terminal = False

        if terminal:
            self.gui.refresh()
            self.restart_game()

        return reward, terminal

    def poll_model(self) -> tuple[torch.Tensor, torch.Tensor]:

        online_Qs: torch.Tensor = self.online(self.game.board)
        target_Qs: torch.Tensor = self.target(self.game.board)

        self.q_spread.append(
            (
                online_Qs.min().detach().cpu(),
                online_Qs.max().detach().cpu(),
            )
        )

        return online_Qs, target_Qs

    def step(self, epsilon: float):
        # Poll the model for an action
        online_Qs, _ = self.poll_model()

        # Random interger to determine if we take a random action
        n = random.uniform(0, 1)

        # Take a random action
        if n < epsilon:
            # Select all parts of the action space randomly
            action_number = random.randint(0, self.ACTION_SET_SIZE - 1)
            action_x = random.randint(0, self.game.x - 1)
            action_y = random.randint(0, self.game.y - 1)

            # Calculate the location in the tensor that this action would have
            # come from
            action_loc = action_y * self.ACTION_SET_SIZE + action_x

        # Take an online-chosen action
        else:
            # Get the location of the maximally ranked Q value
            action_loc = int(torch.argmax(online_Qs))

            # Given where that Q is, what action does that imply?
            action_number = action_loc % self.ACTION_SET_SIZE

            # Ignoring the number of actions, which tile are we acting on?
            normalized_action_loc = action_loc // self.ACTION_SET_SIZE

            # X and Y coordinate of that tile
            action_x = normalized_action_loc % self.game.y
            action_y = normalized_action_loc // self.game.x

        # Reward for this action
        reward, terminal = self.process_move(
            action_number,
            action_x,
            action_y,
        )

        current_board = self.game.board.clone()

        # After the first move, start building the experience replayer
        if self.steps > 0:
            # Add the tuple set of (s_t, a_t, r_t, s_t+1)
            self.experience_replayer.append(
                (
                    self.last_state,
                    self.last_action,
                    self.last_action_reward,
                    self.last_action_terminal,
                    current_board,
                )
            )

        # Store the state of this game board now so that we can use it for
        # saving states to the experience_replayer
        self.last_state = current_board
        self.last_action = action_loc
        self.last_action_reward = reward
        self.last_action_terminal = terminal

        self.running_reward.append(reward)

        self.steps += 1

    def reset_target(self):
        """Function for updating the online model to match the target over time"""
        self.target.load_state_dict(self.online.state_dict())

    def play(
        self, M: int, T: int, epsilon_target: float, batch_size: int, training_frequency
    ):
        epsilon = 1.0
        for m in range(M):
            for t in range(T):
                previous_steps = t + m * T

                # Fix epsilon at zero for the first episode
                if m == 0:
                    epsilon = 1.0

                elif (m > 0) & (m < 5):
                    # Linear annealing of epsilon over the second two episodes
                    epsilon = 1 - (1 - epsilon_target) * (
                        (previous_steps - T) / (4 * T)
                    )

                elif m >= 5:
                    epsilon = epsilon_target

                self.step(epsilon=epsilon)

                print(
                    f"t: {t} of Episode: {m} Epsilon: {round(epsilon, 3)}\r",
                    end="",
                )

                if (self.steps > batch_size) & (self.steps % training_frequency == 0):
                    # print(f"\n\tTraining: ")

                    training_round_loss: list[torch.Tensor] = []
                    training_examples = self.experience_replayer.sample(batch_size)

                    for te in training_examples:
                        # Expected reward as defined by the max Q value of our
                        # online network
                        s_i = te[0]  # Current state
                        a_i = te[1]  # Action taken to get here
                        r_i = te[2]  # Reward for that action
                        terminal_state = te[3]  # was it a terminal action
                        s_i_prime = te[4]  # Resultant state from action

                        # What does the target say about this action?
                        target_Q_i = self.target.forward(
                            s_i
                        ).detach()  # "Detach from torch graph so it doesn't
                        # mess up gradient calculation." -E. Crouse
                        # https://github.com/ImagineOrange/Deep-RL-Paper-Implementations/blob/main/CHEEMS_DDDQN.py
                        # Line 711

                        # What does the target say about the expected reward of the next step?
                        target_Q_i_prime = self.target.forward(s_i_prime).detach()

                        # If we need to take into acount the future reward
                        if not terminal_state:
                            y_i = r_i + (
                                self.gamma
                                * target_Q_i_prime[int(torch.argmax(target_Q_i_prime))]
                                # self.gamma
                                # * target_Q_i[a_i]
                            )
                            self.q_targets.append(y_i.detach().cpu())
                        else:
                            y_i = r_i
                            self.q_targets.append(y_i)

                        q_pred = self.online.forward(s_i)[a_i]

                        self.q_preds.append(q_pred.detach().cpu())

                        loss = (q_pred - y_i) ** 2

                        # print(loss)

                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()
                        training_round_loss.append(loss.detach().cpu())

                    average_training_loss = sum(training_round_loss) / len(
                        training_round_loss
                    )

                    # print(f"\t\tLoss: {average_training_loss}")
                    self.total_loss.append(average_training_loss)
                    # print(f"\t\tRunning Reward: {round(self.running_reward.mean(), 3)}")
                    self.rewards.append(
                        sum(self.running_reward) / len(self.running_reward)
                    )
                    self.running_reward = []

                if self.steps % 500 == 0:
                    self.reset_target()
                    print(
                        f"\n\t\t Target Reset"
                        + f"\n\t\t\tAverage Loss since last update: {sum(self.total_loss[-10:])/10}"
                        + f"\n\t\t\tAverage Reward since last update: {sum(self.rewards[-10:])/10}"
                    )

                if self.use_gui:
                    self.gui.refresh()
                    pygame.event.wait(timeout=1)

            # Save the models at the end of each episode
            torch.save(
                self.online.state_dict(),
                f"{self.output_dir}/online_model_episode_{m}.pth",
            )
            torch.save(
                self.target.state_dict(),
                f"{self.output_dir}/target_model_episode_{m}.pth",
            )

In [None]:
# Every run writes into its own timestamped directory under ../outputs
execution_start_time = datetime.datetime.now().strftime(r"%Y-%m-%d--%H-%M-%S")
output_dir = f"../outputs/{execution_start_time}"

os.makedirs(output_dir, exist_ok=True)

# Hyperparameters of this run; everything passed to Manager and play below
# is read from here so the saved record matches what was actually executed
paramters = {
    "difficulty": "EZ",
    "action_set_size": 1,
    "gamma": 0.9,
    "memory_length": int(1e6),
    "learning_rate": 0.001,
    "momentum": 0.0,
    "use_gui": True,
    "M": 10,
    "T": int(5e5),
    "epsilon_target": 0.05,
    "batch_size": 64,
    "training_frequency": 10,
}

# Write into the directory that was just created (the run directory lives
# under "../outputs", not "./outputs") and emit real JSON to match the
# file extension.
with open(f"{output_dir}/model_parameters.JSON", "w") as f:
    json.dump(paramters, f, indent=4)

agent = Manager(
    difficulty=paramters["difficulty"],
    action_set_size=paramters["action_set_size"],
    gamma=paramters["gamma"],
    memory_length=paramters["memory_length"],
    learning_rate=paramters["learning_rate"],
    momentum=paramters["momentum"],
    use_gui=paramters["use_gui"],
    output_dir=output_dir,
)

agent.play(
    M=paramters["M"],
    T=paramters["T"],
    epsilon_target=paramters["epsilon_target"],
    batch_size=paramters["batch_size"],
    training_frequency=paramters["training_frequency"],
)

In [None]:
# Dry run of the epsilon schedule: prints the exploration rate for every step
# of a small M x T run. Epsilon is 1.0 for episode 0, annealed linearly to
# epsilon_target over episodes 1 and 2, and constant afterwards.
M = 10
T = 100
epsilon_target = 0.3

for m in range(M):
    for t in range(T):
        previous_steps = m * T + t

        if m == 0:
            # Fully random during the first episode
            epsilon = 1.0
        elif m < 3:
            # Fraction of the two-episode annealing window already elapsed
            annealed_fraction = (previous_steps - T) / (2 * T)
            epsilon = 1 - (1 - epsilon_target) * annealed_fraction
        else:
            epsilon = epsilon_target

        print(m, t, previous_steps, round(epsilon, 3))

In [38]:
import numpy as np
import matplotlib.pyplot as plt


def plot_average_w_window(data, window_size: int, data_name: str):
    """Plot the moving average of ``data``.

    The average is computed with ``np.convolve(..., mode="valid")``, so the
    first plotted point sits at index ``window_size - 1`` and there is one
    point per data index from there on.

    Parameters
    ----------
    data : sequence of numbers
        The values to smooth, one per recorded sample.
    window_size : int
        Number of consecutive samples averaged into each plotted point.
    data_name : str
        Name of the quantity, used in the title and the y-axis label.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1 or larger than ``len(data)``.
    """
    # With a window longer than the data, "valid" convolution no longer
    # yields len(data) - window_size + 1 points and the x/y lengths diverge;
    # fail early with a readable message instead.
    if window_size < 1 or window_size > len(data):
        raise ValueError(
            f"window_size must be between 1 and len(data)={len(data)}: {window_size}"
        )

    # Calculate the moving average of the data
    averages = np.convolve(data, np.ones(window_size) / window_size, mode="valid")

    # Create the x-axis values corresponding to the averages
    x_values = np.arange(window_size - 1, len(data))

    # Create a figure and axis
    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot the moving average
    ax.plot(
        x_values,
        averages,
        label=f"Moving Average (window size={window_size})",
        color="blue",
    )

    # Set title and labels
    ax.set_title(f"Average {data_name} Over Time")
    ax.set_xlabel("Training Step")
    # x_values are already data indices (a moving average does not
    # downsample), so the ticks are only formatted, not rescaled
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, y: f"{x:,.0f}"))

    ax.set_ylabel(f"Average {data_name}")
    ax.legend()
    ax.grid()

    # Show the plot
    plt.show()

In [None]:
plot_average_w_window(agent.total_loss, window_size=1000, data_name="Loss")

In [None]:
plot_average_w_window(agent.rewards, window_size=1000, data_name="Reward")

In [33]:
mins = [tup[0] for tup in agent.q_spread]
maxes = [tup[1] for tup in agent.q_spread]

spread = [tup[1] - tup[0] for tup in agent.q_spread]

In [None]:
len(mins)

In [None]:
spread

In [None]:
plot_average_w_window(spread, window_size=10000, data_name="Raw Q Spread")

In [None]:
plot_average_w_window(spread, window_size=100000, data_name="Raw Q Spread")

In [None]:
maxes

In [None]:
agent.q_spread

In [24]:
def plot_q_spread(data_tuples, window_size: int, data_name: str):
    """Plot downsampled minimum and maximum Q values.

    ``data_tuples`` holds ``(min, max)`` pairs. Every ``window_size``-th pair
    is kept (no averaging is performed) and the two resulting series are
    drawn on one axis; tick labels are scaled back by ``window_size``.

    Parameters
    ----------
    data_tuples : sequence of pairs
        Pairs whose first entry is plotted as "Q Min" and second as "Q Max".
    window_size : int
        Stride used to downsample the pairs.
    data_name : str
        Name of the quantity, used in the title and the y-axis label.
    """
    # Separate the pairs into two series, then keep every window_size-th value
    q_mins = [pair[0] for pair in data_tuples][::window_size]
    q_maxes = [pair[1] for pair in data_tuples][::window_size]

    # One x position per retained sample
    x_values = np.arange(len(q_mins))

    fig, ax = plt.subplots(figsize=(10, 6))

    # Draw the minimum first and the maximum second
    for series, series_label, series_color in (
        (q_mins, "Q Min", "blue"),
        (q_maxes, "Q Max", "green"),
    ):
        ax.plot(
            x_values,
            series,
            label=f"{series_label} (window size={window_size})",
            color=series_color,
        )

    # Set title and labels
    ax.set_title(f"Average {data_name} Over Time")
    ax.set_xlabel("Training Step")
    # Each plotted point stands for window_size original samples
    ax.xaxis.set_major_formatter(
        ticker.FuncFormatter(lambda x, y: f"{int(x * window_size)}")
    )

    ax.set_ylabel(f"Average {data_name}")
    ax.legend()
    ax.grid()

    # Show the plot
    plt.show()

In [None]:
plot_q_spread(agent.q_spread, window_size=1000, data_name="Raw Q")

In [None]:
while True:
    time.sleep(0.1)
    agent.step(epsilon=0)
    agent.gui.refresh()
    pygame.event.wait(timeout=1)

In [None]:
agent.gui.refresh()

In [None]:
agent.game.discover_tile(3, 0)

In [None]:
online_Qs = agent.online.forward(agent.game.board)

In [None]:
# Get the location of the maximally ranked Q value
action_loc = int(torch.argmax(online_Qs))

# Given where that Q is, what action does that imply?
action_number = action_loc % agent.ACTION_SET_SIZE

# Ignoring the number of actions, which tile are we acting on?
normalized_action_loc = action_loc // agent.ACTION_SET_SIZE

# X and Y coordinate of that tile
action_x = normalized_action_loc % agent.game.y
action_y = normalized_action_loc // agent.game.x

In [None]:
action_x

In [None]:
online_Qs

In [None]:
agent.game.board

In [None]:
agent.game.board.transpose(-1, 0)

In [None]:
agent.q_preds

In [None]:
# agent.q_targets mixes tensors (bootstrapped targets) with plain Python
# numbers (terminal rewards). Move the tensors to the CPU and pass every
# plain number through unchanged; testing for the tensor type instead of
# `int` keeps float rewards from reaching `.detach()`.
q_targets = [
    q_target.detach().cpu() if isinstance(q_target, torch.Tensor) else q_target
    for q_target in agent.q_targets
]

In [None]:
# Create a figure and axis
fig, ax = plt.subplots(figsize=(100, 20))

x_axis = np.arange(0, len(agent.q_targets))

ax.scatter(
    x_axis,
    agent.q_preds,
    label="Q Pred",
    s=1,
    alpha=0.7,
)
ax.scatter(
    x_axis,
    agent.q_targets,
    label="Q Target",
    s=1,
    alpha=0.7,
)

# Set title and labels
ax.set_title("Q Values over Training Epochs")
ax.set_xlabel("Training Example")
ax.set_ylabel("Q Value")
ax.legend()
# ax.grid()

# Show the plot
plt.show()

In [None]:
# Create a figure and axis
fig, ax = plt.subplots(figsize=(30, 18))

sample_rate = 1000

q_preds_sampled = agent.q_preds[::sample_rate]
q_targets_sampled = agent.q_targets[::sample_rate]

x_axis = np.arange(0, len(q_targets_sampled))

ax.scatter(
    x_axis,
    q_preds_sampled,
    label="Q Pred",
    s=1,
    alpha=0.7,
)
ax.scatter(
    x_axis,
    q_targets_sampled,
    label="Q Target",
    s=1,
    alpha=0.7,
)

# Set title and labels
ax.set_title("Q Values over Training Epochs")
ax.set_xlabel(f"Every {sample_rate}th Training Example")
ax.set_ylabel("Q Value")
ax.legend()
# ax.grid()

# Show the plot
plt.show()

In [None]:
len(q_targets)

In [None]:
plt.plot(agent.total_loss)

In [None]:
plt.plot(agent.rewards)

In [None]:
agent.rewards

In [None]:
import pandas as pd
import numpy as np
import time
import random

repetitions = 1000000

start_time = time.time()
dictionary_list = []
for _ in range(repetitions):
    dictionary_data = {k: random.random() for k in range(30)}
    dictionary_list.append(dictionary_data)
end_time = time.time()
print('Execution time for generation [list of dict (row store)] = %.6f seconds' % (end_time-start_time))

start_time = time.time()
df_final1 = pd.DataFrame.from_dict(dictionary_list)
end_time = time.time()
print('Execution time for conversion to pandas [list of dict (row store)] = %.6f seconds' % (end_time-start_time))

start_time = time.time()
list_dictionnary = {k: [] for k in range(30)}
for k in range(30):
    for _ in range(repetitions):
        list_dictionnary[k].append(random.random())
end_time = time.time()
print('Execution time for generation [dict of list (column store) = %.6f seconds' % (end_time-start_time))

start_time = time.time()
df_final2 = pd.DataFrame(list_dictionnary)
end_time = time.time()
print('Execution time for conversion to pandas [dict of list (column store)] = %.6f seconds' % (end_time-start_time))

print(df_final1.shape)
print(df_final2.shape)

In [None]:
df_final1

In [None]:
df_final2

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(agent.rewards)

In [None]:
plt.plot([x.cpu() for x in agent.total_loss])

In [None]:
agent.rewards

In [None]:
# Pass the 2-D board as-is: the convolutional network adds the batch and
# channel dimensions itself, and a pre-flattened board no longer has the
# spatial layout its convolutions need.
output = agent.online.forward(agent.game.board)

# Highest Q value the online network assigns on the current board
output[torch.argmax(output)]

In [None]:
import pandas as pd

In [None]:
agent.game.board

In [None]:
agent.steps

In [None]:
len(agent.experience_replayer.queue)

In [None]:
agent.experience_replayer.queue[997]

In [None]:
# Print the action index and reward of every stored transition. The action
# is stored as a plain int (the flat action index), so it is not subscripted.
for replay in agent.experience_replayer.queue:
    print(replay[1], replay[2])

In [None]:
pygame.quit()

In [None]:
pygame.display.flip()

In [None]:
agent.gui.refresh()

In [None]:
game.discover_tile(8, 8)
pygame.event.wait()
gui.refresh()

In [None]:
game.board.transpose(0, 1)

In [None]:
# Initialize Models

board_size = (10, 10)

In [None]:
game.flag_tile(20, 20)

In [None]:
template = torch.zeros([3, 3], dtype=torch.int8)
mask = torch.tensor([[1, 0, 1], [0, 1, 0], [0, 0, 1]])
data = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [None]:
int(torch.argmax(data))

In [None]:
data.flatten()

In [None]:
torch.where(mask == 1, data, template)

In [None]:
mask.shape

In [None]:
mask == 1

In [None]:
# masked_fill only accepts a boolean mask; `mask` is an integer tensor
template.masked_fill(mask.bool(), 9)

In [None]:
template.shape

In [None]:
mask.shape

In [None]:
data.shape

In [None]:
for coord in torch.nonzero(mask, as_tuple=False):
    print(coord[0], coord[1])

In [None]:
mask

In [None]:
(mask == 1) & (data == 5)

In [None]:
x = 10
y = x

[(i, j) for i in range(x) for j in range(y)]

In [None]:
mask

In [None]:
torch.where(mask == 1)

In [None]:
x = torch.empty(0)

In [None]:
x