In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import random
from einops.layers.torch import Rearrange
from einops import rearrange

from typing import Any, Dict, Tuple, Optional
from game_mechanics import GoEnv, choose_move_randomly, load_pkl, play_go, save_pkl
from tqdm.notebook import tqdm

from functools import partial
import pandas as pd
from datetime import datetime

In [2]:
def choose_move(observation, legal_moves, neural_network: nn.Module) -> int:
    """Called during competitive play. It acts greedily given current state of the board and value
    function dictionary. It returns a single move to play.

    Args:
        state:
        
    Returns:
        move sampled from the policy network
    """
    probs, value = my_network(observation, legal_moves)
    probs = probs[0].cpu().detach().numpy()
    move = np.random.choice(range(82), p=probs)
    return move


def choose_move_human(observation: np.ndarray, legal_moves:np.ndarray, neural_network: nn.Module) -> int:
    print(observation)
    i, j = [int(_) for _ in input().split(" ")]

    return (i-1)*9 + j-1

def random_move(observation, legal_moves):
    return random.choice(legal_moves)

def choose_move_no_network_human(observation: np.ndarray, legal_moves: np.ndarray) -> int:
    """The arguments in play_game() require functions that only take the state as input.

    This converts choose_move() to that format.
    """
    return choose_move_human(observation, legal_moves, my_network)

In [3]:
class alpha_go_zero_batch(nn.Module):
    def __init__(self):
        super().__init__()
        self.stem = nn.Sequential(
            nn.Linear(81,100),
            nn.ReLU(),
            nn.Linear(100,100),
            nn.ReLU()
        )
        
        self.tower1 = nn.Sequential(
            nn.Linear(100,100),
            nn.ReLU(),
            nn.Linear(100,82)
        )
        
        self.tower2 = nn.Sequential(
            nn.Linear(100,100),
            nn.ReLU(),
            nn.Linear(100,1),
            nn.ReLU(),
        )


    def forward(self, x, legal_moves):
        illegal = lambda legal: [move not in legal for move in range(82)]
        mask = torch.stack([torch.as_tensor(illegal(lm)) for lm in legal_moves])        

        
        x = rearrange(x, 'b w h -> b (w h)')
        x = self.stem(x)
        x1 = self.tower1(x)
        x1 = x1.masked_fill(mask, -torch.inf)
        x1 = F.softmax(x1, dim=-1)
        x2 = self.tower2(x)
        x2 = torch.tanh(x2)     
            
        return x1, x2

In [4]:
class Reservoir:
    def __init__(self):
        self.data = []
        
    def append(self, observation, old_value, reward, done, legal_moves, chosen_move):
        self.data.append((observation, old_value, reward, done, legal_moves, chosen_move))
    
    def sample_pop(self, size):
        if size > len(self.data):
            size = len(self.data)
        random.shuffle(self.data)
        sample = self.data[:size]
        self.data = self.data[size:]
        return self.stack(sample)
    
    def sample(self, size):
        
    
    def stack(self, data):
        observations = torch.stack([d[0] for d in data])
        old_values = torch.as_tensor([d[1] for d in data], dtype=torch.float32)
        rewards = torch.as_tensor([d[2] for d in data], dtype=torch.float32)
        dones = torch.as_tensor([d[3] for d in data], dtype=torch.float32)
        legal_moves = [d[4] for d in data]
        chosen_moves = [d[5] for d in data]
        return observations, old_values, rewards, dones, legal_moves, chosen_moves
    
    def __len__(self):
        return len(self.data)

IndentationError: expected an indented block (2797179358.py, line 19)