In [11]:
!pip install -q -U kaggle-environments
!pip list | grep kaggle

In [12]:
import base64
import bz2
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Trained weights, mounted from the Kaggle dataset.
model_path = "../input/hungry-geese-models/alpha_65.pth"

# Map every tensor to the CPU so the checkpoint loads on a machine without a GPU.
weights = torch.load(model_path, map_location=torch.device("cpu"))

# Serialise the weights into a single ASCII-safe bytes object
# (pickle -> bz2 -> base64) that can be pasted into the submission source.
PARAM = base64.b64encode(bz2.compress(pickle.dumps(weights)))

# Decode again exactly the way submission.py does and verify the round trip,
# so a corrupted payload is caught here rather than inside the submitted agent.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))
assert state_dict.keys() == weights.keys(), "round-tripped state_dict has different keys"
assert all(
    torch.equal(state_dict[key], weights[key]) for key in weights
), "round-tripped state_dict has different tensor values"

In [13]:
%%writefile submission.py

# This is a lightweight ML agent trained by self-play.
# After sharing this notebook,
# we will add the Hungry Geese environment to our HandyRL library.
# https://github.com/DeNA/HandyRL
# We hope you enjoy reinforcement learning!


import base64
import bz2
import math
import pickle
import time
from collections import defaultdict, deque
from copy import deepcopy

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from kaggle_environments.envs.hungry_geese.hungry_geese import Action, translate
from kaggle_environments.helpers import histogram

# MCTS


class MCTS:
    """Monte Carlo tree search in which every live goose picks a move at each node.

    The tree is stored in flat dictionaries keyed by the string form of a state
    (``game.stringRepresentation``), the goose index ``i`` and the action index ``a``.

    Args:
        game: object providing ``actions``, ``getActionSize()``,
            ``stringRepresentation(obs)``, ``getValidMoves(obs, last_obs, i)`` and
            ``getNextState(obs, last_obs, actions)``.
        nn_agent: object providing ``predict(obs, last_obs, i) -> (policy, value)``.
        eps: small constant added under the square root for unvisited edges.
        cpuct: exploration constant of the upper-confidence-bound formula.
    """

    def __init__(self, game, nn_agent, eps=1e-8, cpuct=1.0):
        self.game = game
        self.nn_agent = nn_agent
        self.eps = eps
        self.cpuct = cpuct

        self.Qsa = {}  # (s, i, a) -> mean backed-up value when goose i takes action a in state s
        self.Nsa = {}  # (s, i, a) -> number of times goose i took action a in state s
        self.Ns = {}  # s -> number of times state s was visited
        self.Ps = {}  # (s, i) -> prior policy from the network, masked by the valid moves

        self.Vs = {}  # (s, i) -> valid-move mask from game.getValidMoves

        # Observation passed to the previous getActionProb call (None before the first call).
        self.last_obs = None

    def _reset(self):
        """Drop the whole search tree and the remembered previous observation."""
        for table in (self.Qsa, self.Nsa, self.Ns, self.Ps, self.Vs):
            table.clear()
        self.last_obs = None

    def _fallback_prob(self, s, i, n_actions):
        """Distribution to use when no edge of the root was visited.

        Preference order: stored prior, uniform over valid moves, uniform over all moves.
        """
        prior = self.Ps.get((s, i))
        if prior is not None:
            # Clip so that a prior containing negative entries still yields a distribution.
            prior = np.clip(np.asarray(prior, dtype=np.float64), 0.0, None)
            if prior.sum() > 0:
                return prior / prior.sum()

        valids = self.Vs.get((s, i))
        if valids is not None:
            mask = np.asarray(valids, dtype=np.float64)
            if mask.sum() > 0:
                return mask / mask.sum()

        return np.full(n_actions, 1.0 / n_actions)

    def getActionProb(self, obs, timelimit=1.0):
        """Search from ``obs`` for about ``timelimit`` seconds and return move probabilities.

        Args:
            obs: current observation; must expose ``geese``, ``food`` and ``index``.
            timelimit: wall-clock budget in seconds. At least one simulation is
                always run, so the call can overshoot the budget.

        Returns:
            1-D float array over ``game.actions`` for goose ``obs.index``,
            proportional to the root visit counts. If no root edge was visited
            the stored prior (or a uniform distribution) is returned instead of NaN.
        """
        # Step 0 marks a new episode: the previous observation and the old tree
        # belong to another game and must not leak into this one.
        if getattr(obs, "step", None) == 0:
            self._reset()

        start_time = time.time()
        while True:
            self.search(obs, self.last_obs)
            if time.time() - start_time >= timelimit:
                break

        s = self.game.stringRepresentation(obs)
        i = obs.index
        n_actions = self.game.getActionSize()
        counts = np.array([self.Nsa.get((s, i, a), 0) for a in range(n_actions)], dtype=np.float64)
        total = counts.sum()
        if total > 0:
            prob = counts / total
        else:
            # The first simulation only expands the root, so with a very small
            # budget every count is still zero; dividing would give NaN.
            prob = self._fallback_prob(s, i, n_actions)

        self.last_obs = obs
        return prob

    def search(self, obs, last_obs):
        """Run one simulation from ``obs`` and return the backed-up values.

        Args:
            obs: state to search from.
            last_obs: state that preceded ``obs`` (or None); forwarded to the
                game and the network.

        Returns:
            List of four values, one per goose. A goose that is dead at the
            evaluated leaf gets -10, a live goose gets the network's value.
        """
        s = self.game.stringRepresentation(obs)

        if s not in self.Ns:
            values = [-10] * 4
            for i in range(4):
                if len(obs.geese[i]) == 0:
                    continue

                # leaf node
                self.Ps[(s, i)], values[i] = self.nn_agent.predict(obs, last_obs, i)

                valids = self.game.getValidMoves(obs, last_obs, i)
                self.Ps[(s, i)] = self.Ps[(s, i)] * valids  # masking invalid moves
                sum_Ps_s = np.sum(self.Ps[(s, i)])
                if sum_Ps_s > 0:
                    self.Ps[(s, i)] /= sum_Ps_s  # renormalize

                self.Vs[(s, i)] = valids
                # Registered only when at least one goose is alive: a state with
                # no live geese is never stored, so it is evaluated as a leaf
                # every time it is reached instead of being expanded further.
                self.Ns[s] = 0
            return values

        best_acts = [None] * 4
        for i in range(4):
            if len(obs.geese[i]) == 0:
                continue

            valids = self.Vs[(s, i)]
            cur_best = -float("inf")
            # Used when the goose has no valid move at all.
            best_act = self.game.actions[-1]

            # pick the action with the highest upper confidence bound
            for a in range(self.game.getActionSize()):
                if valids[a]:
                    if (s, i, a) in self.Qsa:
                        u = self.Qsa[(s, i, a)] + self.cpuct * self.Ps[(s, i)][a] * math.sqrt(self.Ns[s]) / (
                            1 + self.Nsa[(s, i, a)]
                        )
                    else:
                        u = self.cpuct * self.Ps[(s, i)][a] * math.sqrt(self.Ns[s] + self.eps)

                    if u > cur_best:
                        cur_best = u
                        best_act = self.game.actions[a]

            best_acts[i] = best_act

        next_obs = self.game.getNextState(obs, last_obs, best_acts)
        values = self.search(next_obs, obs)

        # Back up: fold the leaf value into the running mean of every edge taken here.
        for i in range(4):
            if len(obs.geese[i]) == 0:
                continue

            a = self.game.actions.index(best_acts[i])
            v = values[i]
            if (s, i, a) in self.Qsa:
                self.Qsa[(s, i, a)] = (self.Nsa[(s, i, a)] * self.Qsa[(s, i, a)] + v) / (self.Nsa[(s, i, a)] + 1)
                self.Nsa[(s, i, a)] += 1

            else:
                self.Qsa[(s, i, a)] = v
                self.Nsa[(s, i, a)] = 1

        self.Ns[s] += 1
        return values


class HungryGeese(object):
    """Forward model of the Hungry Geese board used by the tree search.

    Args:
        rows: number of board rows.
        columns: number of board columns.
        actions: ordered list of moves; an action index used elsewhere is an
            index into this list.
        hunger_rate: every ``hunger_rate``-th step each moving goose loses one
            extra tail segment.
    """

    def __init__(
        self, rows=7, columns=11, actions=[Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST], hunger_rate=40
    ):
        self.rows = rows
        self.columns = columns
        # Copy the argument: the default list is created once when the function
        # is defined, so storing it directly would share one mutable list
        # between every instance built with the default.
        self.actions = list(actions)
        self.hunger_rate = hunger_rate

    def getActionSize(self):
        """Return the number of available actions."""
        return len(self.actions)

    def getNextState(self, obs, last_obs, directions):
        """Apply one move per live goose and return the resulting observation.

        ``obs`` is deep-copied and left untouched. No new food is spawned.

        Args:
            obs: current observation (``step``, ``geese``, ``food``).
            last_obs: previous observation or None; used to detect a goose
                stepping back onto its previous head cell.
            directions: one action per goose; entries of dead geese are ignored.

        Returns:
            The new observation; a goose that died has an empty list.
        """
        next_obs = deepcopy(obs)
        next_obs.step += 1
        geese = next_obs.geese
        food = next_obs.food

        for i in range(4):
            goose = geese[i]

            if len(goose) == 0:
                continue

            head = translate(goose[0], directions[i], self.columns, self.rows)

            # Check action direction: moving back to the previous head cell kills the goose.
            # The emptiness check guards against a last_obs in which this goose has no cells.
            if last_obs is not None and last_obs.geese[i] and head == last_obs.geese[i][0]:
                geese[i] = []
                continue

            # Consume food or drop a tail piece.
            if head in food:
                food.remove(head)
            else:
                goose.pop()

            # Add New Head to the Goose.
            goose.insert(0, head)

            # If hunger strikes remove from the tail.
            if next_obs.step % self.hunger_rate == 0:
                if len(goose) > 0:
                    goose.pop()

        # Number of goose segments on each occupied cell.
        goose_positions = histogram(position for goose in geese for position in goose)

        # Check for collisions: a head sharing its cell with any other segment dies.
        for i in range(4):
            if len(geese[i]) > 0:
                head = geese[i][0]
                if goose_positions[head] > 1:
                    geese[i] = []

        return next_obs

    def getValidMoves(self, obs, last_obs, index):
        """Return a list of booleans, one per action, for goose ``index``.

        A move is valid when its target cell is not occupied by any goose
        segment other than a tail tip, and is not the goose's own previous
        head cell (when ``last_obs`` is given).
        """
        geese = obs.geese
        pos = geese[index][0]
        # Tail tips are excluded from the obstacles (goose[:-1]).
        obstacles = {position for goose in geese for position in goose[:-1]}
        # The emptiness check guards against a last_obs in which this goose has no cells.
        if last_obs is not None and last_obs.geese[index]:
            obstacles.add(last_obs.geese[index][0])

        valid_moves = [translate(pos, action, self.columns, self.rows) not in obstacles for action in self.actions]

        return valid_moves

    def stringRepresentation(self, obs):
        """Return a string key built from the goose cells and the food cells."""
        return str(obs.geese + obs.food)


# Neural Network for Hungry Geese


class TorusConv2d(nn.Module):
    """2-D convolution with wrap-around (torus) padding and optional batch norm.

    Before the convolution the input is extended on each side with a copy of
    the opposite edge, ``kernel_size // 2`` cells wide per axis.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        pad_rows = kernel_size[0] // 2
        pad_cols = kernel_size[1] // 2
        self.edge_size = (pad_rows, pad_cols)
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    def forward(self, x):
        pad_rows, pad_cols = self.edge_size

        # Wrap the width axis: last columns go on the left, first columns on the right.
        left_wrap = x[:, :, :, -pad_cols:]
        right_wrap = x[:, :, :, :pad_cols]
        wide = torch.cat((left_wrap, x, right_wrap), dim=3)

        # Wrap the height axis the same way.
        top_wrap = wide[:, :, -pad_rows:]
        bottom_wrap = wide[:, :, :pad_rows]
        padded = torch.cat((top_wrap, wide, bottom_wrap), dim=2)

        out = self.conv(padded)
        if self.bn is None:
            return out
        return self.bn(out)


class GeeseNetAlpha(nn.Module):
    """Residual torus-convolution network with a policy head and a value head.

    Input is a ``(batch, 17, rows, cols)`` tensor. The heads read the feature
    map at the cells marked in input channels 0-3, plus two averages of the map.
    The linear heads expect ``rows * cols == 77``.
    """

    def __init__(self):
        super().__init__()
        layers, filters = 12, 64
        head_in = filters * 5 + 77
        head_hidden = filters * 3

        self.conv0 = TorusConv2d(17, filters, (3, 3), True)
        residual_blocks = [TorusConv2d(filters, filters, (3, 3), True) for _ in range(layers)]
        self.blocks = nn.ModuleList(residual_blocks)

        self.conv_p = TorusConv2d(filters, filters, (3, 3), True)
        self.conv_v = TorusConv2d(filters, filters, (3, 3), True)

        self.head_p1 = nn.Linear(head_in, head_hidden, bias=False)
        self.head_p2 = nn.Linear(head_hidden, 4, bias=False)
        self.head_v1 = nn.Linear(head_in, head_hidden, bias=False)
        self.head_v2 = nn.Linear(head_hidden, 1, bias=False)

    @staticmethod
    def _head_features(feat, x):
        """Build the flat input vector shared by both heads.

        Concatenates, in order: the feature map summed over the cells selected
        by each of input channels 0..3, the per-channel spatial mean, and the
        per-cell mean over channels.
        """
        batch, channels = feat.size(0), feat.size(1)
        parts = []
        for k in range(4):
            selected = feat * x[:, k : k + 1]
            parts.append(selected.view(batch, channels, -1).sum(-1))

        flat = feat.view(batch, channels, -1)
        parts.append(flat.mean(-1))
        parts.append(flat.mean(1))
        return torch.cat(parts, 1)

    def forward(self, x, _=None):
        """Return ``(p, v)``: the raw 4-way policy output and the tanh-squashed value."""
        trunk = F.relu_(self.conv0(x))
        for block in self.blocks:
            trunk = F.relu_(trunk + block(trunk))

        # Policy head.
        policy_map = F.relu_(self.conv_p(trunk))
        policy_hidden = F.relu_(self.head_p1(self._head_features(policy_map, x)))
        p = self.head_p2(policy_hidden)

        # Value head.
        value_map = F.relu_(self.conv_v(trunk))
        value_hidden = F.relu_(self.head_v1(self._head_features(value_map, x)))
        v = torch.tanh(self.head_v2(value_hidden))

        return p, v  # {"policy": p, "value": v}


class NNAgent:
    """Wraps GeeseNetAlpha so it can be queried with raw observations.

    Args:
        state_dict: weights to load into a freshly built ``GeeseNetAlpha``.
    """

    def __init__(self, state_dict):
        self.model = GeeseNetAlpha()
        self.model.load_state_dict(state_dict)
        self.model.eval()

    def predict(self, obs, last_obs, index):
        """Evaluate the position from the point of view of goose ``index``.

        Returns:
            ``(policy, value)`` where ``policy`` is a length-4 numpy array of
            probabilities (non-negative, summing to 1) and ``value`` is a float.
        """
        x = self._make_input(obs, last_obs, index)
        with torch.no_grad():
            xt = torch.from_numpy(x).unsqueeze(0)
            logits, v = self.model(xt)
            # The policy head ends in a plain linear layer, so its output is
            # unnormalised and can be negative. The search masks and
            # renormalises this vector as a probability distribution, so turn
            # the logits into probabilities here.
            p = F.softmax(logits, dim=1)

        return p.squeeze(0).numpy(), v.item()

    # Input for Neural Network
    def _make_input(self, obs, last_obs, index):
        """Encode the board as a ``(17, 7, 11)`` float32 array of 0/1 planes.

        Goose planes are rotated so that goose ``index`` always occupies the
        first plane of each group:
            0-3   head cell of each goose
            4-7   tail-tip cell of each goose
            8-11  every cell of each goose
            12-15 head cell of each goose in ``last_obs`` (all zero when None)
            16    food cells
        """
        b = np.zeros((17, 7 * 11), dtype=np.float32)

        for p, pos_list in enumerate(obs.geese):
            # Plane offset of goose p relative to the goose being evaluated.
            rel = (p - index) % 4
            # head position
            for pos in pos_list[:1]:
                b[0 + rel, pos] = 1
            # tip position
            for pos in pos_list[-1:]:
                b[4 + rel, pos] = 1
            # whole position
            for pos in pos_list:
                b[8 + rel, pos] = 1

        # previous head position
        if last_obs is not None:
            for p, pos_list in enumerate(last_obs.geese):
                for pos in pos_list[:1]:
                    b[12 + (p - index) % 4, pos] = 1

        # food
        for pos in obs.food:
            b[16, pos] = 1

        return b.reshape(-1, 7, 11)


# Load PyTorch Model


# Placeholder for the encoded weights. A later notebook cell rewrites this file
# and textually substitutes the placeholder literal with the base64 payload,
# so the literal below must stay exactly as it is.
PARAM = b"xxxxxxxxxx"

# Decode the embedded weights: base64 -> bz2 -> pickle.
# NOTE(review): pickle.loads executes arbitrary code for untrusted input; this is
# only acceptable because the payload is generated by this same notebook.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))

# Module-level singletons: built once at import time and reused by every call
# to the agent function, so the search tree persists between moves.
game = HungryGeese()
agent = NNAgent(state_dict)
mcts = MCTS(game, agent)


def alphageese_agent(obs, config):
    """Agent entry point: search from ``obs`` and return the chosen move's name.

    The search budget is ``config.actTimeout``, extended by 0.2 from step 40
    onwards. The move with the highest probability reported by the search wins.
    """
    budget = config.actTimeout
    if obs["step"] >= 40:
        budget = config.actTimeout + 0.2

    move_probs = mcts.getActionProb(obs, timelimit=budget)
    chosen = game.actions[np.argmax(move_probs)]
    return chosen.name

Overwriting submission.py


In [14]:
# Read in the submission file.
# %%writefile stores the file as UTF-8 and the source contains non-ASCII
# comments, so state the encoding instead of relying on the platform default.
with open("submission.py", encoding="utf-8") as file:
    filedata = file.read()

# Replace the placeholder with the base64-encoded weights
filedata = filedata.replace("xxxxxxxxxx", PARAM.decode("utf-8"))

# Write the file out again
with open("submission.py", "w", encoding="utf-8") as file:
    file.write(filedata)

In [15]:
from kaggle_environments import make

# Local smoke test: one Hungry Geese episode in which all four seats are
# played by the generated submission file.
env = make("hungry_geese", debug=True)

env.reset()
env.run(["submission.py"] * 4)
env.render(mode="ipython", width=800, height=700)

Goose Collision: WEST
Goose Collision: NORTH
