In [1]:
import base64
import bz2
import importlib
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# model_path = "models/latest.pth"
model_path = "ds/models/offensive_4439.pth"

# Always deserialize onto the CPU: the generated agent file unpickles these
# tensors itself, and a checkpoint saved from a CUDA device would otherwise
# embed CUDA tensors in PARAM that a CPU-only process cannot load.
weights = torch.load(model_path, map_location="cpu")

# Pack the weights as pickle -> bz2 -> base64 so they can be embedded in the
# agent source as a bytes literal (a later cell substitutes the placeholder).
PARAM = base64.b64encode(bz2.compress(pickle.dumps(weights)))
# Round-trip check: decode PARAM with the same steps the agent file uses.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))

In [2]:
%%writefile handyrl/envs/kaggle/geese/rule_based_goose.py

# This is a lightweight ML agent trained by self-play.
# After sharing this notebook,
# we will add the Hungry Geese environment to our HandyRL library.
# https://github.com/DeNA/HandyRL
# We hope you enjoy reinforcement learning!


import base64
import bz2
import math
import pickle
from collections import defaultdict, deque

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Neural Network for Hungry Geese


class Dense(nn.Module):
    """Linear layer optionally followed by 1-D batch normalisation.

    Args:
        units0: number of input features.
        units1: number of output features.
        bnunits: feature count handed to ``BatchNorm1d``; ``0`` disables
            normalisation.
        bias: whether the linear layer has a bias. Ignored (forced off) when
            batch normalisation is enabled.
    """

    def __init__(self, units0, units1, bnunits=0, bias=True):
        super().__init__()
        use_bn = bnunits > 0
        # The linear bias is dropped whenever batch norm follows the layer.
        self.dense = nn.Linear(units0, units1, bias=False if use_bn else bias)
        self.bnunits = bnunits
        self.bn = nn.BatchNorm1d(bnunits) if use_bn else None

    def forward(self, x):
        """Apply the linear map and, if configured, batch normalisation."""
        out = self.dense(x)
        if self.bn is None:
            return out
        # Collapse to (-1, bnunits) for BatchNorm1d, then restore the shape.
        flat = out.view(-1, self.bnunits)
        return self.bn(flat).view(*out.size())


class TorusConv2d(nn.Module):
    """2-D convolution over a board whose edges wrap around (a torus).

    Before convolving, ``kernel_size // 2`` rows/columns from the opposite edge
    are concatenated onto each side of the input, so an odd kernel keeps the
    spatial size unchanged.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: ``(height, width)`` of the convolution kernel.
        bn: when truthy, apply ``BatchNorm2d`` after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.edge_size = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    def forward(self, x):
        """Wrap-pad ``x`` (indexed here as 4-D: batch, channel, row, col) and convolve."""
        pad_rows, pad_cols = self.edge_size
        h = x
        # A zero edge must be skipped explicitly: the slice ``-0:`` selects the
        # whole axis, which would duplicate the input instead of adding nothing.
        if pad_cols > 0:
            h = torch.cat([h[:, :, :, -pad_cols:], h, h[:, :, :, :pad_cols]], dim=3)
        if pad_rows > 0:
            h = torch.cat([h[:, :, -pad_rows:], h, h[:, :, :pad_rows]], dim=2)
        h = self.conv(h)
        if self.bn is not None:
            h = self.bn(h)
        return h


class Conv2d(nn.Module):
    """Plain (unpadded) 2-D convolution with optional batch normalisation.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: kernel size forwarded to ``nn.Conv2d``.
        bn: when truthy, apply ``BatchNorm2d`` after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        if bn:
            self.bn = nn.BatchNorm2d(output_dim)
        else:
            self.bn = None

    def forward(self, x):
        """Convolve ``x`` and normalise the result when batch norm is enabled."""
        features = self.conv(x)
        if self.bn is None:
            return features
        return self.bn(features)


class ChannelSELayer(nn.Module):
    """Channel-wise gating: scale each channel by a learned sigmoid weight.

    The weight for every channel is computed from the spatial average of all
    channels through a two-layer bottleneck (``channel // reduction`` hidden
    units, no biases).

    Args:
        channel: number of channels of the input feature map.
        reduction: bottleneck divisor for the hidden layer width.
    """

    def __init__(self, channel, reduction=8):
        super().__init__()
        bottleneck = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, bottleneck, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(bottleneck, channel, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` with every channel multiplied by its gate value."""
        batch, channels, _rows, _cols = x.size()
        pooled = self.avg_pool(x).view(batch, channels)
        gate = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gate.expand_as(x)


# https://github.com/Kaixhin/Rainbow/blob/master/model.py
# Factorised NoisyLinear layer with bias
class NoisyLinear(nn.Module):
    """Linear layer whose weights and bias are perturbed by factorised noise.

    In training mode the effective parameters are ``mu + sigma * epsilon``,
    where ``epsilon`` is a buffer refreshed by :meth:`reset_noise`. In eval
    mode only the ``mu`` parameters are used.

    Args:
        in_features: size of each input sample.
        out_features: size of each output sample.
        std_init: scale used to initialise the ``sigma`` parameters.
    """

    def __init__(self, in_features, out_features, std_init=0.5):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.std_init = std_init

        weight_shape = (out_features, in_features)
        self.weight_mu = nn.Parameter(torch.empty(*weight_shape))
        self.weight_sigma = nn.Parameter(torch.empty(*weight_shape))
        self.register_buffer("weight_epsilon", torch.empty(*weight_shape))
        self.bias_mu = nn.Parameter(torch.empty(out_features))
        self.bias_sigma = nn.Parameter(torch.empty(out_features))
        self.register_buffer("bias_epsilon", torch.empty(out_features))

        self.reset_parameters()
        self.reset_noise()

    def reset_parameters(self):
        """Initialise ``mu`` uniformly and ``sigma`` to a constant."""
        bound = 1 / math.sqrt(self.in_features)
        with torch.no_grad():
            self.weight_mu.uniform_(-bound, bound)
            self.weight_sigma.fill_(self.std_init / math.sqrt(self.in_features))
            self.bias_mu.uniform_(-bound, bound)
            self.bias_sigma.fill_(self.std_init / math.sqrt(self.out_features))

    def _scale_noise(self, size):
        """Draw ``size`` standard-normal samples z and return sign(z) * sqrt(|z|)."""
        noise = torch.randn(size, device=self.weight_mu.device)
        return noise.sign() * noise.abs().sqrt()

    def reset_noise(self):
        """Resample the epsilon buffers from one input and one output noise vector."""
        eps_in = self._scale_noise(self.in_features)
        eps_out = self._scale_noise(self.out_features)
        # Weight noise is the outer product of the two vectors; bias noise
        # reuses the output vector.
        self.weight_epsilon.copy_(eps_out.ger(eps_in))
        self.bias_epsilon.copy_(eps_out)

    def forward(self, input):
        """Apply the layer, adding the current noise only in training mode."""
        if not self.training:
            return F.linear(input, self.weight_mu, self.bias_mu)
        noisy_weight = self.weight_mu + self.weight_sigma * self.weight_epsilon
        noisy_bias = self.bias_mu + self.bias_sigma * self.bias_epsilon
        return F.linear(input, noisy_weight, noisy_bias)


class GeeseNet(nn.Module):
    """Residual torus-convolution network with a policy head and a value head.

    The input is expected to have 17 channels. Channel 0 is used as a spatial
    mask for both heads; with the ``make_input`` encoder in this file that
    channel marks the player's own head cell.
    """

    def __init__(self):
        super().__init__()
        num_blocks = 12
        width = 32

        def torus3x3(in_channels):
            # Every convolution in the network is a 3x3 torus conv with batch norm.
            return TorusConv2d(in_channels, width, (3, 3), True)

        self.conv0 = torus3x3(17)
        self.blocks = nn.ModuleList([torus3x3(width) for _ in range(num_blocks)])

        self.conv_p = torus3x3(width)
        self.conv_v = torus3x3(width)

        self.head_p = nn.Linear(width, 4, bias=False)
        self.head_v1 = nn.Linear(width * 2, width, bias=False)
        self.head_v2 = nn.Linear(width, 1, bias=False)

    def forward(self, x, _=None):
        """Run the network.

        Args:
            x: input tensor indexed as (batch, channel, row, col).
            _: unused.

        Returns:
            dict with ``"policy"`` (4 action scores per sample), ``"value"``
            (tanh-squashed scalar per sample) and the intermediate pooled
            features ``"h_head_p"``, ``"h_head_v"`` and ``"h_avg_v"``.
        """
        mask = x[:, :1]

        def per_channel(t):
            # Flatten the two spatial axes so they can be reduced together.
            return t.view(t.size(0), t.size(1), -1)

        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            # Residual connection around every block.
            h = F.relu_(h + block(h))

        # Policy head: features read out at the masked cell(s).
        h_p = F.relu_(self.conv_p(h))
        h_head_p = per_channel(h_p * mask).sum(-1)
        p = self.head_p(h_head_p)

        # Value head: masked read-out concatenated with the board-wide mean.
        h_v = F.relu_(self.conv_v(h))
        h_head_v = per_channel(h_v * mask).sum(-1)
        h_avg_v = per_channel(h_v).mean(-1)
        hidden_v = F.relu_(self.head_v1(torch.cat([h_head_v, h_avg_v], 1)))
        v = torch.tanh(self.head_v2(hidden_v))

        return {"policy": p, "value": v, "h_head_p": h_head_p, "h_head_v": h_head_v, "h_avg_v": h_avg_v}


# Input for Neural Network


# Board geometry. Cells are addressed by a flat index that the helpers below
# decode as row = index // NUM_COL, col = index % NUM_COL.
NUM_ROW = 7
NUM_COL = 11
# Cell onto which the head-centred encoders shift the player's head.
CENTER_ROW = NUM_ROW // 2
CENTER_COL = NUM_COL // 2


def to_offset(x):
    """Return the (row, col) shift that moves flat cell index ``x`` onto the board centre."""
    cell_row, cell_col = divmod(x, NUM_COL)
    return CENTER_ROW - cell_row, CENTER_COL - cell_col


def to_row(offset, x):
    """Row of flat cell index ``x`` after shifting by ``offset`` rows, wrapped to the board."""
    shifted_row = x // NUM_COL + offset
    return shifted_row % NUM_ROW


def to_col(offset, x):
    """Column of flat cell index ``x`` after shifting by ``offset`` columns, wrapped to the board.

    ``x`` may be the full flat index: the final modulo removes its row part.
    """
    shifted = x + offset
    return shifted % NUM_COL


def distance(a, b):
    """Return ``((d_row, d_col), manhattan)`` for the step from ``a`` to ``b``.

    Both points are (row, col) pairs. The difference is taken directly, without
    wrapping around the board edges.
    """
    d_row = b[0] - a[0]
    d_col = b[1] - a[1]
    manhattan = abs(d_row) + abs(d_col)
    return (d_row, d_col), manhattan


def around(a):
    """Return the four neighbours of cell ``a`` on the 7 x 11 wrap-around board.

    The result is ordered up, down, left, right, each as a (row, col) tuple.
    """
    row, col = a[0], a[1]
    up, down = (row - 1) % 7, (row + 1) % 7
    left, right = (col - 1) % 11, (col + 1) % 11
    return [(up, col), (down, col), (row, left), (row, right)]


def empty_around_head(field, head, x):
    """Return the neighbours of ``x`` whose ``field`` value is small enough to enter.

    A neighbour qualifies when its ``field`` value is at most the (non-wrapping)
    Manhattan distance from ``head`` to ``x`` plus one. In this file ``field`` is
    the grid built by ``make_input_num_turn_of_fill``.
    """
    threshold = distance(head, x)[1] + 1
    passable = []
    for cell in around(x):
        if field[cell[0], cell[1]] <= threshold:
            passable.append(cell)
    return passable


def bfs_close_route(field, head):
    """Breadth-first search from ``head``; return how far the reachable area extends.

    Args:
        field: 2-D grid consulted by ``empty_around_head`` to decide which
            neighbouring cells may be entered.
        head: (row, col) start cell.

    Returns:
        The largest value of ``distance(head, cell)[1]`` over all reached cells.
    """
    queue = deque([head])
    # Mark cells when they are enqueued, not when they are dequeued. Otherwise a
    # cell is queued once per path that reaches it and expanded repeatedly. The
    # set of reached cells, and therefore the result, is the same either way.
    seen = {head}
    max_dist = 0
    while queue:
        cell = queue.popleft()
        # Known limitation: this is the straight-line Manhattan distance from
        # the start, so the extra length of a detour is not accounted for.
        max_dist = max(max_dist, distance(head, cell)[1])
        for nxt in empty_around_head(field, head, cell):
            if nxt not in seen:
                seen.add(nxt)
                queue.append(nxt)
    return max_dist


def make_input(obses):
    """Encode the latest observation as a (17, 7, 11) float32 array of 0/1 planes.

    Players are rotated so that the observing player (``obs["index"]``) always
    occupies slot 0. Plane layout:

    * 0-3: head position per player
    * 4-7: tail-tip position per player
    * 8-11: whole body per player
    * 12-15: head position per player in the previous observation (if any)
    * 16: food

    Args:
        obses: observation history; only the last two entries are read.
    """
    obs = obses[-1]
    me = obs["index"]
    planes = np.zeros((17, 7 * 11), dtype=np.float32)

    for player, cells in enumerate(obs["geese"]):
        if len(cells) == 0:
            continue
        slot = (player - me) % 4
        planes[0 + slot, cells[0]] = 1
        planes[4 + slot, cells[-1]] = 1
        for cell in cells:
            planes[8 + slot, cell] = 1

    if len(obses) > 1:
        for player, cells in enumerate(obses[-2]["geese"]):
            if len(cells) > 0:
                planes[12 + (player - me) % 4, cells[0]] = 1

    for cell in obs["food"]:
        planes[16, cell] = 1

    return planes.reshape(-1, 7, 11)


def make_input_centering_head(obses):
    """Encode the latest observation as (17, 7, 11) planes centred on the player's head.

    Every position is shifted (with wrap-around) so that the observing player's
    head lands on the board centre. Players are rotated so the observer is slot 0.
    Plane layout:

    * 0-3: whole body per player
    * 4-7: tail-tip position per player
    * 8-11: head position per player
    * 12-15: head position per player in the previous observation (if any)
    * 16: food

    NOTE(review): the body and head planes are swapped relative to
    ``make_input``, while ``GeeseNet.forward`` uses channel 0 as its mask.
    Confirm which layout a model was trained with before switching encoders.

    Args:
        obses: observation history; only the last two entries are read.
    """
    obs = obses[-1]
    me = obs["index"]
    o_row, o_col = to_offset(obs["geese"][me][0])
    planes = np.zeros((17, 7, 11), dtype=np.float32)

    def mark(channel, pos):
        planes[channel, to_row(o_row, pos), to_col(o_col, pos)] = 1

    for player, cells in enumerate(obs["geese"]):
        if len(cells) == 0:
            continue
        slot = (player - me) % 4
        for pos in cells:
            mark(0 + slot, pos)
        mark(4 + slot, cells[-1])
        mark(8 + slot, cells[0])

    if len(obses) > 1:
        for player, cells in enumerate(obses[-2]["geese"]):
            if len(cells) > 0:
                mark(12 + (player - me) % 4, cells[0])

    for pos in obs["food"]:
        mark(16, pos)

    return planes


def make_input_2step(obses):
    """Encode the latest two observations as a (25, 7, 11) float32 array of 0/1 planes.

    Players are rotated so that the observing player (``obs["index"]`` of the
    latest observation) always occupies slot 0. Plane layout:

    * 0-3 / 4-7 / 8-11: head / tail-tip / whole body per player, current turn
    * 12-15 / 16-19 / 20-23: the same three groups for the previous turn (if any)
    * 24: food of the current turn

    Args:
        obses: observation history; only the last two entries are read.
    """
    planes = np.zeros((25, 7 * 11), dtype=np.float32)
    obs = obses[-1]
    me = obs["index"]

    def paint(geese, base):
        # Fill the head / tip / body plane groups starting at channel `base`.
        for player, cells in enumerate(geese):
            if len(cells) == 0:
                continue
            slot = (player - me) % 4
            planes[base + slot, cells[0]] = 1
            planes[base + 4 + slot, cells[-1]] = 1
            for pos in cells:
                planes[base + 8 + slot, pos] = 1

    paint(obs["geese"], 0)

    if len(obses) > 1:
        # Keep the previous observation under its own name. Rebinding `obs`
        # here made the food plane below come from the previous turn.
        obs_prev = obses[-2]
        paint(obs_prev["geese"], 12)

    # food (always from the current observation)
    for pos in obs["food"]:
        planes[24, pos] = 1

    return planes.reshape(-1, 7, 11)


def make_input_centering_head_for_rule(obses):
    """Build the head-centred, per-player position lists used by ``apply_rule``.

    All positions are shifted (with wrap-around) so that the observing player's
    head lands on the board centre, and players are rotated so the observer is
    slot 0.

    Args:
        obses: observation history; only the last two entries are read.

    Returns:
        dict mapping slot 0-3 to a ``defaultdict(list)`` with the keys:
        ``"body"`` (cells from head to tail), ``"previous"`` (head cell in the
        previous observation) and, on slot 0 only, ``"food"``. Keys are only
        present when at least one cell was recorded.
    """
    obs = obses[-1]
    me = obs["index"]
    o_row, o_col = to_offset(obs["geese"][me][0])

    def centred(pos):
        return (to_row(o_row, pos), to_col(o_col, pos))

    b = {slot: defaultdict(list) for slot in range(4)}

    for player, cells in enumerate(obs["geese"]):
        slot = (player - me) % 4
        for pos in cells:
            b[slot]["body"].append(centred(pos))

    if len(obses) > 1:
        for player, cells in enumerate(obses[-2]["geese"]):
            if len(cells) > 0:
                b[(player - me) % 4]["previous"].append(centred(cells[0]))

    for pos in obs["food"]:
        b[0]["food"].append(centred(pos))

    return b


def make_input_num_turn_of_fill(obses):
    """Build a head-centred (7, 11) grid of "turns until this cell is free".

    Empty cells hold 0. A body cell holds its 1-based index counted from the
    tail, so the tail tip is 1. The player's own head cell is raised to at
    least 4 and the player's previous head cell to at least 3.

    Args:
        obses: observation history; only the last two entries are read.
    """
    obs = obses[-1]
    me = obs["index"]
    o_row, o_col = to_offset(obs["geese"][me][0])

    def centred(pos):
        return to_row(o_row, pos), to_col(o_col, pos)

    grid = np.zeros((7, 11), dtype=np.float32)

    for player, cells in enumerate(obs["geese"]):
        # In how many turns each occupied cell becomes free.
        for turns, pos in enumerate(reversed(cells), start=1):
            grid[centred(pos)] = turns

        # The player cannot reverse into its own head, so that cell only
        # frees up after going around a loop.
        if (player - me) % 4 == 0 and len(cells) > 0:
            head = centred(cells[0])
            if grid[head] < 4:
                grid[head] = 4

    # previous head position
    # The cell the player has just left is likewise reachable again only
    # after a loop, at the earliest.
    if len(obses) > 1:
        for player, cells in enumerate(obses[-2]["geese"]):
            if (player - me) % 4 == 0 and len(cells) > 0:
                prev_head = centred(cells[0])
                if grid[prev_head] < 3:
                    grid[prev_head] = 3

    return grid


def apply_rule(b, prob):
    """Adjust the four action scores with hand-written safety rules.

    player head = (3, 5)
    ["NORTH", "SOUTH", "WEST", "EAST"]

    Args:
        b: head-centred position lists per player slot, as built by
            ``make_input_centering_head_for_rule``.
        prob: indexable of four scores in the order above; modified in place.

    Returns:
        The same ``prob`` object after the adjustments.
    """
    north, south, west, east = (2, 5), (4, 5), (3, 4), (3, 6)
    neighbor = [north, south, west, east]

    # Cases where an adjacent cell cannot be entered
    for action, cell in enumerate(neighbor):
        # the cell the player occupied on the previous turn
        was_previous = cell in b[0]["previous"]
        # a cell occupied by any goose body (tail tip excluded)
        in_a_body = any(cell in b[p]["body"][:-1] for p in range(4))
        if was_previous or in_a_body:
            prob[action] = -np.inf

    def head_on_penalty(cell):
        # The penalty is doubled when the contested cell holds food.
        return 100 if cell in b[0]["food"] else 50

    # Cases where heads may collide on the next move
    for p in range(1, 4):
        rival = b[p]["body"]
        if rival == []:
            continue
        (d_row, d_col), gap = distance(b[0]["body"][0], rival[0])
        if gap != 2:
            continue
        if d_row < 0:
            prob[0] -= head_on_penalty(north)
        elif d_row > 0:
            prob[1] -= head_on_penalty(south)
        if d_col < 0:
            prob[2] -= head_on_penalty(west)
        elif d_col > 0:
            prob[3] -= head_on_penalty(east)

    # Cases where an opponent's tail may grow (food next to that opponent's head)
    for action, cell in enumerate(neighbor):
        for p in range(1, 4):
            rival = b[p]["body"]
            if rival == [] or cell != rival[-1]:
                continue
            reach = around(rival[0])
            if any(food in reach for food in b[0]["food"]):
                prob[action] -= 100

    return prob


def apply_rule2(b, prob):
    """Penalise moves that lead into a small enclosed area.

    player head = (3, 5)
    ["NORTH", "SOUTH", "WEST", "EAST"]

    Args:
        b: 2-D grid passed to ``bfs_close_route``; the agent supplies the
            output of ``make_input_num_turn_of_fill``.
        prob: indexable of four scores in the order above; modified in place.

    Returns:
        The same ``prob`` object after the adjustments.
    """
    neighbor = [(2, 5), (4, 5), (3, 4), (3, 6)]

    # Closed-area search: a start cell whose reachable area extends no more
    # than 3 steps is treated as a trap.
    for action, cell in enumerate(neighbor):
        reach = bfs_close_route(b, cell)
        if reach <= 3:
            prob[action] -= 10_000

    return prob


# Load PyTorch Model


# Placeholder for the serialized weights. A later notebook cell rewrites this
# file and substitutes the real base64 payload for the literal below.
PARAM = b"xxxxxxxxxx"

# Decode base64 -> bz2 -> pickle, the reverse of how the payload was built.
# NOTE(review): pickle.loads can execute arbitrary code from its input, so
# PARAM must only ever hold a payload produced by this notebook.
state_dict = pickle.loads(bz2.decompress(base64.b64decode(PARAM)))
model = GeeseNet()
# model = GeeseNetA()
model.load_state_dict(state_dict)
# Inference only: put the network (and its batch-norm layers) in eval mode.
model.eval()


# Main Function of Agent

# Observation history of this agent; the encoders read only the last two entries.
obses = []


def agent(obs, _):
    """Choose the next move for the observing goose.

    The observation is appended to the module-level history, encoded for the
    network, and the resulting policy scores are adjusted by the rule-based
    filters before the best-scoring action is returned.

    Args:
        obs: current observation; must provide ``"index"``, ``"geese"`` and
            ``"food"``.
        _: unused second argument.

    Returns:
        One of ``"NORTH"``, ``"SOUTH"``, ``"WEST"``, ``"EAST"``.
    """
    obses.append(obs)

    net_input = make_input(obses)
    # Alternative encoders:
    # net_input = make_input_2step(obses)
    # net_input = make_input_centering_head(obses)
    rule_view = make_input_centering_head_for_rule(obses)
    fill_grid = make_input_num_turn_of_fill(obses)

    with torch.no_grad():
        batch = torch.from_numpy(net_input).unsqueeze(0)
        outputs = model(batch)
    p = outputs["policy"].squeeze(0).detach().numpy()

    # Both rule passes modify the score array in place and return it.
    p = apply_rule(rule_view, p)
    p = apply_rule2(fill_grid, p)

    print(f"player: {obs['index']}, actions: {p}")

    actions = ["NORTH", "SOUTH", "WEST", "EAST"]
    return actions[np.argmax(p)]

Overwriting handyrl/envs/kaggle/geese/rule_based_goose.py


In [3]:
# Read in the submission file
goose_file = "handyrl/envs/kaggle/geese/rule_based_goose.py"

# The agent source contains non-ASCII comments, so pin the encoding instead of
# relying on the platform default (which is not UTF-8 everywhere).
with open(goose_file, encoding="utf-8") as file:
    filedata = file.read()

# Replace the placeholder with the base64-encoded weights
filedata = filedata.replace("xxxxxxxxxx", PARAM.decode("utf-8"))

# Write the file out again
with open(goose_file, "w", encoding="utf-8") as file:
    file.write(filedata)

In [4]:
from kaggle_environments import make

# Self-play smoke test: run four copies of the generated agent file against
# each other in one "hungry_geese" episode, then render the result.
env = make("hungry_geese", debug=True)
env.reset()
env.run([goose_file, goose_file, goose_file, goose_file])
env.render(mode="ipython", width=800, height=700)

# Disabled alternative: step HandyRL's own environment wrapper with the same agent.
# env_module = importlib.import_module("handyrl.envs.kaggle.hungry_geese")
# e = env_module.Environment()
# e.reset()
#
# while not e.terminal():
#     obs = e.observation()
#     actions = {}
#     for player in e.turns():
#         actions[player] = e.rule_based_action_smart_geese(player, goose="rule_based_goose")
#     e.step(actions)
#     rewards = e.reward()
#     print(rewards)
# e.outcome()

Loading environment football failed: No module named 'gfootball'
player: 0, actions: [ 2.4157546  -3.4708006   0.50360084 -3.3386402 ]
player: 1, actions: [-3.4897304  1.0885134 -3.7438624  1.6624248]
player: 2, actions: [-2.4885197 -2.530464  -3.8980508  4.162038 ]
player: 3, actions: [-2.3832226  1.1662004  0.8808558 -2.661913 ]
player: 0, actions: [ 4.03047         -inf  0.3904482 -2.1372762]
player: 1, actions: [-1.1027219  4.033141        -inf -0.5829338]
player: 2, actions: [-1.8774167 -1.6579165       -inf  6.5278797]
player: 3, actions: [      -inf  1.6263834  1.5062618 -0.6047227]
player: 0, actions: [-62.820602         -inf -90.554924   -1.0692899]
player: 1, actions: [        -inf -118.80786      3.1019013  -47.290604 ]
player: 2, actions: [-2.531424  -4.31721         -inf  7.1883845]
player: 3, actions: [      -inf 0.87323034 1.0359837  0.6896374 ]
player: 0, actions: [2.4718919  0.8451531        -inf 0.83011013]
player: 1, actions: [ 1.1773642   5.1376185  -0.06255928     

In [5]:
# 0: white
# 1: blue
# 2: green
# 3: red