In [None]:
import datasets
import numpy as np
import pandas as pd
import os
import sys
import itertools

In [None]:
# Disable pandas' display truncation: passing None removes the limit, so every
# column and every row is rendered when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# データの前処理

In [None]:
# Load every CSV in the submission directory.
# `datasets_raw[i]` is the frame read from `dataset_files[i]`; the two lists must stay
# index-aligned because the export step pairs them up by position.
SUBMISSION_DIR = "/root/robocup/submission_data"

datasets_raw = []
dataset_files = []
# os.listdir returns entries in arbitrary order; sort so every run processes the
# files in the same order.
for file in sorted(os.listdir(SUBMISSION_DIR)):
    if file.endswith(".csv"):
        full_frame = pd.read_csv(f"{SUBMISSION_DIR}/{file}")
        # Keep only the last 20 rows of each file. `.copy()` detaches the result from
        # `full_frame`, so the in-place column edits done by later cells operate on an
        # independent frame instead of a slice of the parent.
        datasets_raw.append(full_frame.iloc[-20:].copy())
        dataset_files.append(file)

In [None]:
def one_hot(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the team names in ``l_name`` / ``r_name`` with integer team ids.

    Despite its name this function does not one-hot encode anything; it only maps
    each known team name to an integer in ``0..9``.

    Args:
        df: Frame containing the string columns ``l_name`` and ``r_name``.

    Returns:
        A copy of ``df`` whose ``l_name`` and ``r_name`` columns hold the mapped ids.
        Names that are not in the table become ``NaN`` (behaviour of ``Series.map``).
        The input frame is left untouched.
    """
    team_ids = {
        "AEteam": 0,
        "CYRUS": 1,
        "FRA-UNIted": 2,
        "HELIOS2024": 3,
        "ITAndroids": 4,
        "Mars": 5,
        "Oxsy": 6,
        "R2D2": 7,
        "RoboCIn": 8,
        "YuShan2024": 9,
    }
    # Work on a copy so the caller's frame (possibly a slice of a larger frame) is
    # never modified behind its back.
    encoded = df.copy()
    encoded["l_name"] = encoded["l_name"].map(team_ids)
    encoded["r_name"] = encoded["r_name"].map(team_ids)
    return encoded


def drop(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without velocity/orientation/stamina and bookkeeping columns.

    Removed columns:
      * for every player ``l1..l11`` and ``r1..r11``: ``<player>_vx``, ``_vy``, ``_t``,
        ``_body``, ``_neck``, ``_vwidth`` and ``_stamina``;
      * the ball velocity columns ``b_vx`` and ``b_vy``;
      * ``#``, ``cycle``, ``stopped``, ``playmode`` and the four score columns.

    Raises:
        KeyError: if any of these columns is missing (raised by ``DataFrame.drop``).
    """
    players = [f"{side}{number}" for side in ("l", "r") for number in range(1, 12)]
    per_player_fields = ("vx", "vy", "t", "body", "neck", "vwidth", "stamina")

    unwanted = [
        f"{player}_{field}" for player in players for field in per_player_fields
    ]
    unwanted.extend(["b_vx", "b_vy"])
    unwanted.extend(
        [
            "#",
            "cycle",
            "stopped",
            "playmode",
            "l_score",
            "l_pen_score",
            "r_score",
            "r_pen_score",
        ]
    )
    return df.drop(columns=unwanted)


def frame(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``frame`` column numbering the rows ``0..len(df)-1`` in their current order.

    The column is written into ``df`` itself (in place) and the same object is
    returned. The frame's own index is left as it is.
    """
    # reset_index() yields a frame with a fresh 0-based index; only that index is used.
    row_positions = df.reset_index().index
    df["frame"] = row_positions
    return df


def process_data(df: pd.DataFrame) -> pd.DataFrame:
    """Apply the per-file preprocessing steps to one dataset.

    Runs ``one_hot`` (team names -> integer ids) followed by ``frame`` (adds the
    0-based ``frame`` column).

    Returns:
        The single processed DataFrame. The previous annotation claimed a list of
        frames, which never matched what the function returns.
    """
    df = one_hot(df)
    df = frame(df)
    return df


# Replace every loaded dataset by its processed version, keeping the list order
# (and therefore the alignment with `dataset_files`).
for position in range(len(datasets_raw)):
    datasets_raw[position] = process_data(datasets_raw[position])

In [None]:
class MinMax:
    """Linear scaler mapping the interval ``[min, max]`` onto ``[0, 1]``.

    Calling an instance normalises a value (or array-like supporting arithmetic);
    ``inverse`` undoes that mapping.

    NOTE(review): the scaler dictionary loaded from ``min_max_d.npy`` appears to
    contain pickled instances of this class; if so, the attribute names ``min`` and
    ``max`` must stay as they are for unpickling to keep working. Confirm.
    """

    def __init__(self, min, max):
        """Store the bounds.

        Raises:
            ValueError: if ``min`` is greater than or equal to ``max``.
        """
        self.min, self.max = min, max
        if self.max <= self.min:
            raise ValueError("min must be less than max")

    def _span(self):
        """Width of the interval, ``max - min``."""
        return self.max - self.min

    def __call__(self, x):
        """Normalise ``x``: ``min`` maps to 0 and ``max`` maps to 1."""
        return (x - self.min) / self._span()

    def inverse(self, x):
        """Map a normalised value back to the original scale."""
        return x * self._span() + self.min

    def __repr__(self):
        return f"MinMax({self.min}, {self.max})"


def swap_rl(df):
    """Swap the left and right teams and mirror the x axis, in place.

    * ``l_name`` and ``r_name`` are exchanged.
    * ``b_x`` is negated (``b_y`` is untouched).
    * For every player number 1..11 the left and right players exchange positions,
      with the x coordinate negated and the y coordinate kept.

    Every value that is about to be overwritten is first copied into an independent
    array. Tuple-swapping ``.values`` / Series objects is not safe in general because
    they may be views of the frame's own storage, in which case the first assignment
    would silently corrupt the second.

    Args:
        df: Frame with ``l_name``, ``r_name``, ``b_x`` and ``l{i}_x``, ``l{i}_y``,
            ``r{i}_x``, ``r{i}_y`` for ``i`` in 1..11.

    Returns:
        The same ``df`` object, modified in place.
    """
    left_names = df["l_name"].to_numpy(copy=True)
    right_names = df["r_name"].to_numpy(copy=True)
    df["l_name"] = right_names
    df["r_name"] = left_names

    df["b_x"] = -df["b_x"]

    for i in range(1, 12):
        l_x, r_x = f"l{i}_x", f"r{i}_x"
        l_y, r_y = f"l{i}_y", f"r{i}_y"

        # Snapshot all four columns before writing any of them.
        left_x = df[l_x].to_numpy(copy=True)
        right_x = df[r_x].to_numpy(copy=True)
        left_y = df[l_y].to_numpy(copy=True)
        right_y = df[r_y].to_numpy(copy=True)

        df[l_x], df[r_x] = -right_x, -left_x
        df[l_y], df[r_y] = right_y, left_y

    return df


# Load the per-column scalers used for (de)normalisation. The .npy file stores a single
# pickled Python object inside a 0-d array, hence allow_pickle=True and .item().
# SECURITY: allow_pickle=True unpickles the file and can execute arbitrary code while
# doing so; only load files from a trusted source.
# NOTE(review): presumably a dict mapping column name -> MinMax instance (it is indexed
# by column name and used via call / .inverse / .min / .max elsewhere) — confirm.
min_max_d = np.load("/root/robocup/datas/min_max_d.npy", allow_pickle=True).item()

In [None]:
def clean_and_merge_datasets(datas):
    """Concatenate several datasets row-wise into one DataFrame.

    Each element of ``datas`` is passed through ``pd.DataFrame(...)`` and the results
    are stacked with ``pd.concat``. Row labels are kept as they are, so the result may
    contain repeated index labels.
    """
    frames = [pd.DataFrame(item) for item in datas]
    return pd.concat(frames)


def name_onehot(dfs, n_teams=10):
    """One-hot encode the integer team ids in ``l_name`` and ``r_name``, in place.

    For every team id ``i`` in ``0..n_teams-1`` two integer (0/1) columns are added:
    ``l_name_{i}`` and ``r_name_{i}``. Rows whose id matches none of the teams
    (for example ``NaN``) get 0 in every indicator column.

    Args:
        dfs: Frame with the numeric columns ``l_name`` and ``r_name``.
        n_teams: Number of distinct team ids to encode. Defaults to 10.

    Returns:
        The same ``dfs`` object with the indicator columns added.
    """
    for team_id in range(n_teams):
        # Each indicator is written exactly once, directly as an integer column.
        dfs[f"l_name_{team_id}"] = (dfs["l_name"] == team_id).astype(int)
        dfs[f"r_name_{team_id}"] = (dfs["r_name"] == team_id).astype(int)
    return dfs


def drop_unnecessary_columns(dfs):
    """Return ``dfs`` without the bookkeeping, team-name and score columns.

    Raises:
        KeyError: if any listed column is missing (raised by ``DataFrame.drop``).
    """
    bookkeeping_columns = [
        "#",
        "cycle",
        "stopped",
        "playmode",
        "l_name",
        "r_name",
        # "goal_type",
        "l_score",
        "r_score",
        "l_pen_score",
        "r_pen_score",
    ]
    return dfs.drop(columns=bookkeeping_columns)


def min_max_normalize(dfs, min_max_d=None):
    """Normalise every column of ``dfs`` in place with a per-column scaler.

    Args:
        dfs: Frame to normalise; its columns are overwritten.
        min_max_d: Mapping from column name to a callable scaler. When ``None``, a
            ``MinMax`` scaler is fitted for each column from that column's own
            minimum and maximum.

    Returns:
        A tuple ``(dfs, min_max_d)``: the same frame object and the scalers used.
    """
    if min_max_d is None:
        min_max_d = {}
        for name in dfs.columns:
            min_max_d[name] = MinMax(min(dfs[name]), max(dfs[name]))

    for name in dfs.columns:
        scaler = min_max_d[name]
        dfs[name] = scaler(dfs[name])
    return dfs, min_max_d


def revert_min_max_normalize(dfs, min_max_d):
    """Undo the normalisation: map every column of ``dfs`` back to its original scale.

    Each column is overwritten in place with ``min_max_d[column].inverse(column)``;
    the same frame object is returned.
    """
    for name in dfs.columns:
        scaler = min_max_d[name]
        dfs[name] = scaler.inverse(dfs[name])
    return dfs


def divide_dataframe(dfs, df_size=50):
    """Split ``dfs`` into consecutive row chunks of at most ``df_size`` rows.

    Chunks are positional ``iloc`` slices taken in order; the final chunk is shorter
    when ``len(dfs)`` is not a multiple of ``df_size``. An empty frame yields ``[]``.
    """
    chunk_starts = range(0, len(dfs), df_size)
    return [dfs.iloc[start : start + df_size] for start in chunk_starts]


def list_to_numpy(dfs: list) -> tuple[np.ndarray, pd.Index]:
    """Stack equally-shaped DataFrames into one float32 array.

    Args:
        dfs: Non-empty list of DataFrames that all have the same shape.

    Returns:
        A tuple ``(array, cols)`` where ``array`` has shape
        ``(len(dfs), n_rows, n_cols)`` and dtype ``float32``, and ``cols`` is the
        column index of the first frame.

    Raises:
        IndexError: if ``dfs`` is empty.
        ValueError: if the frames do not all share the same shape.
    """
    cols = dfs[0].columns
    # np.stack refuses ragged input with a clear error instead of building an
    # object array that only fails later in astype.
    stacked = np.stack([df.values for df in dfs]).astype(np.float32)
    return stacked, cols


def revert_numpy_from_list(dfs: np.ndarray, cols) -> list[pd.DataFrame]:
    """Turn a stacked array back into a list of DataFrames.

    Iterates over the first axis of ``dfs`` and wraps each 2-D slice in a DataFrame
    whose columns are labelled with ``cols``.
    """
    frames = []
    for block in dfs:
        frames.append(pd.DataFrame(block, columns=cols))
    return frames


# Build the model input from the processed datasets:
#   merge -> one-hot team ids -> drop bookkeeping columns -> normalise -> chunk -> stack.
submission = (
    clean_and_merge_datasets(datasets_raw)
    .pipe(name_onehot)
    .pipe(drop_unnecessary_columns)
)
# Normalise with the scalers loaded from disk (nothing is fitted on this data).
submission, _ = min_max_normalize(submission, min_max_d)

# Cut the merged frame back into blocks of 20 rows and stack them into a float32 array;
# `cols` keeps the column labels of that array.
submission, cols = list_to_numpy(divide_dataframe(submission, df_size=20))

# モデルの読み込み

In [None]:
import joblib
import os

import visualizer

from tqdm.notebook import tqdm
import datasets
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch import nn
import torchmetrics
import pytorch_lightning as pl

# Experiment identifiers.
# NOTE(review): not referenced in the visible cells; presumably used for experiment
# logging / checkpoint naming during training — confirm.
PROJECT_NAME = "RobocupTrajectoryPrediction_DataExpanded"
MODEL_NAME = "BiGRU_dataExpanded_2"
GROUP_NAME = "BiGRU"
# Model / optimiser hyperparameters.
# NOTE(review): not referenced in the visible cells; presumably the values the model
# was trained with — confirm they match the checkpoint being loaded.
HIDDEN_DIM = 256
NUM_LAYER = 4
LEARNING_RATE = 0.001

# Let PyTorch trade some float32 matmul precision for speed where the hardware allows.
torch.set_float32_matmul_precision("high")
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
class EuclideanDistance(torchmetrics.Metric):
    """Euclidean position error at the last predicted time step, in original units.

    Predictions and targets are de-normalised with the per-column min/max scalers and
    the 2-D distance is computed for 23 column pairs ``(k, k + 1)``, with ``k`` taken
    from ``[0, 5, 14, ..., 194]``.

    NOTE(review): the pairs are presumably the (x, y) columns of the ball and of the
    22 players — confirm against the column layout in ``cols``.
    NOTE(review): ``count`` grows by the batch size while ``sum`` adds the distances
    of all 23 pairs, so ``compute()`` returns the per-sample *total* distance rather
    than a per-object mean — confirm this is intended.
    """

    def __init__(self, cols, min_max_d, **kwargs):
        """
        Args:
            cols: Column labels of the feature axis, in order.
            min_max_d: Mapping from column label to a scaler exposing ``min``/``max``.
            **kwargs: Forwarded to ``torchmetrics.Metric``.
        """
        super().__init__(**kwargs)
        self.cols = cols
        self.min_max_d = min_max_d
        self.add_state("sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("count", default=torch.tensor(0), dist_reduce_fx="sum")
        # Per-column offset and scale used to undo the min-max normalisation.
        self._mins = torch.tensor(
            [min_max_d[col].min for col in cols], dtype=torch.float32
        )
        self._maxs = torch.tensor(
            [min_max_d[col].max for col in cols], dtype=torch.float32
        )
        self._scale = self._maxs - self._mins

    def update(self, preds, target):
        """Accumulate the distance error of one batch.

        Only the last time step (``[:, -1, :]``) of ``preds`` and ``target`` is used.
        """
        final_preds = preds[:, -1, :]
        final_target = target[:, -1, :]

        # Follow the device of the incoming tensors rather than a module-level global,
        # so the metric works wherever the model happens to run.
        target_device = final_preds.device
        indices = torch.cat([torch.tensor([0]), torch.arange(5, 201, 9)]).to(
            target_device
        )
        mins = self._mins.to(device=target_device, dtype=final_preds.dtype)
        scale = self._scale.to(device=target_device, dtype=final_preds.dtype)

        # De-normalise the first len(cols) features; clones keep the inputs untouched.
        inversed_preds = final_preds.clone()
        inversed_target = final_target.clone()
        n_cols = len(self.cols)
        inversed_preds[:, :n_cols] = final_preds[:, :n_cols] * scale + mins
        inversed_target[:, :n_cols] = final_target[:, :n_cols] * scale + mins

        errors = torch.sqrt(
            (inversed_preds[:, indices] - inversed_target[:, indices]) ** 2
            + (inversed_preds[:, indices + 1] - inversed_target[:, indices + 1]) ** 2
        )
        self.sum += torch.sum(errors)
        self.count += errors.size(0)

    def compute(self):
        """Return the accumulated distance divided by the number of samples seen."""
        return self.sum / self.count

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger


class LitBiGRU(pl.LightningModule):
    """Bidirectional GRU that predicts a whole output sequence in one shot.

    The GRU output at the last input step is projected by a linear layer to
    ``output_dim * seq_length`` values, which are reshaped to
    ``(batch, seq_length, output_dim)``.

    NOTE(review): ``__init__`` reads the module-level ``cols`` and ``min_max_d`` to
    build its metric, so both must exist before the model is constructed.
    NOTE(review): one metric instance is shared by the training and validation steps;
    torchmetrics recommends a separate instance per stage — left unchanged so that
    previously saved models keep loading.
    """

    def __init__(
        self, input_dim, hidden_dim, output_dim, num_layers, seq_length, lr=0.001
    ):
        """
        Args:
            input_dim: Number of features per input time step.
            hidden_dim: GRU hidden size per direction.
            output_dim: Number of features per predicted time step.
            num_layers: Number of stacked GRU layers.
            seq_length: Number of time steps to predict.
            lr: Learning rate for Adam.
        """
        super().__init__()
        self.save_hyperparameters()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # hidden_dim * 2 because both directions are concatenated.
        self.fc = nn.Linear(hidden_dim * 2, output_dim * seq_length)
        self.criterion = nn.MSELoss()
        self.euclidean_distance = EuclideanDistance(cols, min_max_d)
        self.train_losses = []
        self.val_losses = []

    def forward(self, x):
        """Map ``x`` (batch-first sequence) to ``(batch, seq_length, output_dim)``."""
        out, _ = self.gru(x)
        last_out = out[:, -1, :]
        output = self.fc(last_out)
        # Reshape with the configured seq_length (the width `fc` was built for)
        # instead of a hard-coded 30, which was only right when seq_length == 30.
        return output.view(-1, self.hparams.seq_length, self.hparams.output_dim)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one training batch and log loss and distance."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("train_loss", loss, on_step=False, on_epoch=True)
        self.log(
            "train_euclidean_distance",
            self.euclidean_distance(y_hat, y),
            on_step=False,
            on_epoch=True,
        )
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the MSE loss for one validation batch and log loss and distance."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)
        self.log("val_loss", loss, on_step=False, on_epoch=True)
        self.log(
            "val_euclidean_distance",
            self.euclidean_distance(y_hat, y),
            on_step=False,
            on_epoch=True,
        )
        return loss

    def configure_optimizers(self):
        """Use Adam over all parameters with the configured learning rate."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
        return optimizer

In [None]:
# Load the trained model.
model_path = "<model_path>"

# The file is expected to contain a fully pickled model object, so weights_only must be
# False (newer PyTorch releases default to True and would refuse to load it).
# SECURITY: unpickling can execute arbitrary code; only load files from a trusted source.
# map_location lets a model saved on a GPU be loaded on a CPU-only machine as well.
model = torch.load(model_path, map_location=device, weights_only=False)
# Alternatively, restore from a Lightning checkpoint:
# model = LitBiGRU.load_from_checkpoint(model_path)

# Make sure the parameters live on the device the inputs will be sent to.
model = model.to(device)

In [None]:
# Print the loaded model object (its repr) as a quick check of what was loaded.
print(model)

# 提出データの予測

In [None]:
import visualizer


def f():
    """Predict the next frames for every submission file and write them to CSV.

    Steps:
      1. Run the model on the prepared ``submission`` array (one sample per file).
      2. De-normalise each prediction with the scalers in ``min_max_d``.
      3. Append the prediction to the rows of the matching input CSV, renumber the
         rows from 1, and store that number in the ``#`` column.
      4. Keep ``#`` plus the x/y position columns of all players and the ball, and
         only the last 30 rows.
      5. Write each result to ``/root/robocup/submission_data_out/<same file name>``.

    Returns:
        The list of exported DataFrames, in the order of ``dataset_files``.

    Raises:
        ValueError: if the number of predictions differs from the number of files,
            in which case predictions could not be matched to files reliably.
    """
    # Send the input to wherever the model's parameters live, so the call works
    # regardless of which device the model was loaded on.
    model_device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():
        preds = model(torch.as_tensor(submission, device=model_device))
        preds = preds.cpu().numpy()

    if len(preds) != len(dataset_files):
        raise ValueError(
            f"got {len(preds)} predictions for {len(dataset_files)} input files"
        )

    # "#", then x/y of l1..l11 and r1..r11, then the ball position.
    output_columns = (
        ["#"]
        + [
            f"{side}{number}_{axis}"
            for side in ("l", "r")
            for number in range(1, 12)
            for axis in ("x", "y")
        ]
        + ["b_x", "b_y"]
    )

    dfs = []
    for file_name, pred in zip(dataset_files, preds):
        # Map the normalised prediction back to the original units.
        pred_df = pd.DataFrame(pred, columns=cols)
        for col in pred_df.columns:
            pred_df[col] = min_max_d[col].inverse(pred_df[col])

        sub_df = pd.read_csv(f"/root/robocup/submission_data/{file_name}")

        # Append the prediction below the original rows, restricted to the columns
        # of the input file (columns the model does not predict become NaN).
        df = pd.concat([sub_df, pred_df])[sub_df.columns].copy()
        # Number the rows 1..N and expose that number as the "#" column.
        df.index = range(1, len(df) + 1)
        df["#"] = df.index
        # Keep the position columns only, and only the last 30 rows.
        dfs.append(df[output_columns].iloc[-30:])

    # Write the files only after every frame has been built successfully.
    os.makedirs("/root/robocup/submission_data_out", exist_ok=True)
    for file_name, df in zip(dataset_files, dfs):
        df.to_csv(f"/root/robocup/submission_data_out/{file_name}", index=False)

    return dfs


# Run the prediction/export step; `dfs` holds the DataFrames returned by f().
dfs = f()