In [None]:
import gc
import os
import shutil
import time
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
import pytorch_lightning as L
import torch
import torch.nn as nn
import torch.optim as optim
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, TQDMProgressBar
from pytorch_lightning.loggers import WandbLogger
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import get_cosine_schedule_with_warmup



In [3]:
# Experiment identifier: names the checkpoint directory and the submission file.
exp_name = '000_NN_NoImage'

In [None]:
# Project root. Set the ATMACUP18_ROOT environment variable to run the notebook
# on another machine; the default keeps the original local checkout location.
PROJECT_ROOT = Path(os.environ.get('ATMACUP18_ROOT', '/Users/gouyashuto/localrepository/atmacup18'))

DATA_PATH = PROJECT_ROOT / 'input'    # competition CSV files are read from here
OUTPUT_DIR = PROJECT_ROOT / 'output'  # checkpoints and submissions are written here

In [5]:
# Load the raw train / test feature tables and print their (rows, columns) shapes.
train_df = pl.read_csv(DATA_PATH / 'train_features.csv')
test_df = pl.read_csv(DATA_PATH / 'test_features.csv')

print(train_df.shape)
print(test_df.shape)

(43371, 30)
(1727, 12)


In [6]:
# Columns that are ordinal-encoded instead of standard-scaled.
CAT_COLS = ['gearShifter', 'scene']

# Regression targets: an (x, y, z) triple for each index 0..5, ordered
# x_0, y_0, z_0, x_1, ..., z_5 (18 columns in total).
TARGET_COLS = [
    f'{axis}_{step}'
    for step in range(6)
    for axis in ('x', 'y', 'z')
]

In [7]:
def get_agg_exprs(agg_cols) -> list[pl.Expr]:
    """Build per-scene window expressions for every column in ``agg_cols``.

    Each expression is evaluated over the ``scene`` partition and aliased as
    ``<column>_<suffix>``:

    * ``shift-1``: value taken from the following row of the scene
    * ``shift1``: value taken from the preceding row of the scene
    * ``diff-1``: current value minus the following row's value
    * ``diff1``: current value minus the preceding row's value
    * ``mean`` / ``std`` / ``max`` / ``min``: statistics over the whole scene

    "Following" / "preceding" refer to row order inside the frame, so the
    caller is expected to sort rows by time within each scene beforehand.

    Args:
        agg_cols: Names of the columns to derive features from.

    Returns:
        Expressions ordered by feature kind first and by column second.
    """
    # (alias suffix, transformation) pairs; list order defines output column order.
    transforms = [
        ("shift-1", lambda expr: expr.shift(-1)),
        ("shift1", lambda expr: expr.shift(1)),
        ("diff-1", lambda expr: expr.diff(-1)),
        ("diff1", lambda expr: expr.diff(1)),
        ("mean", lambda expr: expr.mean()),
        ("std", lambda expr: expr.std()),
        ("max", lambda expr: expr.max()),
        ("min", lambda expr: expr.min()),
    ]
    return [
        transform(pl.col(name)).over("scene").alias(f"{name}_{suffix}")
        for suffix, transform in transforms
        for name in agg_cols
    ]

In [8]:
def preprocess(df: pl.DataFrame) -> pl.DataFrame:
    """Derive scene keys from ``ID`` and append per-scene aggregate features.

    ``ID`` is split on ``'_'``: the first part becomes the ``scene`` column and
    the second part, cast to ``Int32``, becomes ``decisecond``. Rows are then
    sorted by ``(scene, decisecond)`` before the window features from
    ``get_agg_exprs`` are added, so the returned frame is NOT in input order.

    Args:
        df: Frame holding an ``ID`` column plus the aggregated signal columns.

    Returns:
        The sorted frame with ``scene``, ``decisecond`` and aggregate columns.
    """
    # Signals that per-scene features are derived from.
    agg_cols = ['vEgo', 'aEgo', 'steeringAngleDeg', 'steeringTorque', 'gas']

    id_parts = pl.col('ID').str.split('_')
    keyed = df.with_columns(
        scene=id_parts.list[0],
        decisecond=id_parts.list[1].cast(pl.Int32),
    )
    ordered = keyed.sort(['scene', 'decisecond'])
    return ordered.with_columns(get_agg_exprs(agg_cols))

## Dataset

In [9]:
class MLPDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target row.

    Args:
        features: Feature frame; one sample per row.
        targets: Target frame, row-aligned with ``features``.
    """

    def __init__(
        self,
        features: pd.DataFrame,
        targets: pd.DataFrame,
    ):
        # Keep the underlying arrays only; rows are fetched by integer position.
        self.targets = targets.values
        self.features = features.values

    def __len__(self):
        """Number of samples (rows of the feature array)."""
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return ``(features, targets)`` of one sample as float32 tensors."""
        row_x, row_y = self.features[index], self.targets[index]
        return (
            torch.tensor(row_x, dtype=torch.float),
            torch.tensor(row_y, dtype=torch.float),
        )

class TestDataset(Dataset):
    """Map-style dataset that serves feature rows only (no targets).

    Args:
        features: Feature frame; one sample per row.
    """

    def __init__(
        self,
        features: pd.DataFrame,
    ):
        # Keep the underlying array only; rows are fetched by integer position.
        self.features = features.values

    def __len__(self):
        """Number of samples (rows of the feature array)."""
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return the features of one sample as a float32 tensor."""
        row = self.features[index]
        return torch.tensor(row, dtype=torch.float)

## Model

In [10]:
# Fully connected regression backbone.
class MLP(nn.Module):
    """Multi-layer perceptron: two 128-unit ReLU hidden layers and a linear head.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        output_dim: Size of the last dimension of the output tensor.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        hidden = 128
        # Layer order fixes the state-dict keys (model.0, model.2, model.4).
        layers = [
            nn.Linear(input_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, output_dim),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Map ``x`` of shape ``(..., input_dim)`` to ``(..., output_dim)``."""
        return self.model(x)

# Smoke test: push a random batch of two samples through the MLP and show the
# resulting shape and values.
input_dim = 100
output_dim = 18
model = MLP(input_dim, output_dim)
# Named dummy_* so the built-in input() is not shadowed for the rest of the notebook.
dummy_input = torch.randn(2, input_dim)
dummy_output = model(dummy_input)
print(dummy_output.shape)
print(dummy_output)

torch.Size([2, 18])
tensor([[ 0.0189,  0.0607,  0.0861, -0.1658,  0.0208,  0.0201,  0.1802, -0.1413,
         -0.0683,  0.0515,  0.0894,  0.0587,  0.2160,  0.0752, -0.1135, -0.0469,
         -0.1377,  0.0232],
        [-0.0541, -0.0602,  0.0029, -0.1399, -0.0520, -0.1057,  0.1586, -0.2311,
         -0.2254,  0.1875,  0.0515,  0.0254,  0.2069, -0.1377,  0.3052, -0.0423,
          0.0830, -0.1090]], grad_fn=<AddmmBackward0>)


In [11]:
class MLPModel(L.LightningModule):
    """LightningModule that trains the ``MLP`` backbone with an L1 loss.

    Args:
        config: Mapping providing ``"learning_rate"`` and ``"weight_decay"``.
        input_dim: Number of input features.
        output_dim: Number of regression targets.
        num_warmup_steps: Optimizer steps of warm-up for the cosine schedule.
        num_training_steps: Total optimizer steps covered by the cosine schedule.
            If either step count is ``None`` (e.g. when the module is built
            only for inference) no learning-rate scheduler is configured.
    """

    def __init__(
        self, config, input_dim: int, output_dim: int, num_warmup_steps: int | None = None, num_training_steps: int | None = None
    ):
        super().__init__()
        self.config = config
        self.backbone = MLP(input_dim, output_dim)
        self.num_warmup_steps = num_warmup_steps
        self.num_training_steps = num_training_steps
        self.loss_fn = nn.L1Loss()

        # == record ==
        # Per-batch validation outputs, merged in on_validation_epoch_end.
        self.validation_step_outputs = []

    def forward(self, x):
        """Run the backbone on ``x``."""
        return self.backbone(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the L1 training loss for one batch."""
        x, target = batch

        # Use the module's own device rather than a notebook-level global, so
        # the class does not depend on a CONFIG defined in a later cell.
        x = x.to(self.device)
        target = target.to(self.device)

        y_pred = self(x)

        loss = self.loss_fn(y_pred, target.float())

        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Predict one validation batch and stash predictions and targets."""
        x, target = batch
        x = x.to(self.device)
        target = target.to(self.device)

        # == pred ==
        with torch.no_grad():
            y_pred = self(x)

        # Store float32 CPU copies: nothing accumulates on the accelerator and
        # the epoch-level loss does not mix half and full precision tensors.
        self.validation_step_outputs.append(
            {
                "preds": y_pred.detach().float().cpu(),
                "targets": target.detach().float().cpu(),
            }
        )

    def on_validation_epoch_end(self):
        """Compute the L1 loss over all stored batches and log it as ``valid_loss``."""
        # = merge batch data =
        outputs = self.validation_step_outputs

        output_val = torch.cat([x['preds'] for x in outputs], dim=0)
        target_val = torch.cat([x['targets'] for x in outputs], dim=0)

        # = compute validation loss =
        # No squeeze(): predictions and targets already share the shape
        # (n_samples, output_dim); squeezing would broadcast incorrectly when
        # output_dim == 1.
        loss = self.loss_fn(output_val, target_val)

        self.log("valid_loss", loss, prog_bar=True)

        # clear validation outputs
        self.validation_step_outputs = []

        return {'valid_loss': loss}

    def configure_optimizers(self):
        """Return AdamW, plus a per-step cosine schedule when step counts are set."""
        # == define optimizer ==
        model_optimizer = torch.optim.AdamW(
            self.parameters(),
            lr=self.config["learning_rate"],
            weight_decay=self.config["weight_decay"],
        )

        # Without step counts the schedule cannot be built.
        if self.num_warmup_steps is None or self.num_training_steps is None:
            return model_optimizer

        # == define learning rate scheduler ==
        lr_scheduler = get_cosine_schedule_with_warmup(
            model_optimizer,
            num_warmup_steps=self.num_warmup_steps,
            num_training_steps=self.num_training_steps,
        )

        return {
            "optimizer": model_optimizer,
            "lr_scheduler": {
                "scheduler": lr_scheduler,
                # Warm-up / total counts are expressed in optimizer steps, so
                # the scheduler must advance every step. Stepping once per
                # epoch would leave the learning rate inside the warm-up ramp.
                "interval": "step",
                "frequency": 1,
            },
        }

## Train Function

In [None]:
# Experiment-wide settings shared by the training and inference cells.
# NOTE(review): some keys (e.g. "min_lr", "T_max", "n_accumulate", "debug") are
# not referenced by the cells visible in this notebook; confirm before removing.
CONFIG = {
    # -- reproducibility / hardware / output location --
    "seed": 42,
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "output_dir": OUTPUT_DIR / exp_name,
    # -- batching --
    "train_batch_size": 16,  # 32
    "valid_batch_size": 64,
    # -- optimisation --
    "learning_rate": 5e-2,
    # "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    "T_max": 500,
    "weight_decay": 1e-6,
    "patience": 10,
    "n_accumulate": 1,
    # -- cross-validation / data loading --
    "folds": 5,
    "n_workers": 4,
    # -- run switches --
    "debug": True,
    "mixed_precision": True,
    "epochs": 30,
}

In [13]:
def train(X: pd.DataFrame, y: pd.DataFrame):
    """Train one MLP per GroupKFold split and collect out-of-fold predictions.

    Rows are grouped by ``X['scene']`` so a scene never appears in both the
    training and validation part of a fold. For every fold the best checkpoint
    (lowest ``valid_loss``) is saved, reloaded, and used for the OOF
    predictions, so the OOF score describes the same weights that are returned
    for inference.

    Args:
        X: Feature frame. Must contain a ``scene`` column; it is used only as
            the grouping key and is dropped before the data reaches the model.
        y: Target frame, row-aligned with ``X``.

    Returns:
        tuple: ``(models, oof)`` where ``models`` is the list of per-fold
        checkpoint paths and ``oof`` is a float64 array shaped like ``y``
        holding the validation predictions of every row.
    """
    # Apply the configured seed so that runs are repeatable.
    L.seed_everything(CONFIG["seed"], workers=True)

    device = CONFIG["device"]
    # persistent_workers=True is rejected by DataLoader when num_workers == 0.
    keep_workers = CONFIG["n_workers"] > 0

    gkf = GroupKFold(n_splits=CONFIG["folds"])
    groups = X['scene']

    models = []
    # Explicit float dtype: predictions must not be cast to whatever dtype y has.
    oof = np.zeros(y.shape, dtype=np.float64)
    for fold, (train_idx, valid_idx) in enumerate(gkf.split(X, y, groups=groups)):
        print('=' * 10, f'fold: {fold} start' + '=' * 10)
        train_X = X.iloc[train_idx].drop(columns=['scene'])
        train_y = y.iloc[train_idx]
        valid_X = X.iloc[valid_idx].drop(columns=['scene'])
        valid_y = y.iloc[valid_idx]

        train_dataset = MLPDataset(train_X, train_y)
        val_dataset = MLPDataset(valid_X, valid_y)

        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=CONFIG["train_batch_size"],
            shuffle=True,
            num_workers=CONFIG["n_workers"],
            pin_memory=True,
            persistent_workers=keep_workers,
            drop_last=True,
        )

        val_dataloader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=CONFIG["valid_batch_size"],
            shuffle=False,
            num_workers=CONFIG["n_workers"],
            pin_memory=True,
            persistent_workers=keep_workers,
        )

        # Schedule lengths in optimizer steps: one epoch of warm-up, then
        # cosine decay until the last epoch.
        num_warmup_steps = len(train_dataloader)
        num_training_steps = len(train_dataloader) * CONFIG["epochs"]

        # == init model ==
        model = MLPModel(
            config=CONFIG,
            input_dim=train_X.shape[1],
            output_dim=train_y.shape[1],
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_training_steps,
        )

        # == init callback ==
        checkpoint_callback = ModelCheckpoint(
            monitor="valid_loss",
            dirpath=CONFIG["output_dir"],
            save_top_k=1,
            save_last=False,
            save_weights_only=True,
            filename=f"fold_{fold}",
            mode="min",
        )

        early_stopping = EarlyStopping(
            monitor="valid_loss",
            min_delta=0.00,
            patience=CONFIG["patience"],
            verbose=False,
            mode="min",
        )

        callbacks_to_use = [
            checkpoint_callback,
            early_stopping,
            TQDMProgressBar(refresh_rate=1),
        ]

        # == init trainer ==
        trainer = Trainer(
            max_epochs=CONFIG["epochs"],
            val_check_interval=0.5,
            callbacks=callbacks_to_use,
            enable_model_summary=False,
            accelerator="gpu" if torch.cuda.is_available() else "cpu",
            deterministic=True,
            precision="16-mixed" if CONFIG["mixed_precision"] else 32,
        )

        # == Training ==
        trainer.fit(
            model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader
        )

        # == OOF ==
        # Reload the best checkpoint: after fit() the module holds the weights
        # of the last epoch, which are not the ones returned for inference.
        best_path = checkpoint_callback.best_model_path
        if best_path:
            best_state = torch.load(best_path, map_location=device)["state_dict"]
            model.load_state_dict(best_state)
            ckpt = Path(best_path)
        else:
            # No checkpoint was recorded; fall back to the expected file name.
            ckpt = CONFIG["output_dir"] / f"fold_{fold}.ckpt"

        model.to(device)
        model.eval()
        predictions = []
        with torch.no_grad():
            for x, _ in tqdm(val_dataloader):
                # .to(device) instead of .cuda() so CPU-only machines work too.
                predictions.append(model(x.to(device)).float().cpu())

        oof[valid_idx] = torch.cat(predictions, dim=0).numpy()
        models.append(ckpt)
    return models, oof

In [14]:
def predict(X: pd.DataFrame, ckpt_list: list):
    """Average the predictions of every checkpoint in ``ckpt_list`` on ``X``.

    Args:
        X: Feature frame with exactly the columns the models were trained on,
            in the same order (rows are converted positionally to tensors).
        ckpt_list: Checkpoint files, each holding a ``'state_dict'`` entry
            loadable into ``MLPModel``.

    Returns:
        np.ndarray: Array of shape ``(len(X), len(TARGET_COLS))`` with the
        element-wise mean of the per-checkpoint predictions.

    Raises:
        ValueError: If ``ckpt_list`` is empty (there is nothing to average).
    """
    if not ckpt_list:
        raise ValueError('ckpt_list must contain at least one checkpoint')

    device = CONFIG['device']

    # == create dataset & dataloader ==
    # The data is the same for every checkpoint, so it is built once.
    test_loader = torch.utils.data.DataLoader(
        TestDataset(X),
        batch_size=CONFIG['valid_batch_size'],
        num_workers=CONFIG["n_workers"],
        shuffle=False,
        drop_last=False
    )

    preds = []
    for ckpt in ckpt_list:
        # == init model ==
        model = MLPModel(
            config=CONFIG,
            input_dim=X.shape[1],
            # Derived from the target list instead of a hard-coded 18.
            output_dim=len(TARGET_COLS),
        )

        # == load ckpt ==
        weights = torch.load(ckpt, map_location=device)['state_dict']
        model.load_state_dict(weights)
        model.to(device)
        model.eval()

        start = time.time()
        pred = []
        with torch.no_grad():
            for x in tqdm(test_loader):
                outputs = model(x.to(device))
                pred.append(outputs.detach().cpu().numpy())

        preds.append(np.concatenate(pred, axis=0))
        elapsed_time = time.time() - start
        print(f'elapsed time: {elapsed_time:.5f}sec')
        gc.collect()

    return np.mean(preds, axis=0)

def evaluate(y_true: pd.DataFrame, y_pred: pd.DataFrame):
    """Return the mean absolute error over all elements as a Python float.

    Inputs are converted to NumPy arrays first, so the result is always a
    single scalar regardless of the installed pandas version (older versions
    reduce a DataFrame column-wise and would hand back a Series). Values are
    compared by position, not by index/column label.

    Args:
        y_true: Ground-truth values (DataFrame, Series or array-like).
        y_pred: Predicted values with the same shape as ``y_true``.

    Returns:
        float: Mean of ``|y_true - y_pred|`` over every element.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=float)
    if true_arr.shape != pred_arr.shape:
        raise ValueError(
            f'shape mismatch: y_true {true_arr.shape} vs y_pred {pred_arr.shape}'
        )
    return float(np.mean(np.abs(true_arr - pred_arr)))

In [15]:
# Add scene keys and per-scene aggregate features to the training frame.
train_df = preprocess(train_df)

# preprocess() sorts rows by (scene, decisecond), so capture the original test
# ID order first; the submission cell uses it to restore that order.
origin_test_ids = test_df['ID'].to_pandas()
test_df = preprocess(test_df)

print(train_df.shape)
print(test_df.shape)

(43371, 72)
(1727, 54)


In [16]:
# Build the pandas feature / target matrices; missing values become -1.
remove_columns = ['ID']
X = train_df.drop(remove_columns + TARGET_COLS).to_pandas().fillna(-1)
y = train_df[TARGET_COLS].to_pandas()

test_X = test_df.drop(remove_columns).to_pandas().fillna(-1)
# The network consumes features by position, so force the test frame into the
# training column order. A missing column raises a KeyError here instead of
# silently feeding misaligned features to the model.
test_X = test_X[X.columns]

# Ordinal-encode the categorical columns; categories unseen during fit map to -1.
oe = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X[CAT_COLS] = oe.fit_transform(X[CAT_COLS])
test_X[CAT_COLS] = oe.transform(test_X[CAT_COLS])

print(X.shape)
print(test_X.shape)

(43371, 53)
(1727, 53)


In [17]:
# Standardise every non-categorical column; the scaler is fitted on the
# training features only and then applied to both train and test.
num_cols = [col for col in X.columns if col not in CAT_COLS]
scaler = StandardScaler().fit(X[num_cols])
X[num_cols] = scaler.transform(X[num_cols])
test_X[num_cols] = scaler.transform(test_X[num_cols])

In [19]:
# Start every run from an empty experiment directory: remove leftovers of a
# previous run, then (re)create the directory. The mkdir is unconditional so
# the directory also exists on the very first run.
if CONFIG['output_dir'].exists():
    shutil.rmtree(CONFIG['output_dir'])
CONFIG['output_dir'].mkdir(parents=True, exist_ok=True)

In [None]:
%%time

models_dict = {}

# Zero-filled out-of-fold frame with one column per target and one row per
# training sample.
oof = pd.DataFrame(
    np.zeros_like(train_df.select(TARGET_COLS).to_pandas()),
    columns=TARGET_COLS,
)

# Test prediction frame: the IDs (in preprocessed order) plus zero-initialised
# target columns.
preds = test_df.select(['ID']).to_pandas().assign(
    **{target: 0.0 for target in TARGET_COLS}
)

# Cross-validated training; returns the fold checkpoints and OOF predictions.
ckpt_list, partial_oofs = train(X, y)

INFO:pytorch_lightning.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision




INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Copy the out-of-fold predictions returned by train() into the OOF frame.
oof.loc[:, :] = partial_oofs
# Predict the test set with the fold checkpoints; 'scene' is dropped because
# train() also drops it before the features reach the model.
preds.loc[:, TARGET_COLS] = predict(test_X.drop(columns=['scene']), ckpt_list)

In [None]:
# Report the mean absolute error of the OOF predictions per target column,
# then over all targets at once.
for target in TARGET_COLS:
    target_score = evaluate(y[target], oof[target])
    print(f'{target} CV score: ', target_score)

score = evaluate(y, oof)
print('Total CV score: ', score)

In [None]:
# Restore the original test row order by joining the predictions onto the
# original ID sequence. validate='one_to_one' makes the merge fail loudly if
# an ID is duplicated on either side, instead of silently adding rows.
submission = (
    pd.DataFrame(origin_test_ids)
    .merge(preds, on='ID', how='left', validate='one_to_one')
    .drop(columns=['ID'])
)

# Never overwrite an existing submission. An explicit exception is raised
# because assert statements are stripped when Python runs with -O.
output_path = OUTPUT_DIR / f'{exp_name}_{score:.4f}_submission.csv'
if output_path.exists():
    raise FileExistsError(f'output file already exists. {output_path}')

submission.to_csv(output_path, index=False)
submission.head()