In [None]:
!pip install -q dask[dataframe]

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/243.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.2/243.2 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
# force_remount=True asks Colab to mount again even when a mount already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
import glob
from argparse import Namespace
from joblib import Parallel, delayed

from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold
import xgboost as xgb
from xgboost import XGBRegressor

print("xgboost version:", xgb.__version__)

xgboost version: 2.1.2


In [None]:
# Experiment settings and XGBoost hyper-parameters.
# Stored on a Namespace so later cells read them as attributes (config.seed, config.output_dir, ...).
config = Namespace(
    # experiment name and directories (placeholders; set them for your environment)
    exp="xx",
    input_dir="xxxxx",
    output_dir="xxxx",
    image_dir="xxxx",
    # cross-validation
    n_folds=5,
    seed=0,
    # XGBoost hyper-parameters
    learning_rate=0.003,
    max_depth=11,
    min_child_weight=9,
    gamma=0.15,
    reg_alpha=3.25,
    reg_lambda=1.38,
    subsample=0.8,
    colsample_bytree=0.265,
)

In [None]:
####################
### Load Data
####################
def load_scene_features(csv_path):
    """Read a feature CSV and prepare it for per-scene processing.

    The ``ID`` column is split on "_": the first piece becomes ``scene_id`` and
    the second piece becomes the integer ``frame_id``. Rows are sorted by
    (scene_id, frame_id) and the frame is indexed by ``ID`` (the ``ID`` column
    itself is kept as well).

    Args:
        csv_path: path of the CSV to read; it must contain an ``ID`` column.

    Returns:
        The prepared DataFrame.
    """
    df = pd.read_csv(csv_path)
    id_parts = df["ID"].str.split("_")
    df["scene_id"] = id_parts.str[0]
    df["frame_id"] = id_parts.str[1].astype("int")
    df = df.sort_values(["scene_id", "frame_id"], ascending=True).reset_index(drop=True)
    df.index = df.ID
    return df


train_df = load_scene_features(os.path.join(config.output_dir, "train_features_w_cluster.csv"))
# number of rows (frames) per scene
train_scene_id2num = train_df["scene_id"].value_counts().to_dict()
display(train_df.head())
display(train_df.shape)

test_df = load_scene_features(os.path.join(config.output_dir, "test_features_w_cluster.csv"))
test_scene_id2num = test_df["scene_id"].value_counts().to_dict()
display(test_df.head())
display(test_df.shape)

Unnamed: 0_level_0,ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5,cluster,scene_id,frame_id
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
00066be8e20318869c38c66be466631a_320,00066be8e20318869c38c66be466631a_320,5.701526,1.538456,-2.165777,-139.0,0.0,False,0.25,True,drive,False,False,2.82959,0.032226,0.045187,6.231999,0.065895,0.107974,9.785009,0.124972,0.203649,13.485472,0.163448,0.302818,17.574227,0.174289,0.406331,21.951269,0.199503,0.485079,0,00066be8e20318869c38c66be466631a,320
00066be8e20318869c38c66be466631a_420,00066be8e20318869c38c66be466631a_420,11.176292,0.279881,-11.625697,-44.0,0.0,False,0.0,False,drive,False,True,4.970268,-0.007936,0.005028,10.350489,-0.032374,-0.020701,15.770054,0.084073,0.008645,21.132415,0.391343,0.036335,26.316489,0.843124,0.065,31.383814,1.42507,0.073083,2,00066be8e20318869c38c66be466631a,420
00066be8e20318869c38c66be466631a_520,00066be8e20318869c38c66be466631a_520,10.472548,0.231099,-2.985105,-132.0,0.0,False,0.18,True,drive,False,False,4.815701,-0.000813,0.017577,10.153522,-0.0278,0.026165,15.446539,-0.155987,0.040397,20.61816,-0.356932,0.058765,25.677387,-0.576985,0.102859,30.460033,-0.841894,0.152889,0,00066be8e20318869c38c66be466631a,520
000fb056f97572d384bae4f5fc1e0f28_20,000fb056f97572d384bae4f5fc1e0f28_20,3.316744,1.276733,-31.725477,-114.0,0.0,False,0.255,True,drive,False,False,1.55186,-0.041849,-0.008847,3.675162,-0.125189,-0.013725,6.113567,-0.239161,-0.012887,8.770783,-0.381813,-0.003898,11.619313,-0.554488,0.011393,14.657048,-0.7788,0.044243,0,000fb056f97572d384bae4f5fc1e0f28,20
000fb056f97572d384bae4f5fc1e0f28_120,000fb056f97572d384bae4f5fc1e0f28_120,6.055565,-0.117775,7.632668,173.0,0.0,False,0.0,False,drive,False,False,2.812608,0.033731,0.0059,5.975378,0.137848,0.01621,9.186793,0.322997,0.031626,12.37311,0.603145,0.031858,15.703514,0.960717,0.043479,19.311182,1.374655,0.058754,2,000fb056f97572d384bae4f5fc1e0f28,120


(43371, 33)

Unnamed: 0_level_0,ID,vEgo,aEgo,steeringAngleDeg,steeringTorque,brake,brakePressed,gas,gasPressed,gearShifter,leftBlinker,rightBlinker,cluster,scene_id,frame_id
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
012baccc145d400c896cb82065a93d42_120,012baccc145d400c896cb82065a93d42_120,3.374273,-0.01936,-34.008415,17.0,0.0,False,0.0,False,drive,False,False,2,012baccc145d400c896cb82065a93d42,120
012baccc145d400c896cb82065a93d42_220,012baccc145d400c896cb82065a93d42_220,2.441048,-0.022754,307.860077,295.0,0.0,True,0.0,False,drive,False,False,1,012baccc145d400c896cb82065a93d42,220
012baccc145d400c896cb82065a93d42_320,012baccc145d400c896cb82065a93d42_320,3.604152,-0.286239,10.774388,-110.0,0.0,True,0.0,False,drive,False,False,1,012baccc145d400c896cb82065a93d42,320
012baccc145d400c896cb82065a93d42_420,012baccc145d400c896cb82065a93d42_420,2.048902,-0.537628,61.045235,189.0,0.0,True,0.0,False,drive,True,False,1,012baccc145d400c896cb82065a93d42,420
01d738e799d260a10f6324f78023b38f_120,01d738e799d260a10f6324f78023b38f_120,2.201528,-1.8986,5.740093,-41.0,0.0,True,0.0,False,drive,False,False,1,01d738e799d260a10f6324f78023b38f,120


(1727, 15)

In [None]:
cols = ["vEgo", "aEgo", "steeringAngleDeg", "steeringTorque", "gas"]


def scene_statistics(df):
    """Per-scene summary statistics of the columns listed in ``cols``.

    Returns one row per ``scene_id`` with max/min/mean/std of every column;
    the two-level column index produced by ``agg`` is flattened to
    ``<column>_<stat>`` and the key column stays ``scene_id``.
    """
    stat_df = df.groupby("scene_id").agg({
        col: ["max", "min", "mean", "std"] for col in cols
    }).reset_index()
    stat_df.columns = ["_".join(col) if col != ("scene_id", "") else "scene_id" for col in stat_df.columns]
    return stat_df


def scene_shift_diff(df):
    """Previous/next-row values and differences of ``cols`` within each scene.

    For every column this yields ``<col>_shift1``, ``<col>_shift-1``,
    ``<col>_diff1`` and ``<col>_diff-1``, computed inside each ``scene_id``
    group in the row order of ``df``. Rows at a scene boundary get NaN.
    The result carries an ``ID`` column (copied from ``df["ID"]``) to merge on.

    Uses the vectorised GroupBy.shift / GroupBy.diff instead of
    ``groupby(...).apply(lambda ...)``, which avoids the warning that call
    emitted and the per-group Python overhead.
    """
    per_scene = df.groupby("scene_id")[cols]
    features = pd.concat(
        [
            per_scene.shift(1).add_suffix("_shift1"),
            per_scene.shift(-1).add_suffix("_shift-1"),
            per_scene.diff(1).add_suffix("_diff1"),
            per_scene.diff(-1).add_suffix("_diff-1"),
        ],
        axis=1,
    )
    # shift/diff keep the row order of df, so the ID column lines up positionally
    features.insert(0, "ID", df["ID"].to_numpy())
    return features.reset_index(drop=True)


# Standard statistics
train_stat_df = scene_statistics(train_df)
test_stat_df = scene_statistics(test_df)

# Time-series features
shift_diff_train_df = scene_shift_diff(train_df)
shift_diff_test_df = scene_shift_diff(test_df)

  shift_diff_train_df = train_df.groupby("scene_id").apply(
  shift_diff_test_df = test_df.groupby("scene_id").apply(


In [None]:
# Attach the per-scene statistics (keyed by scene_id) and then the
# shift/diff features (keyed by ID) to both frames.
train_df = (
    train_df
    .merge(train_stat_df, on="scene_id", how="left")
    .merge(shift_diff_train_df, on=["ID"], how="left")
)
test_df = (
    test_df
    .merge(test_stat_df, on="scene_id", how="left")
    .merge(shift_diff_test_df, on=["ID"], how="left")
)

In [None]:
# Traffic-light information
traffic_lights_df = pd.read_csv(os.path.join(config.input_dir, "traffic_lights_df.csv"))
id2cls = dict(zip(traffic_lights_df["ID"], traffic_lights_df["class"]))
display(traffic_lights_df["class"].value_counts())

# Encoding: one identity-matrix row per class, plus a last row for IDs that
# do not appear in traffic_lights_df ("nodetected").
light_classes = traffic_lights_df["class"].unique()
light_columns = [f"light_{cls}" for cls in light_classes] + ["light_nodetected"]
encode_label = np.diag(np.ones(len(light_classes) + 1))  # +1 is for nodetected
class2onehot = {name: encode_label[i] for i, name in enumerate(light_columns[:-1])}
class2onehot["light_nodetected"] = encode_label[-1]


def _encode_lights(ids):
    """Return an integer one-hot DataFrame (one row per ID) using class2onehot."""
    rows = [
        class2onehot[f"light_{id2cls[sample_id]}"] if sample_id in id2cls else class2onehot["light_nodetected"]
        for sample_id in ids
    ]
    return pd.DataFrame(np.stack(rows).astype(int), columns=light_columns)


# One-hot encode and append column-wise (relies on both sides sharing the same row index)
train_light_class = _encode_lights(train_df["ID"])
test_light_class = _encode_lights(test_df["ID"])
train_df = pd.concat([train_df, train_light_class], axis=1)
test_df = pd.concat([test_df, test_light_class], axis=1)

Unnamed: 0_level_0,count
class,Unnamed: 1_level_1
green,5879
red,4915
empty,1352
yellow,682
straight,647
left,576
right,306
other,60


In [None]:
# Only the "drive" and "park" values of gearShifter are used: turn each into
# a 0/1 flag, then drop the original string column.
for frame in (train_df, test_df):
    frame["gearDrive"] = frame["gearShifter"].eq("drive").astype(int)
    frame["gearPark"] = frame["gearShifter"].eq("park").astype(int)

train_df = train_df.drop(columns=["gearShifter"])
test_df = test_df.drop(columns=["gearShifter"])

In [None]:
# Image features
from sklearn.decomposition import TruncatedSVD

# The original ID order is no longer known at this point, so read just the IDs again
train_ids = pd.read_csv(os.path.join(config.output_dir, "train_features_w_cluster.csv"))["ID"].values
test_ids = pd.read_csv(os.path.join(config.output_dir, "test_features_w_cluster.csv"))["ID"].values

# SVD
svd_dim = 10
train_image_feats = np.load(os.path.join(config.output_dir, "train_DETR_backbone_feats_t.npy"))  # N, 8, 2048
test_image_feats = np.load(os.path.join(config.output_dir, "test_DETR_backbone_feats_t.npy"))
all_image_feats = np.concatenate([train_image_feats, test_image_feats], axis=0)

# Fit one TruncatedSVD per slice along axis 1 on train+test together and
# collect the reduced features as columns SVD<slice>_<component>.
svd_blocks = []
for i in range(all_image_feats.shape[1]):
    reducer = TruncatedSVD(n_components=svd_dim, random_state=config.seed)
    reduced = reducer.fit_transform(all_image_feats[:, i, :])
    svd_blocks.append(pd.DataFrame(reduced, columns=[f"SVD{i}_{j}" for j in range(svd_dim)]))
feat_svd_df = pd.concat(svd_blocks, axis=1)

# The first len(train_ids) rows belong to train, the remainder to test
n_train = len(train_ids)
train_feat_svd_df = feat_svd_df.iloc[:n_train].reset_index(drop=True)
test_feat_svd_df = feat_svd_df.iloc[n_train:].reset_index(drop=True)

del feat_svd_df, svd_blocks, train_image_feats, test_image_feats, all_image_feats

In [None]:
# Similarity between the three provided images
corr_columns = [f"corr_t_0_05_{i}" for i in range(8)] + [f"corr_t_05_1_{i}" for i in range(8)]

train_corrs_0_05 = np.load(os.path.join(config.output_dir, "train_DETR_backbone_feats_t_t-0.5_corrs.npy"))  # similarity between now and -0.5
train_corrs_05_1 = np.load(os.path.join(config.output_dir, "train_DETR_backbone_feats_t-0.5_t-1.0_corrs.npy"))  # similarity between -0.5 and -1.0

test_corrs_0_05 = np.load(os.path.join(config.output_dir, "test_DETR_backbone_feats_t_t-0.5_corrs.npy"))  # similarity between now and -0.5
test_corrs_05_1 = np.load(os.path.join(config.output_dir, "test_DETR_backbone_feats_t-0.5_t-1.0_corrs.npy"))  # similarity between -0.5 and -1.0

train_corrs_df = pd.DataFrame(
    np.concatenate([train_corrs_0_05, train_corrs_05_1], axis=1),
    columns=corr_columns,
)
test_corrs_df = pd.DataFrame(
    np.concatenate([test_corrs_0_05, test_corrs_05_1], axis=1),
    columns=corr_columns,
)

# Put SVD and correlation features side by side, tag every row with its ID
# (file order), and join onto the main frames by ID.
train_image_feat_df = pd.concat([train_feat_svd_df, train_corrs_df], axis=1).assign(ID=train_ids)
train_df = train_df.merge(train_image_feat_df, on="ID", how="left")

test_image_feat_df = pd.concat([test_feat_svd_df, test_corrs_df], axis=1).assign(ID=test_ids)
test_df = test_df.merge(test_image_feat_df, on="ID", how="left")

del train_feat_svd_df, train_corrs_df, test_feat_svd_df, test_corrs_df, train_image_feat_df, test_image_feat_df

In [None]:
# The brake column is all zeros (per the original author's note), so exclude it
unused_columns = ["brake"]
train_df = train_df.drop(columns=unused_columns)
test_df = test_df.drop(columns=unused_columns)

In [None]:
# Load OOF
targets = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']
# noise standard deviation per target, in the same order as `targets`
noise_std = [0.03, 0.015, 0.01, 0.05, 0.03, 0.02, 0.08, 0.04, 0.025, 0.1, 0.065, 0.035, 0.15, 0.09, 0.045, 0.2, 0.13, 0.05]

np.random.seed(config.seed)

## Set the OOF path as appropriate.
model_dir = os.path.join(config.output_dir, "37_convnext_large")
column_prefix = "convnext_large_v37_"

oof_df = pd.read_csv(os.path.join(model_dir, "oof_predictions.csv"))
# add Gaussian noise (half of the per-target std) to the predictions
for target, std in zip(targets, noise_std):
    oof_df.loc[:, target] += 0.5 * np.random.normal(0, std, len(oof_df))
# reorder rows to match train_df, then drop the ID index
oof_df = oof_df.set_index("ID").loc[train_df["ID"].values].reset_index(drop=True)

test_preds_df = pd.read_csv(os.path.join(model_dir, "test_predictions.csv"))
# add Gaussian noise (half of the per-target std) to the predictions
for target, std in zip(targets, noise_std):
    test_preds_df.loc[:, target] += 0.5 * np.random.normal(0, std, len(test_preds_df))
# reorder rows to match test_df, then drop the ID index
test_preds_df = test_preds_df.set_index("ID").loc[test_df["ID"].values].reset_index(drop=True)

# Prefix the columns so they do not clash with the targets or with other models
columns = [f"{column_prefix}{col}" for col in oof_df.columns]
oof_df.columns = columns
test_preds_df.columns = columns

train_df = pd.concat([train_df, oof_df], axis=1)
test_df = pd.concat([test_df, test_preds_df], axis=1)

In [None]:
# Load OOF
targets = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']
# noise standard deviation per target, in the same order as `targets`
noise_std = [0.03, 0.015, 0.01, 0.05, 0.03, 0.02, 0.08, 0.04, 0.025, 0.1, 0.065, 0.035, 0.15, 0.09, 0.045, 0.2, 0.13, 0.05]

np.random.seed(config.seed)

## Set the OOF path as appropriate.
model_dir = os.path.join(config.output_dir, "54_convnext_large")
column_prefix = "convnext_large_v54_"

oof_df = pd.read_csv(os.path.join(model_dir, "oof_predictions.csv"))
# add Gaussian noise (half of the per-target std) to the predictions
for target, std in zip(targets, noise_std):
    oof_df.loc[:, target] += 0.5 * np.random.normal(0, std, len(oof_df))
# reorder rows to match train_df, then drop the ID index
oof_df = oof_df.set_index("ID").loc[train_df["ID"].values].reset_index(drop=True)

test_preds_df = pd.read_csv(os.path.join(model_dir, "test_predictions.csv"))
# add Gaussian noise (half of the per-target std) to the predictions
for target, std in zip(targets, noise_std):
    test_preds_df.loc[:, target] += 0.5 * np.random.normal(0, std, len(test_preds_df))
# reorder rows to match test_df, then drop the ID index
test_preds_df = test_preds_df.set_index("ID").loc[test_df["ID"].values].reset_index(drop=True)

# Prefix the columns so they do not clash with the targets or with other models
columns = [f"{column_prefix}{col}" for col in oof_df.columns]
oof_df.columns = columns
test_preds_df.columns = columns

train_df = pd.concat([train_df, oof_df], axis=1)
test_df = pd.concat([test_df, test_preds_df], axis=1)

In [None]:
targets = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']
# Identifier/grouping columns and the targets themselves are never model inputs
col_nouse = ["ID", "scene_id", "frame_id", "cluster", *targets]
excluded = set(col_nouse)
# Every remaining train_df column, in its original order, is a feature
col_touse = [col for col in train_df.columns if col not in excluded]

In [None]:
# Compute the MAE
def evaluation(true_values, pred_values):
    """Return the mean absolute error between two same-shaped numpy arrays.

    The element-wise absolute differences are flattened before averaging, so
    arrays of any dimensionality produce a single scalar.
    """
    errors = np.abs(true_values - pred_values)
    return np.mean(errors.reshape(-1))

def train_xgb(target):
    """Train one XGBoost regressor per CV fold for a single target column.

    Reads the module-level ``config``, ``train_df``, ``test_df`` and
    ``col_touse``. Folds come from StratifiedGroupKFold, stratified on the
    ``cluster`` column and grouped by ``scene_id``.

    Args:
        target: name of the ``train_df`` column to regress.

    Returns:
        A tuple ``(oof_pred, y_pred, models)``:
        ``oof_pred`` holds the out-of-fold prediction for every train row,
        ``y_pred`` the test prediction averaged over the folds, and
        ``models`` the fitted model of each fold, in fold order.
    """
    params = {
        # Trees are fitted on squared error; MAE is only the metric reported on the eval sets
        "objective": "reg:squarederror",
        "n_jobs": -1,
        "seed": config.seed,
        "learning_rate": config.learning_rate,
        "tree_method": "hist",
        "n_estimators": 10000,
        "early_stopping_rounds": 100,
        "eval_metric": "mae",
        "max_depth": config.max_depth,
        "min_child_weight": config.min_child_weight,
        "gamma": config.gamma,
        "reg_alpha": config.reg_alpha,
        "reg_lambda": config.reg_lambda,
        "subsample": config.subsample,
        "colsample_bytree": config.colsample_bytree,
    }

    oof_pred = np.zeros(len(train_df))
    y_pred = np.zeros(len(test_df))
    models = []
    cv_scores = {}

    # Column selections do not depend on the fold, so do them once
    features = train_df[col_touse]
    labels = train_df[target]
    test = test_df[col_touse]

    sgkf = StratifiedGroupKFold(n_splits=config.n_folds, shuffle=True, random_state=config.seed)

    for fold, (train_index, val_index) in enumerate(sgkf.split(train_df, train_df["cluster"], groups=train_df["scene_id"])):

        print(f"====== fold {fold} ======")

        # split() yields positional row numbers, so select with iloc
        # (correct whatever index train_df carries)
        x_train, x_val = features.iloc[train_index], features.iloc[val_index]
        y_train, y_val = labels.iloc[train_index], labels.iloc[val_index]

        model = XGBRegressor(**params)
        model.fit(
            x_train, y_train,
            eval_set=[(x_train, y_train), (x_val, y_val)],
            verbose=100
        )
        # Keep the fitted model: the caller stores and pickles this list
        models.append(model)

        fold_pred = model.predict(x_val)
        score = evaluation(y_val.values, fold_pred)
        cv_scores[f"cv{fold}"] = score

        oof_pred[val_index] = fold_pred

        # Each fold contributes an equal share to the averaged test prediction
        y_pred += model.predict(test) / config.n_folds

        print(f"cv score is {score}")

    oof_score = evaluation(labels.values.reshape(-1), oof_pred)
    print(f"OOF score is {oof_score}")

    return oof_pred, y_pred, models

In [None]:
# Train the fold models for every target and collect the results, in `targets` order
models_dict = {}  # target name -> list returned by train_xgb
test_pred = []    # one test-prediction array per target
oof_pred = []     # one out-of-fold prediction array per target

separator = "=" * 50
for target in targets:
    print(separator, f"# {target}", separator, sep="\n")
    target_oof, target_test, target_models = train_xgb(target)
    models_dict[target] = target_models
    oof_pred.append(target_oof)
    test_pred.append(target_test)

# x_0
[0]	validation_0-mae:2.75865	validation_1-mae:2.78447
[100]	validation_0-mae:2.04721	validation_1-mae:2.06727
[200]	validation_0-mae:1.51952	validation_1-mae:1.53474
[300]	validation_0-mae:1.12764	validation_1-mae:1.13908
[400]	validation_0-mae:0.83812	validation_1-mae:0.84695
[500]	validation_0-mae:0.62383	validation_1-mae:0.63084
[600]	validation_0-mae:0.46500	validation_1-mae:0.47058
[700]	validation_0-mae:0.34802	validation_1-mae:0.35264
[800]	validation_0-mae:0.26234	validation_1-mae:0.26631
[900]	validation_0-mae:0.20029	validation_1-mae:0.20382
[1000]	validation_0-mae:0.15578	validation_1-mae:0.15904
[1100]	validation_0-mae:0.12445	validation_1-mae:0.12771
[1200]	validation_0-mae:0.10277	validation_1-mae:0.10614
[1300]	validation_0-mae:0.08812	validation_1-mae:0.09155
[1400]	validation_0-mae:0.07839	validation_1-mae:0.08191
[1500]	validation_0-mae:0.07203	validation_1-mae:0.07563
[1600]	validation_0-mae:0.06792	validation_1-mae:0.07152
[1700]	validation_0-mae:0.06532	valid

In [None]:
# Stack the per-target prediction vectors column-wise (one column per target) and save them
test_pred = np.stack(test_pred, axis=1)
oof_pred = np.stack(oof_pred, axis=1)
np.save(os.path.join(config.output_dir, f"{config.exp}_test_pred.npy"), test_pred)
np.save(os.path.join(config.output_dir, f"{config.exp}_oof_pred.npy"), oof_pred)

import pickle
# Use a context manager so the file is flushed and closed even if pickling fails
with open(os.path.join(config.output_dir, f"{config.exp}_models_dict.pkl"), "wb") as models_file:
    pickle.dump(models_dict, models_file)

In [None]:
from joblib import Parallel, delayed

def get_trajectory(row):
    """Convert the target values of one row into a (6, 3) trajectory array.

    Args:
        row: a pandas Series containing the 18 target entries x_0 ... z_5.

    Returns:
        A numpy array whose row i is [x_i, y_i, z_i] for i = 0..5.
    """
    TARGET_COLUMNS = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']

    # Long format: one record per coordinate name with its value
    long_df = row[TARGET_COLUMNS].to_frame().reset_index()
    long_df.columns = ['coordinate', 'value']

    # Split each name into its axis (x/y/z) and step number (0-5),
    # e.g. 'x_0' -> axis='x', number='0'
    name_parts = long_df['coordinate'].str.extract(r'([xyz])_(\d+)')
    long_df['axis'] = name_parts[0]
    long_df['number'] = name_parts[1]

    # Wide format: index = step number, columns = axis, cells = coordinate value
    wide_df = long_df.pivot(index='number', columns='axis', values='value')

    # Step numbers were extracted as strings; make them integers so sorting is numeric
    wide_df.index = wide_df.index.astype(int)

    return wide_df.sort_index().values

# Fetch trajectories in advance
# The target columns are selected in the order x_0, y_0, z_0, x_1, ... so a
# plain reshape yields, for every row, the (6, 3) array [[x_i, y_i, z_i] for
# i in 0..5]. That is the same array get_trajectory builds one row at a time,
# without the per-row pivot and the joblib worker pool.
trajectory_columns = [f"{axis}_{step}" for step in range(6) for axis in "xyz"]
train_trajectories = train_df[trajectory_columns].to_numpy(dtype="float").reshape(len(train_df), 6, 3)

Fetching trajectories....: 100%|██████████| 43371/43371 [00:35<00:00, 1206.12it/s]


In [None]:
# Reload the saved out-of-fold predictions and report their overall MAE
# against the ground-truth trajectories (flattened to one row per sample).
oof_pred = np.load(os.path.join(config.output_dir, f"{config.exp}_oof_pred.npy"))
flat_truth = train_trajectories.reshape(len(train_df), -1)
np.abs(oof_pred - flat_truth).mean()

0.19668316613392473

In [None]:
targets = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']

# oof_pred = np.load(os.path.join(config.output_dir, f"{config.exp}_oof_pred.npy"))
# Label the prediction matrix with the target names, add the train IDs as the
# first column, and write it out as CSV.
oof_pred = (
    pd.DataFrame(oof_pred, columns=targets, index=train_df["ID"].values)
    .rename_axis("ID")
    .reset_index()
)
oof_csv_path = os.path.join(config.output_dir, f"{config.exp}_oof_predictions.csv")
oof_pred.to_csv(oof_csv_path, index=False)

In [None]:
targets = ['x_0', 'y_0', 'z_0', 'x_1', 'y_1', 'z_1', 'x_2', 'y_2', 'z_2', 'x_3', 'y_3', 'z_3', 'x_4', 'y_4', 'z_4', 'x_5', 'y_5', 'z_5']

# Load the saved test predictions; their rows follow the order of test_df
test_pred_path = os.path.join(config.output_dir, f"{config.exp}_test_pred.npy")
test_preds = pd.DataFrame(np.load(test_pred_path), columns=targets, index=test_df["ID"].values)

# Put the rows back into the ID order of test_features.csv before writing the submission
test_ids = pd.read_csv(os.path.join(config.input_dir, "test_features.csv"))["ID"].values
test_preds = test_preds.loc[test_ids].reset_index(drop=True)
display(test_preds.head())

submission_path = os.path.join(config.output_dir, f"{config.exp}_submission.csv")
test_preds.to_csv(submission_path, index=False)

Unnamed: 0,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,x_3,y_3,z_3,x_4,y_4,z_4,x_5,y_5,z_5
0,1.485772,-0.011671,0.000472,3.050463,-0.03086,0.001569,4.680688,-0.020935,0.012046,6.334923,-0.004369,0.026385,7.935379,0.03348,0.03468,9.378392,0.149505,0.041442
1,0.954765,0.315889,-0.000582,1.86024,0.862444,-0.007769,2.705172,1.621704,-0.011979,3.543743,2.520183,-0.014789,4.231369,3.595199,-0.013194,4.783461,4.802906,-0.007903
2,1.597676,0.008301,-0.000795,3.265235,0.013739,-0.007543,4.799396,0.02462,-0.00936,6.247473,0.019319,-0.007989,7.745499,0.021737,-0.011488,9.26811,0.030441,-0.009579
3,0.866239,0.065679,-0.001379,1.652521,0.187929,-0.011463,2.358312,0.411195,-0.013042,2.977259,0.769852,-0.016289,3.49694,1.318784,-0.018574,3.899815,1.958461,-0.017069
4,0.83111,0.002406,-0.000772,1.481703,0.006039,-0.011422,1.926813,0.016602,-0.028076,2.171279,0.024461,-0.049999,2.087519,0.020694,-0.068266,1.767519,0.02238,-0.078957
