In [1]:
import numpy as np
import pandas as pd
from src.dataloader_ import *
from src.network_ import *
from src.utils import *

import os
import sys
import gc
import pickle
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer, KNNImputer

# Directories holding the per-participant accelerometer series, one
# `id=<id>/part-0.parquet` partition per participant.
train_series_dir = "../../inputs/series_train.parquet/"
test_series_dir = "../../inputs/series_test.parquet/"

# Tabular competition files.
data_dic_path = "../../inputs/data_dictionary.csv"
sample_submission_path = "../../inputs/sample_submission.csv"
train_path = "../../inputs/train.csv"
test_path = "../../inputs/test.csv"

# Load every tabular file eagerly; `train` and `test` are reassigned by the
# preprocessing cells below.
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)
sample_submission = pd.read_csv(sample_submission_path)
data_dic = pd.read_csv(data_dic_path)

import os
import random

import numpy as np
import torch


def seed_torch(seed=1029):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (CPU and all
    CUDA devices), and switches cuDNN to deterministic algorithm selection.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. DataLoader
    # workers); the hash seed of the running kernel is fixed at startup.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this is a no-op when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick a different kernel per run, which defeats the
    # deterministic flag above.
    torch.backends.cudnn.benchmark = False


# Name of the current working directory (not the .ipynb file name); passed to
# CMIDataset as `save_filename` further down.
nb_name = os.path.basename(os.getcwd())
seed_torch(seed=42)

In [2]:
feature_imputer = SimpleImputer(strategy="mean")
sii_imputer = SimpleImputer(strategy="mean")

# Feature columns are the numeric columns that exist in `test`, so the fitted
# imputer can be reused unchanged at inference time. Columns that are not in
# this set are copied over from `train` without imputation further down.
numeric_cols = test.select_dtypes(include=["float64", "int64"]).columns
numeric_feature_cols = numeric_cols.copy()
# numeric_feature_cols = numeric_feature_cols.drop("sii")

numeric_sii_cols = train.select_dtypes(include=["float64", "int64"]).columns

sii_inputed = sii_imputer.fit_transform(train[numeric_sii_cols])
# Fit on train only: `fit_transform` refits from scratch, so an earlier
# `fit` on test would be discarded anyway.
feature_inputed = feature_imputer.fit_transform(train[numeric_feature_cols])

# Keep train's index so the passthrough columns below align row-for-row even
# when `train` no longer has a contiguous RangeIndex (e.g. on cell re-run).
train_imputed = pd.DataFrame(
    feature_inputed, columns=numeric_feature_cols, index=train.index
)

# Append every remaining column (ids, season strings, train-only numerics such
# as the target) in its original order, in one concat instead of one insert
# per column.
passthrough_cols = [col for col in train.columns if col not in numeric_cols]
train_imputed = pd.concat([train_imputed, train[passthrough_cols]], axis=1)

train = train_imputed

# Mean-imputed and rounded target. Currently unused: the assignment back into
# `train` is commented out, so rows without a label keep NaN in "sii".
sii_impute = pd.DataFrame(sii_inputed, columns=numeric_sii_cols)
sii_impute["sii"] = sii_impute["sii"].round().astype(int)
# train["sii"] = sii_impute["sii"]

with open("feature_imputer.pkl", "wb") as f:
    pickle.dump(feature_imputer, f)

# Drop rows that have fewer than 10 non-missing values.
train = train.dropna(thresh=10, axis=0)

train.head()

Unnamed: 0,Basic_Demos-Age,Basic_Demos-Sex,CGAS-CGAS_Score,Physical-BMI,Physical-Height,Physical-Weight,Physical-Waist_Circumference,Physical-Diastolic_BP,Physical-HeartRate,Physical-Systolic_BP,...,PCIAT-PCIAT_15,PCIAT-PCIAT_16,PCIAT-PCIAT_17,PCIAT-PCIAT_18,PCIAT-PCIAT_19,PCIAT-PCIAT_20,PCIAT-PCIAT_Total,SDS-Season,PreInt_EduHx-Season,sii
0,5.0,0.0,51.0,16.877316,46.0,50.8,27.278508,69.648951,81.597236,116.983074,...,4.0,4.0,4.0,4.0,2.0,4.0,55.0,,Fall,2.0
1,9.0,0.0,65.454771,14.03559,48.0,46.0,22.0,75.0,70.0,122.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Fall,Summer,0.0
2,10.0,1.0,71.0,16.648696,56.5,75.6,27.278508,65.0,94.0,117.0,...,1.0,0.0,2.0,2.0,1.0,1.0,28.0,Fall,Summer,0.0
3,9.0,0.0,71.0,18.292347,56.0,81.6,27.278508,60.0,97.0,117.0,...,0.0,3.0,4.0,3.0,4.0,1.0,44.0,Summer,Winter,1.0
4,18.0,1.0,65.454771,19.331929,55.946713,89.038615,27.278508,69.648951,81.597236,116.983074,...,,,,,,,,,,


In [3]:
# Class balance of the target; rows with a missing "sii" are not counted
# because value_counts() drops NaN by default.
train["sii"].value_counts()

sii
0.0    1594
1.0     730
2.0     378
3.0      34
Name: count, dtype: int64

### テーブルデータセット

In [4]:
# Names of the derived columns created by `feature_engineering`; the order
# also fixes their position in the scaled feature matrix built later.
add_features = [
    "BMI_Age",
    "Internet_Hours_Age",
    "BMI_Internet_Hours",
    "BFP_BMI",
    "FFMI_BFP",
    "FMI_BFP",
    "LST_TBW",
    "BFP_BMR",
    "BFP_DEE",
    "BMR_Weight",
    "DEE_Weight",
    "SMM_Height",
    "Muscle_to_Fat",
    "Hydration_Status",
    "ICW_TBW",
]


def feature_engineering(df):
    """Return a copy of ``df`` with the derived interaction/ratio columns added.

    The 15 new columns are the ones listed in ``add_features``: products and
    ratios of the physical, BIA and internet-usage measurements. Ratios whose
    denominator is 0 come out as ``inf`` (or NaN for 0/0); cleaning those up
    is left to the caller.

    Args:
        df: Frame containing the ``Physical-*``, ``Basic_Demos-Age``,
            ``BIA-BIA_*`` and ``PreInt_EduHx-computerinternet_hoursday``
            columns read below.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    # Work on a copy so the caller's frame is never mutated and so that
    # assigning into a filtered frame cannot raise SettingWithCopyWarning.
    df = df.copy()

    # season_cols = [col for col in df.columns if "Season" in col]
    # df = df.drop(season_cols, axis=1)
    age = df["Basic_Demos-Age"]
    bmi = df["Physical-BMI"]
    weight = df["Physical-Weight"]
    internet_hours = df["PreInt_EduHx-computerinternet_hoursday"]
    fat = df["BIA-BIA_Fat"]
    tbw = df["BIA-BIA_TBW"]

    # Interactions with age / internet usage.
    df["BMI_Age"] = bmi * age
    df["Internet_Hours_Age"] = internet_hours * age
    df["BMI_Internet_Hours"] = bmi * internet_hours

    # Body-composition ratios and products.
    df["BFP_BMI"] = fat / df["BIA-BIA_BMI"]
    df["FFMI_BFP"] = df["BIA-BIA_FFMI"] / fat
    df["FMI_BFP"] = df["BIA-BIA_FMI"] / fat
    df["LST_TBW"] = df["BIA-BIA_LST"] / tbw
    df["BFP_BMR"] = fat * df["BIA-BIA_BMR"]
    df["BFP_DEE"] = fat * df["BIA-BIA_DEE"]

    # Quantities normalised by body size.
    df["BMR_Weight"] = df["BIA-BIA_BMR"] / weight
    df["DEE_Weight"] = df["BIA-BIA_DEE"] / weight
    df["SMM_Height"] = df["BIA-BIA_SMM"] / df["Physical-Height"]
    df["Muscle_to_Fat"] = df["BIA-BIA_SMM"] / df["BIA-BIA_FMI"]
    df["Hydration_Status"] = tbw / weight
    df["ICW_TBW"] = df["BIA-BIA_ICW"] / tbw

    return df


# Derive the extra columns for train, then neutralise the artefacts of the
# ratio features: +/-inf becomes NaN everywhere, and missing derived values
# are set to 0.0.
train = feature_engineering(train).replace([np.inf, -np.inf], np.nan)
train[add_features] = train[add_features].fillna(0.0)
train = train.dropna(thresh=10, axis=0)
# test only receives the derived columns here; no inf/NaN clean-up is applied
# to it in this cell.
test = feature_engineering(test)

In [5]:
# onehotEncoderの作成
from sklearn.preprocessing import OneHotEncoder

# Season columns that are one-hot encoded below.
categorical_columns = [
    "Basic_Demos-Enroll_Season",
    "CGAS-Season",
    "Physical-Season",
    "PAQ_C-Season",
    "FGC-Season",
    "Fitness_Endurance-Season",
    "PAQ_A-Season",
    "BIA-Season",
    "SDS-Season",
    "PreInt_EduHx-Season",
]

# Numeric columns that are standardised below (together with `add_features`).
# The list order fixes the column order of the scaled feature matrix, so it
# must stay identical wherever the pickled scaler is reused.
double_columns = [
    "FGC-FGC_SRR_Zone",
    "BIA-BIA_SMM",
    "Physical-Waist_Circumference",
    "BIA-BIA_FFMI",
    "FGC-FGC_CU",
    "PreInt_EduHx-computerinternet_hoursday",
    "BIA-BIA_ECW",
    "FGC-FGC_CU_Zone",
    "FGC-FGC_SRL_Zone",
    "BIA-BIA_DEE",
    "Physical-Weight",
    "Fitness_Endurance-Time_Mins",
    "FGC-FGC_SRR",
    "SDS-SDS_Total_T",
    "FGC-FGC_PU",
    "BIA-BIA_FFM",
    "FGC-FGC_TL_Zone",
    "Physical-BMI",
    "Physical-Systolic_BP",
    "Physical-HeartRate",
    "BIA-BIA_ICW",
    "Physical-Height",
    "FGC-FGC_SRL",
    "BIA-BIA_BMC",
    "Fitness_Endurance-Time_Sec",
    "BIA-BIA_Frame_num",
    "Basic_Demos-Age",
    "FGC-FGC_GSND_Zone",
    "Basic_Demos-Sex",
    "FGC-FGC_GSND",
    "BIA-BIA_LST",
    "FGC-FGC_TL",
    "BIA-BIA_BMI",
    "BIA-BIA_FMI",
    "PAQ_C-PAQ_C_Total",
    "BIA-BIA_Activity_Level_num",
    "FGC-FGC_GSD",
    "BIA-BIA_BMR",
    "BIA-BIA_Fat",
    "SDS-SDS_Total_Raw",
    "CGAS-CGAS_Score",
    "FGC-FGC_PU_Zone",
    "BIA-BIA_LDM",
    "Fitness_Endurance-Max_Stage",
    "PAQ_A-PAQ_A_Total",
    "BIA-BIA_TBW",
    "FGC-FGC_GSD_Zone",
    "Physical-Diastolic_BP",
]

# The fitted preprocessors are pickled under ./assets; make sure it exists so
# a fresh checkout does not fail on the first open().
os.makedirs("./assets", exist_ok=True)

###################### categorical columns ######################
# One-hot encode the categorical (season) columns of train.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# name, so try the new keyword first and fall back for older installations.
try:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)
onehot_encoder.fit(train[categorical_columns])

with open("./assets/onehot_encoder.pkl", "wb") as f:
    pickle.dump(onehot_encoder, f)

categorical_feature = onehot_encoder.transform(train[categorical_columns])

###################### double columns ######################
# Standardise the numeric columns of train (raw + engineered features).
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(train[double_columns + add_features])

with open("./assets/scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)

double_feature = scaler.transform(train[double_columns + add_features])
# double_feature = train[double_columns].values

# Fill remaining missing values: after scaling, NaN -> 0.0.
double_feature = np.nan_to_num(double_feature)

###################### build the model inputs ######################

# Everything below goes through plain arrays, so the three frames share a
# fresh RangeIndex and concatenate row-for-row regardless of train's index.
ids = train["id"].values.reshape(-1, 1)
X = np.concatenate([categorical_feature, double_feature], axis=1)
# Rows without a label are marked with -1 and filtered out before training.
y = train["sii"].fillna(-1).values.reshape(-1, 1)

# Assemble the DataFrame: id, anonymous feature_i columns, target.
ids_df = pd.DataFrame(ids, columns=["id"])
X_df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
y_df = pd.DataFrame(y, columns=["sii"])

train_df = pd.concat([ids_df, X_df, y_df], axis=1)
train_df

Unnamed: 0,id,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_103,feature_104,feature_105,feature_106,feature_107,feature_108,feature_109,feature_110,feature_111,sii
0,00008ff9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,-0.982821,0.014794,0.014632,0.172934,0.148872,-0.168935,-0.054949,0.005539,2.207978,2.0
1,000fd460,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,2.918586,0.014372,0.014182,0.277342,0.259111,-0.264243,0.168535,-0.037231,2.771143,0.0
2,00105258,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.168185,0.015855,0.015846,0.069164,0.078134,-0.000049,-0.000368,0.060192,-0.287631,0.0
3,00115b9f,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.340601,0.015894,0.015928,-0.060816,-0.050308,-0.124803,-0.058785,-0.056136,0.587341,1.0
4,0016bb22,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,-0.168185,0.015855,0.015846,-0.059385,-0.063458,0.005480,-0.000368,-0.023088,-0.287631,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3955,ff8a2de4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,-0.046460,0.015261,0.015210,-0.020161,-0.004701,-0.099419,0.176353,0.003719,0.036804,1.0
3956,ffa9794a,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,-0.168185,0.015855,0.015846,0.060245,0.068311,0.031303,-0.000368,0.054414,-0.287631,-1.0
3957,ffcd4dbd,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.771192,0.017739,0.017773,-0.188196,-0.232435,-0.118557,-0.127217,-0.108407,-0.107980,1.0
3958,ffed1dd5,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.178480,0.013327,0.012826,0.063652,0.312696,0.031154,-2.208516,0.111359,-0.382681,0.0


In [6]:
def read_parquet(base_dir, id_):
    """Load the time-series frame stored for one participant.

    Args:
        base_dir: Directory containing one ``id=<id>`` sub-directory per
            participant.
        id_: Participant id used to select the sub-directory.

    Returns:
        The DataFrame read from ``<base_dir>/id=<id_>/part-0.parquet``.
    """
    partition_name = f"id={id_}"
    parquet_file = os.path.join(base_dir, partition_name, "part-0.parquet")
    frame = pd.read_parquet(parquet_file)
    return frame


def get_valid_ids(base_dir):
    """List the participant ids that have an entry under ``base_dir``.

    Entries are expected to be named ``id=<id>`` (optionally followed by an
    extension, which is stripped). Anything without an ``=`` in its name, such
    as hidden files or checkpoints left by tools, is skipped instead of
    raising ``IndexError``.

    Args:
        base_dir: Directory to scan (non-recursively).

    Returns:
        List of id strings, in ``os.listdir`` order.
    """
    ids = []
    for entry in os.listdir(base_dir):
        parts = entry.split("=")
        if len(parts) < 2:
            # Not an `id=<id>` partition entry.
            continue
        ids.append(parts[1].split(".")[0])
    return ids


# Smoke test: load the series of a single participant to confirm the
# partition layout expected by read_parquet.
p = read_parquet(base_dir="../../inputs/series_train.parquet/", id_="ffcd4dbd")
# p = read_parquet(base_dir="../../inputs/series_train.parquet/", id_="10e46254")
# p

In [7]:
from glob import glob

# Number of entries directly under the train series directory, i.e. how many
# participants have time-series data.
# len(glob("../../normalized/*"))
len(glob("../../inputs/series_train.parquet/*"))

996

## Metric

In [8]:
from sklearn.metrics import *


def quadratic_weighted_kappa(y_true, y_pred):
    """Cohen's kappa between two label sequences with quadratic weighting.

    Args:
        y_true: Reference labels.
        y_pred: Predicted labels (already discretised).

    Returns:
        The score computed by ``cohen_kappa_score(..., weights="quadratic")``.
    """
    kappa = cohen_kappa_score(y_true, y_pred, weights="quadratic")
    return kappa


def threshold_Rounder(oof_non_rounded, thresholds):
    """Map continuous predictions to the classes 0-3 using three cut points.

    A value gets class ``k`` for the first ``k`` in (0, 1, 2) with
    ``value < thresholds[k]``, and class 3 if no comparison holds (this
    includes NaN). The thresholds are not required to be sorted.

    Args:
        oof_non_rounded: Array-like of raw predictions.
        thresholds: Sequence of three cut points.

    Returns:
        Integer ndarray of class labels with the shape of the input.
    """
    low_cut, mid_cut, high_cut = thresholds[0], thresholds[1], thresholds[2]
    # Apply the cuts from last to first so that an earlier cut always wins,
    # which is the same precedence as a nested if/elif chain.
    labels = np.where(oof_non_rounded < high_cut, 2, 3)
    labels = np.where(oof_non_rounded < mid_cut, 1, labels)
    labels = np.where(oof_non_rounded < low_cut, 0, labels)
    return labels


def evaluate_predictions(thresholds, y_true, oof_non_rounded):
    """Objective for the threshold search: negative quadratic weighted kappa.

    Args:
        thresholds: Three cut points being optimised.
        y_true: Reference labels.
        oof_non_rounded: Raw predictions to discretise with ``thresholds``.

    Returns:
        ``-kappa``, so that minimising this value maximises the kappa.
    """
    labels = threshold_Rounder(oof_non_rounded, thresholds)
    kappa = quadratic_weighted_kappa(y_true, labels)
    return -kappa

## Model, Dataset

In [9]:
# Preview of the model-ready table: id, feature_i columns, target (-1 = no label).
train_df.head()

Unnamed: 0,id,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_103,feature_104,feature_105,feature_106,feature_107,feature_108,feature_109,feature_110,feature_111,sii
0,00008ff9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,-0.982821,0.014794,0.014632,0.172934,0.148872,-0.168935,-0.054949,0.005539,2.207978,2.0
1,000fd460,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,2.918586,0.014372,0.014182,0.277342,0.259111,-0.264243,0.168535,-0.037231,2.771143,0.0
2,00105258,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,-0.168185,0.015855,0.015846,0.069164,0.078134,-4.9e-05,-0.000368,0.060192,-0.287631,0.0
3,00115b9f,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.340601,0.015894,0.015928,-0.060816,-0.050308,-0.124803,-0.058785,-0.056136,0.587341,1.0
4,0016bb22,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,-0.168185,0.015855,0.015846,-0.059385,-0.063458,0.00548,-0.000368,-0.023088,-0.287631,-1.0


In [10]:
from sklearn.model_selection import train_test_split

# Ids of every row that has a label (unlabelled rows were marked with -1).
# Alternative source that was used before: get_valid_ids(base_dir="../../normalized/")
use_ids = list(train_df.loc[train_df["sii"] != -1, "id"].unique())

len(use_ids)

2736

## Training

In [11]:
from tqdm import tqdm
from sklearn.model_selection import KFold

# Build one dataset over every labelled id. No fold split is needed here (the
# per-fold datasets are created in the training cell), so the dataset is
# constructed exactly once instead of once per fold with identical arguments.
# NOTE(review): presumably CMIDataset's constructor is deterministic for fixed
# inputs (and may cache under `save_filename`) - confirm in src.dataloader_.
use_ids = np.array(use_ids)

train_dataset = CMIDataset(
    table_df=train_df,
    valid_ids=use_ids,
    base_dir="../../inputs/series_train.parquet/",
    save_filename=nb_name,
)

In [12]:
# Shape of the time-series tensor of the first sample, as a sanity check of
# what the dataset yields under the "time_input" key.
train_dataset[0]["time_input"].shape

torch.Size([31, 17280, 15])

In [13]:
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

CV = []

batch_size = 1
device = "cuda"

# use_ids = np.array(use_ids[:30]) # debug
use_ids = np.array(use_ids)

extract_df = train[train["id"].isin(use_ids)].reset_index(drop=True)
# Ids in exactly the row order that the StratifiedKFold indices refer to.
fold_ids = extract_df["id"].to_numpy()

# Ground-truth table that the out-of-fold predictions are merged onto later.
test_df = train[["id", "sii"]].copy()
# test_df["pred_sii"] = 0
oof_preds = []

# Checkpoints are written under ./assets; make sure it exists.
os.makedirs("./assets", exist_ok=True)


def _run_epoch(model, loader, criterion, optimizer=None, device="cuda"):
    """Run one pass over ``loader`` and collect losses, predictions and targets.

    Trains when ``optimizer`` is given; otherwise evaluates with the model in
    eval mode and gradient tracking disabled.

    Args:
        model: Network called as ``model(table_input, time_input, active_mask=mask)``
            and returning ``(output, attention_weight)``.
        loader: DataLoader yielding dicts with the keys ``table_input``,
            ``time_input``, ``mask`` and ``output``.
        criterion: Loss applied to ``(scaled_output, target)``.
        optimizer: Optimizer to step, or ``None`` for an evaluation pass.
        device: Device the batch tensors are moved to.

    Returns:
        ``(mean_loss, predictions, targets)`` where the arrays are the
        per-batch results concatenated along the first axis.
    """
    is_train = optimizer is not None
    # Puts dropout / normalisation layers (if the model has any) into the mode
    # that matches the pass.
    model.train(is_train)

    total_loss = 0.0
    batch_preds = []
    batch_targets = []

    with torch.set_grad_enabled(is_train), tqdm(loader) as tq:
        for i, data in enumerate(tq):
            table_input = data["table_input"].to(device)
            time_input = data["time_input"].to(device)
            mask = data["mask"].to(device).to(torch.float32)
            target_ = data["output"].to(device)

            if is_train:
                optimizer.zero_grad()
            output, attention_weight = model(table_input, time_input, active_mask=mask)
            # Rescale the raw model output before comparing with the 0-3 target.
            # NOTE(review): presumably the model emits values in [0, 1] - confirm
            # against CMIModel.
            output = output * 3
            loss = criterion(output, target_)
            if is_train:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()

            batch_preds.append(output.detach().cpu().numpy())
            batch_targets.append(target_.detach().cpu().numpy())

            tq.set_postfix(loss=total_loss / (i + 1))

    return (
        total_loss / len(loader),
        np.concatenate(batch_preds),
        np.concatenate(batch_targets),
    )


for fold, (train_index, valid_index) in enumerate(
    skf.split(extract_df["id"], extract_df["sii"])
):
    print(f"################### fold:{fold} ###################")
    best_valid_score = -100

    # Translate positional fold indices into ids using the frame they were
    # computed on.
    train_ids = fold_ids[train_index]
    valid_ids = fold_ids[valid_index]

    train_dataset = CMIDataset(
        table_df=train_df,
        valid_ids=train_ids,
        base_dir="../../inputs/series_train.parquet/",
        save_filename=nb_name,
    )
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=30
    )

    valid_dataset = CMIDataset(
        table_df=train_df,
        valid_ids=valid_ids,
        base_dir="../../inputs/series_train.parquet/",
        save_filename=nb_name,
    )
    # shuffle=False keeps the prediction order stable between epochs.
    valid_loader = DataLoader(
        valid_dataset, batch_size=batch_size, shuffle=False, num_workers=30
    )
    # data_loader = DataLoader(dataset, batch_size=1, shuffle=True)

    # model = TimeEncoder(input_size=26, hidden_size=13, num_layers=2).to("cuda")
    model = CMIModel(input_size=26, hidden_size=13, num_layers=2).to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    spot_oof_preds = []

    for epoch in range(5):
        train_loss, metric_train_pred, metric_train_gt = _run_epoch(
            model, train_loader, criterion, optimizer=optimizer, device=device
        )
        valid_loss, metric_valid_pred, metric_valid_gt = _run_epoch(
            model, valid_loader, criterion, device=device
        )

        train_score = quadratic_weighted_kappa(
            metric_train_gt, metric_train_pred.round(0).astype(int)
        )

        valid_score = quadratic_weighted_kappa(
            metric_valid_gt, metric_valid_pred.round(0).astype(int)
        )

        print(
            f"epoch: {epoch}, loss: {train_loss}, valid_loss: {valid_loss}, train_score: {train_score}, valid_score: {valid_score}"
        )

        if valid_score > best_valid_score:
            # Keep the checkpoint and the out-of-fold predictions of the best
            # epoch only.
            best_valid_score = valid_score
            torch.save(model.state_dict(), f"./assets/model_{fold}.pth")

            # One prediction per sample, independent of the batch size.
            per_sample_pred = metric_valid_pred.reshape(len(metric_valid_pred), -1)[:, 0]
            if len(per_sample_pred) != len(valid_ids):
                # NOTE(review): pairing predictions with valid_ids assumes the
                # dataset yields exactly one sample per id, in the order given.
                raise ValueError(
                    f"got {len(per_sample_pred)} predictions for {len(valid_ids)} validation ids"
                )
            spot_oof_preds = [
                {"id": id_, "pred_sii": pred}
                for id_, pred in zip(valid_ids, per_sample_pred)
            ]

    oof_preds.append(spot_oof_preds)
    CV.append(best_valid_score)

print(f"CV: {np.mean(CV)}")

################### fold:0 ###################


  0%|          | 0/2052 [00:00<?, ?it/s]Could not load symbol cublasGetSmCountTarget from libcublas.so.11. Error: /usr/local/cuda-11.3/lib64/libcublas.so.11: undefined symbol: cublasGetSmCountTarget
100%|██████████| 2052/2052 [03:25<00:00,  9.98it/s, loss=0.523]
100%|██████████| 684/684 [01:03<00:00, 10.71it/s, loss=0.45] 


epoch: 0, loss: 0.5229103024556638, valid_loss: 0.44971122449290274, train_score: 0.2841245259630778, valid_score: 0.3337436419125128


100%|██████████| 2052/2052 [02:16<00:00, 15.02it/s, loss=0.467]
100%|██████████| 684/684 [01:21<00:00,  8.43it/s, loss=0.468]


epoch: 1, loss: 0.4665845710845385, valid_loss: 0.4679466010123245, train_score: 0.3554191226521496, valid_score: 0.3608689939117865


100%|██████████| 2052/2052 [02:32<00:00, 13.45it/s, loss=0.455]
100%|██████████| 684/684 [00:32<00:00, 20.90it/s, loss=0.451]


epoch: 2, loss: 0.45544363384494313, valid_loss: 0.4510307322716843, train_score: 0.411449460310675, valid_score: 0.40180449812699515


100%|██████████| 2052/2052 [02:24<00:00, 14.15it/s, loss=0.415]
100%|██████████| 684/684 [00:36<00:00, 18.65it/s, loss=0.463]


epoch: 3, loss: 0.41510874125334707, valid_loss: 0.4628164976719525, train_score: 0.46677000698150284, valid_score: 0.3619147659063625


100%|██████████| 2052/2052 [03:40<00:00,  9.30it/s, loss=0.369]
100%|██████████| 684/684 [00:30<00:00, 22.75it/s, loss=0.518]


epoch: 4, loss: 0.3691216461374864, valid_loss: 0.5179024165752811, train_score: 0.5248272534976064, valid_score: 0.3784320196754811
################### fold:1 ###################


100%|██████████| 2052/2052 [02:27<00:00, 13.90it/s, loss=0.513]
100%|██████████| 684/684 [00:38<00:00, 17.68it/s, loss=0.436] 


epoch: 0, loss: 0.5132258811653908, valid_loss: 0.43639017935564955, train_score: 0.30375932863679955, valid_score: 0.4032398380080996


100%|██████████| 2052/2052 [02:54<00:00, 11.78it/s, loss=0.466]
100%|██████████| 684/684 [01:21<00:00,  8.39it/s, loss=0.495]  


epoch: 1, loss: 0.46645432838791584, valid_loss: 0.4946224476135591, train_score: 0.3696697269427054, valid_score: 0.354027676620539


100%|██████████| 2052/2052 [02:49<00:00, 12.13it/s, loss=0.444]  
100%|██████████| 684/684 [00:36<00:00, 18.86it/s, loss=0.466]


epoch: 2, loss: 0.44350014527344306, valid_loss: 0.4660196341952301, train_score: 0.3903654886640008, valid_score: 0.4193245467178245


100%|██████████| 2052/2052 [03:00<00:00, 11.35it/s, loss=0.406]
100%|██████████| 684/684 [00:37<00:00, 18.48it/s, loss=0.472]  


epoch: 3, loss: 0.40601189301606083, valid_loss: 0.4717257676670203, train_score: 0.4674839716220395, valid_score: 0.34302591085078926


100%|██████████| 2052/2052 [02:21<00:00, 14.52it/s, loss=0.366] 
100%|██████████| 684/684 [01:09<00:00,  9.77it/s, loss=0.485]   


epoch: 4, loss: 0.3655140114488075, valid_loss: 0.48540396008394227, train_score: 0.52844341665705, valid_score: 0.39363351924908174
################### fold:2 ###################


100%|██████████| 2052/2052 [03:06<00:00, 10.98it/s, loss=0.503]  
100%|██████████| 684/684 [00:40<00:00, 16.95it/s, loss=0.559]


epoch: 0, loss: 0.5028299611001031, valid_loss: 0.5592062453923109, train_score: 0.33331441818391205, valid_score: 0.3048049053463302


100%|██████████| 2052/2052 [02:25<00:00, 14.13it/s, loss=0.457]
100%|██████████| 684/684 [01:08<00:00,  9.95it/s, loss=0.499] 


epoch: 1, loss: 0.45652383609274827, valid_loss: 0.49935593149576785, train_score: 0.35764497957940866, valid_score: 0.3143968986306216


100%|██████████| 2052/2052 [02:38<00:00, 12.91it/s, loss=0.431]  
100%|██████████| 684/684 [01:01<00:00, 11.17it/s, loss=0.489]


epoch: 2, loss: 0.4306335033519896, valid_loss: 0.48919678580419124, train_score: 0.4064470165713505, valid_score: 0.3178428424703412


100%|██████████| 2052/2052 [02:41<00:00, 12.71it/s, loss=0.398] 
100%|██████████| 684/684 [00:37<00:00, 18.01it/s, loss=0.517] 


epoch: 3, loss: 0.39795902837071423, valid_loss: 0.5173946780673806, train_score: 0.44333222991029986, valid_score: 0.3527448690138495


100%|██████████| 2052/2052 [01:23<00:00, 24.61it/s, loss=0.916]
100%|██████████| 684/684 [00:17<00:00, 38.41it/s, loss=0.934]


epoch: 4, loss: 0.9161744468256791, valid_loss: 0.9342105263157895, train_score: 0.010049096224923115, valid_score: 0.0
################### fold:3 ###################


100%|██████████| 2052/2052 [01:01<00:00, 33.49it/s, loss=0.51] 
100%|██████████| 684/684 [00:25<00:00, 27.03it/s, loss=0.491]


epoch: 0, loss: 0.5095295952653869, valid_loss: 0.4906628628648178, train_score: 0.339169900720545, valid_score: 0.32154577261489414


100%|██████████| 2052/2052 [01:02<00:00, 32.74it/s, loss=0.452]
100%|██████████| 684/684 [00:18<00:00, 36.96it/s, loss=0.495]


epoch: 1, loss: 0.45227729988443605, valid_loss: 0.4945128505119836, train_score: 0.39265132783992274, valid_score: 0.3306999428344827


100%|██████████| 2052/2052 [01:01<00:00, 33.47it/s, loss=0.428]
100%|██████████| 684/684 [00:16<00:00, 40.90it/s, loss=0.547]


epoch: 2, loss: 0.4275991735415207, valid_loss: 0.5468000329792423, train_score: 0.4354370255012654, valid_score: 0.2740503636082313


100%|██████████| 2052/2052 [01:04<00:00, 31.98it/s, loss=0.383]
100%|██████████| 684/684 [00:19<00:00, 35.32it/s, loss=0.557]


epoch: 3, loss: 0.382781775042117, valid_loss: 0.557022479931277, train_score: 0.512870913040936, valid_score: 0.3653992569448201


100%|██████████| 2052/2052 [01:00<00:00, 33.74it/s, loss=0.358]
100%|██████████| 684/684 [00:16<00:00, 40.39it/s, loss=0.586]

epoch: 4, loss: 0.3583767141935886, valid_loss: 0.5862478747465697, train_score: 0.5638523935960612, valid_score: 0.30232495716477803
CV: 0.3848182927008723





In [14]:
# Stack the per-fold out-of-fold records into a single frame with a fresh
# 0..n-1 index.
fold_frames = list(map(pd.DataFrame, oof_preds))
oof_preds_df = pd.concat(fold_frames, axis=0, ignore_index=True)
oof_preds_df.head()

Unnamed: 0,id,pred_sii
0,00105258,0.351801
1,00115b9f,0.501462
2,0038ba98,0.273845
3,00ae59c9,0.542925
4,00f574e9,0.48162


In [15]:
# Attach the out-of-fold prediction to each labelled row; the inner join keeps
# only ids that received a prediction.
test_pred_df = pd.merge(test_df, oof_preds_df, how="inner", on="id")
test_pred_df.head()

Unnamed: 0,id,sii,pred_sii
0,00008ff9,2.0,0.14384
1,000fd460,0.0,0.117646
2,00105258,0.0,0.351801
3,00115b9f,1.0,0.501462
4,001f3379,1.0,0.13869


In [16]:
from scipy.optimize import minimize

# Search the three class cut points that maximise the quadratic weighted kappa
# of the out-of-fold predictions, starting from plain rounding boundaries.
# The score printed below is measured on the same predictions the thresholds
# were tuned on.
KappaOPtimizer = minimize(
    fun=evaluate_predictions,
    x0=[0.5, 1.5, 2.5],
    method="Nelder-Mead",
    args=(test_pred_df["sii"], test_pred_df["pred_sii"]),
)
assert KappaOPtimizer.success, "Optimization did not converge."

oof_tuned = threshold_Rounder(test_pred_df["pred_sii"], KappaOPtimizer.x)
tKappa = quadratic_weighted_kappa(test_pred_df["sii"], oof_tuned)
print(f"tuned Kappa: {tKappa}")

tuned Kappa: 0.443827156262638


In [17]:
# Tuned cut points, in the order consumed by threshold_Rounder.
print(KappaOPtimizer.x)

[0.52208806 0.97566028 3.22306375]


In [18]:
# rm -f /mnt/wslg/runtime-dir/vscode-git-5e5d7f1bbd.sock