In [1]:
import sys
import os
from dotenv import load_dotenv

root_dir = os.path.abspath("..")
sys.path.append(root_dir)
dotenv_path = os.path.join(root_dir, ".env")
load_dotenv(dotenv_path)

True

In [None]:
from src.data_insert import ParquetRankDataset
from src.model import RankerNN
from src.losses import pairwise_loss

from torch.utils.data import DataLoader
import torch
from datetime import datetime

In [3]:
NORMALIZATION = os.path.join(root_dir, "data", "train", "train_split_0.parquet")

train_file_paths = []
valid_file_paths = []

# 8개 train 조각
for i in range(8):
    path = os.path.join(root_dir, "data", "train", f"train_split_{i}.parquet")
    train_file_paths.append(path)

# 2개 valid 조각
for i in range(8, 10):
    path = os.path.join(root_dir, "data", "train", f"train_split_{i}.parquet")
    valid_file_paths.append(path)

In [4]:
EXCLUDED_COLS = ['row_id', 'ranker_id', 'selected']

LABEL_COL = 'selected'
GROUP_COL = 'ranker_id'

train_dataset_stream = ParquetRankDataset(
    parquet_paths=train_file_paths,
    exclude_feature_cols=EXCLUDED_COLS,
    label_col=LABEL_COL,
    group_col=GROUP_COL,
    max_rows=4096,
    normalization_parquet=NORMALIZATION,
)

valid_dataset_stream = ParquetRankDataset(
    parquet_paths=valid_file_paths,
    exclude_feature_cols=EXCLUDED_COLS,
    label_col=LABEL_COL,
    group_col=GROUP_COL,
    max_rows=4096,
    normalization_parquet=NORMALIZATION,
)

  self.feature_cols: list[str] = [c for c in first_schema.columns if c not in self.exclude_feature_cols]


[INFO] Normalization stats loaded from /Users/goonzard/Developer/data-science-09-kaggle-airplane/data/train/train_split_0.parquet
[INFO] Example mean/std: [('emb_0', 0.044884927570819855), ('emb_1', -0.026697352528572083), ('emb_2', -0.10396946221590042), ('emb_3', 0.04172823578119278), ('emb_4', -0.01801719330251217)]
[INFO] Normalization stats loaded from /Users/goonzard/Developer/data-science-09-kaggle-airplane/data/train/train_split_0.parquet
[INFO] Example mean/std: [('emb_0', 0.044884927570819855), ('emb_1', -0.026697352528572083), ('emb_2', -0.10396946221590042), ('emb_3', 0.04172823578119278), ('emb_4', -0.01801719330251217)]


In [8]:
HIDDEN_LAYERS = [512, 256, 128]
HIDDEN_LAYERS_STR = "_".join(map(str, HIDDEN_LAYERS))

DROP_RATE = 0.2
LEARNING_RATE = 1e-3
TRAIN_DATE = datetime.now().strftime("%Y%m%dT%H%M%S")

VAL_INTERVAL = 500
PATIENCE = 5
BEST_VAL_LOSS = float("inf")
NO_IMPROVE_COUNT = 0

NUM_EPOCHS = 5

MODEL_NAME = f"best_model_{HIDDEN_LAYERS_STR}_{DROP_RATE}_{LEARNING_RATE}.pt"
MODEL_OUTPUT = os.path.join(root_dir, "models", MODEL_NAME)

In [9]:
model = RankerNN(
    n_features=train_dataset_stream.feature_len, 
    hidden_layers=HIDDEN_LAYERS, 
    dropout=DROP_RATE
)

In [10]:
train_loader = DataLoader(train_dataset_stream, batch_size=None, shuffle=False)
valid_loader = DataLoader(valid_dataset_stream, batch_size=None, shuffle=False)

if torch.backends.mps.is_available():
    device = torch.device("mps")  # Apple Silicon GPU via Metal
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=3,  # number of epochs
)

for epoch in range(NUM_EPOCHS):
    model.train()
    total_train_loss = 0.0
    step = 0

    print(f"\n[Epoch {epoch+1}] ------------------------------------------------")
    for X, y, g in train_loader:
        X, y, g = X.to(device), y.to(device), g.to(device)

        optimizer.zero_grad()
        scores = model(X)
        loss = pairwise_loss(scores, y, g)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
        scheduler.step()

        total_train_loss += loss.item()
        step += 1
        avg_loss = total_train_loss / step

        # Return per step training loss
        print(f"[Epoch {epoch+1} | Step {step}] Loss: {loss.item():.4f} | Running Avg: {avg_loss:.4f}")

        if step % VAL_INTERVAL == 0:
            model.eval()
            total_val_loss = 0.0
            val_steps = 0
            print(f"[Validate][Epoch {epoch+1} | Step {step}] --------------------------")
            with torch.no_grad():
                for Xv, yv, gv in valid_loader:
                    Xv, yv, gv = Xv.to(device), yv.to(device), gv.to(device)
                    val_scores = model(Xv)
                    val_loss = pairwise_loss(val_scores, yv, gv)

                    print(f"[Val] Loss: {val_loss.item():.4f}")
                    total_val_loss += val_loss.item()
                    val_steps += 1

            avg_val_loss = total_val_loss / val_steps if val_steps > 0 else 0.0
            print(f"[Validate][Epoch {epoch+1} | Step {step}] Average Val Loss: {avg_val_loss:.4f}")
            model.train()

            if avg_val_loss < BEST_VAL_LOSS:
                BEST_VAL_LOSS = avg_val_loss
                NO_IMPROVE_COUNT = 0
                torch.save(model.state_dict(), MODEL_OUTPUT)

            else:
                NO_IMPROVE_COUNT += 1
                if NO_IMPROVE_COUNT >= PATIENCE:
                    print(f"[INFO] Early stopping at epoch {epoch+1}")
                    break

    avg_epoch_loss = total_train_loss / step if step > 0 else 0.0
    print(f"[Epoch {epoch+1} Completed] Average Train Loss: {avg_epoch_loss:.4f}")


[Epoch 1] ------------------------------------------------
[Epoch 1 | Step 1] Loss: 0.6794 | Running Avg: 0.6794
[Epoch 1 | Step 2] Loss: 0.7808 | Running Avg: 0.7301
[Epoch 1 | Step 3] Loss: 0.8506 | Running Avg: 0.7703
[Epoch 1 | Step 4] Loss: 0.9260 | Running Avg: 0.8092
[Epoch 1 | Step 5] Loss: 0.8528 | Running Avg: 0.8179
[Epoch 1 | Step 6] Loss: 0.9869 | Running Avg: 0.8461
[Epoch 1 | Step 7] Loss: 0.9797 | Running Avg: 0.8652
[Epoch 1 | Step 8] Loss: 0.6720 | Running Avg: 0.8410
[Epoch 1 | Step 9] Loss: 0.6814 | Running Avg: 0.8233
[Epoch 1 | Step 10] Loss: 0.6344 | Running Avg: 0.8044
[Epoch 1 | Step 11] Loss: 0.7857 | Running Avg: 0.8027
[Epoch 1 | Step 12] Loss: 0.7107 | Running Avg: 0.7950
[Epoch 1 | Step 13] Loss: 0.8245 | Running Avg: 0.7973
[Epoch 1 | Step 14] Loss: 0.6532 | Running Avg: 0.7870
[Epoch 1 | Step 15] Loss: 0.7519 | Running Avg: 0.7847
[Epoch 1 | Step 16] Loss: 0.7489 | Running Avg: 0.7824
[Epoch 1 | Step 17] Loss: 0.7525 | Running Avg: 0.7807
[Epoch 1 | Ste

KeyboardInterrupt: 