# 셀 1 — 경로/임포트/시드/데이터 로드

In [None]:
import os, sys, time, random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Make the homework_2 directory importable so titanic_dataset.py can be found.
# NOTE: the path is relative to the current working directory, so it only
# resolves as intended when the notebook is run from its own folder.
sys.path.append(os.path.abspath("../_03_homeworks/homework_2"))

from titanic_dataset import get_preprocessed_dataset 

# Pin every RNG in use so that repeated experiments vary as little as possible.
def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with ``seed``.

    Also switches cuDNN to deterministic mode and turns off its ``benchmark``
    setting.
    """
    for seeder in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(42)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Load the preprocessed train / validation / test splits.
train_ds, val_ds, test_ds = get_preprocessed_dataset()
# Input width = number of elements in one sample's "input" tensor.
in_dim = int(train_ds[0]["input"].numel())
print(f"in_dim={in_dim}, |train|={len(train_ds)}, |val|={len(val_ds)}, |test|={len(test_ds)}")

# Locate test.csv, which is read later for the PassengerId column of the
# submission. The homework folder is preferred; the current directory is the
# fallback.
TEST_CSV_PATH = os.path.abspath("../_03_homeworks/homework_2/test.csv")
if not os.path.exists(TEST_CSV_PATH):
    TEST_CSV_PATH = os.path.abspath("./test.csv")
    if not os.path.exists(TEST_CSV_PATH):
        # Warn now instead of failing only after the whole grid search has run.
        print("[WARN] test.csv not found in either location; last path tried:", TEST_CSV_PATH)
print("TEST_CSV_PATH:", TEST_CSV_PATH)


Device: cpu


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["alone"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["Embarked"].fillna("missing", inplace=True)


Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'title', 'family_num', 'alone'],
      dtype='object')
   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked  title  \
0       0.0       3    1  22.0      1      0   7.2500         2      2   
1       1.0       1    0  38.0      1      0  71.2833         0      3   
2       1.0       3    0  26.0      0      0   7.9250         2      1   
3       1.0       1    0  35.0      1      0  53.1000         2      3   
4       0.0       3    1  35.0      0      0   8.0500         2      2   
5       0.0       3    1  29.0      0      0   8.4583         1      2   
6       0.0       1    1  54.0      0      0  51.8625         2      2   
7       0.0       3    1   2.0      3      1  21.0750         2      0   
8       1.0       3    0  27.0      0      2  11.1333         2      3   
9       1.0       2    0  14.0      1      0  30.0708         0      3   

   family_num  alone  
0           1    0.

# 셀 2 — 모델/학습 유틸(옵션: W&B 로깅)

In [None]:
# Optional Weights & Biases logging: set USE_WANDB to False to turn it off.
USE_WANDB = True
WANDB_PROJECT = "titanic-grid"

# Stays None unless logging is requested and the package imports cleanly.
wandb = None
if USE_WANDB:
    try:
        import wandb
    except Exception as e:
        # Best effort: continue without logging rather than aborting the notebook.
        print("[WARN] wandb import 실패 -> 로깅 비활성화:", e)
        USE_WANDB = False

# Activation switch: lets the architecture be changed by string name, without argparse.
ACTIVATIONS = dict(
    sigmoid=nn.Sigmoid,
    relu=nn.ReLU,
    elu=nn.ELU,
    leaky_relu=nn.LeakyReLU,
)


class MLP(nn.Module):
    """Fully connected classifier made of ``Linear -> BatchNorm1d -> activation`` stages.

    Args:
        in_dim: Number of input features.
        hidden_dims: Width of each hidden stage, in order.
        num_classes: Output size of the final linear layer.
        act: Key into ``ACTIVATIONS`` selecting the activation class
            (an unknown key raises ``KeyError``).
        dropout: Dropout probability; a ``Dropout`` layer is appended after
            each activation only when this is greater than 0.
    """

    def __init__(self, in_dim: int, hidden_dims=(64, 32), num_classes=2, act="relu", dropout=0.0):
        super().__init__()
        make_activation = ACTIVATIONS[act]
        widths = [in_dim, *hidden_dims]
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend((
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                make_activation(),
            ))
            if dropout > 0:
                stages.append(nn.Dropout(dropout))
        # Final classification head; no activation is applied after it.
        stages.append(nn.Linear(widths[-1], num_classes))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the head's output."""
        return self.net(x)

# Validation loop: gradients are disabled to save time and memory; reports
# CrossEntropyLoss and accuracy.
@torch.no_grad()
def evaluate(model, loader, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over every sample in ``loader``.

    Each batch must be a mapping with ``"input"`` and ``"target"`` tensors.
    The model is switched to eval mode and is left in that mode on return.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for batch in loader:
        inputs, targets = batch["input"].to(device), batch["target"].to(device)
        outputs = model(inputs)
        batch_n = inputs.size(0)
        # The criterion yields a batch mean; weight it by the batch size so the
        # final value is a per-sample average even when batch sizes differ.
        loss_sum += criterion(outputs, targets).item() * batch_n
        n_correct += (outputs.argmax(dim=1) == targets).sum().item()
        n_seen += batch_n
    return loss_sum / n_seen, n_correct / n_seen

def train_one_run(
    activation="relu",  # activation function under comparison
    batch_size=32,  # batch size under comparison
    hidden_dims=(64, 32),
    dropout=0.0,
    epochs=50,
    lr=1e-3,
    weight_decay=0.0,
    device=device,
    use_wandb=USE_WANDB,  # W&B logging on/off
    wandb_project=WANDB_PROJECT
):
    """Train one MLP configuration and return its best-validation snapshot.

    Reads the module-level ``train_ds``, ``val_ds`` and ``in_dim``.

    Args:
        activation: Key into ``ACTIVATIONS`` for the hidden activation.
        batch_size: Training batch size (validation uses one full batch).
        hidden_dims: Hidden layer widths passed to ``MLP``.
        dropout: Dropout probability passed to ``MLP``.
        epochs: Number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        device: Device on which the model and batches are placed.
        use_wandb: When true, per-epoch metrics are logged to W&B.
        wandb_project: W&B project name used for the run.

    Returns:
        dict with keys ``val_acc``, ``epoch``, ``state`` (CPU copy of the
        ``state_dict``), ``val_loss``, ``train_loss`` and ``train_acc``, all
        taken at the epoch with the highest validation accuracy. Ties keep
        the earliest epoch because the comparison is strict.
    """
    set_seed(42)  # same seed for every combination so the comparison is fair

    # A failed import leaves the module-level `wandb` as None; degrade to no
    # logging instead of raising AttributeError on `wandb.init`.
    if use_wandb and wandb is None:
        print("[WARN] wandb is unavailable -> logging disabled for this run")
        use_wandb = False

    # Only the training batch size varies between runs.
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=len(val_ds), shuffle=False)

    model = MLP(in_dim=in_dim, hidden_dims=hidden_dims, num_classes=2, act=activation, dropout=dropout).to(device)
    ce = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Optionally log per-epoch loss/accuracy to W&B.
    run = None
    if use_wandb:
        run = wandb.init(
            project=wandb_project,
            name=f"act={activation}|bs={batch_size}",
            config={
                "activation": activation, "batch_size": batch_size, "hidden_dims": hidden_dims,
                "dropout": dropout, "epochs": epochs, "lr": lr, "weight_decay": weight_decay
            }
        )

    best = {"val_acc": -1.0, "epoch": -1, "state": None, "val_loss": None, "train_loss": None, "train_acc": None}
    try:
        for ep in range(1, epochs + 1):
            model.train()
            tl, tc, n = 0.0, 0, 0
            for batch in train_loader:
                x = batch["input"].to(device)
                y = batch["target"].to(device)
                logits = model(x)
                loss = ce(logits, y)
                # Clear stale gradients, backpropagate, then update parameters.
                opt.zero_grad()
                loss.backward()
                opt.step()
                pred = torch.argmax(logits, 1)
                # Training metrics are accumulated from the train-mode forward
                # pass of each batch, i.e. while the weights are still changing.
                tl += loss.item() * x.size(0)
                tc += (pred == y).sum().item()
                n += x.size(0)
            train_loss = tl / n
            train_acc = tc / n

            val_loss, val_acc = evaluate(model, val_loader, device)

            if use_wandb:
                wandb.log({"epoch": ep, "train/loss": train_loss, "train/acc": train_acc,
                           "val/loss": val_loss, "val/acc": val_acc})

            # Keep a detached CPU copy of the weights at the best validation accuracy.
            if val_acc > best["val_acc"]:
                best.update({
                    "val_acc": val_acc, "epoch": ep,
                    "state": {k: v.detach().cpu().clone() for k, v in model.state_dict().items()},
                    "val_loss": val_loss, "train_loss": train_loss, "train_acc": train_acc
                })
    finally:
        # Close the W&B run even when training raises, so an aborted run is
        # not left open.
        if run is not None:
            run.finish()

    return best


# 셀 3 — 16개 조합 학습 & 결과표

In [None]:
# Grid search: 4 activations x 4 batch sizes = 16 training runs.
activations = ["sigmoid", "relu", "elu", "leaky_relu"]
batch_sizes = [16, 32, 64, 128]

# Hyperparameters shared by every run.
EPOCHS, LR, WD = 50, 1e-3, 0.0
HIDDEN_DIMS, DROPOUT = (64, 32), 0.0

records = []
t0 = time.time()
for act in activations:
    for bs in batch_sizes:
        print(f"▶ run: activation={act}, batch_size={bs}")
        best = train_one_run(
            activation=act,
            batch_size=bs,
            hidden_dims=HIDDEN_DIMS,
            dropout=DROPOUT,
            epochs=EPOCHS,
            lr=LR,
            weight_decay=WD,
            device=device,
            use_wandb=USE_WANDB,
            wandb_project=WANDB_PROJECT,
        )
        # One record per combination; "state" holds the best weights for later reuse.
        row = {"activation": act, "batch_size": bs, "best_epoch": best["epoch"]}
        row.update(
            (key, best[key])
            for key in ("val_acc", "val_loss", "train_acc", "train_loss", "state")
        )
        records.append(row)
print(f"총 소요시간: {time.time()-t0:.1f}s")

# Results table without the weights: best validation accuracy first, ties
# broken by activation name and then batch size.
metric_rows = [{k: v for k, v in rec.items() if k != "state"} for rec in records]
df_results = (
    pd.DataFrame(metric_rows)
    .sort_values(by=["val_acc", "activation", "batch_size"], ascending=[False, True, True])
    .reset_index(drop=True)
)
display(df_results)

# Optionally persist only the top five rows as CSV.
top5_path = "./outputs/grid_search_top5.csv"
os.makedirs("./outputs", exist_ok=True)
df_results.head(5).to_csv(top5_path, index=False)
print(f"Top-5 결과 저장 → {top5_path}")


▶ run: activation=sigmoid, batch_size=16


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▃▃▅▆▇▇▇▇▇▇▇█▇▇▇▇▇█▇▇▇▇█▇█▇▇▇▇█▇█▇▇▇████
train/loss,█▇▆▅▄▃▂▂▂▃▃▂▂▂▃▂▂▁▂▂▂▂▂▁▂▂▁▂▂▂▁▁▃▂▂▂▂▂▂▁
val/acc,▁▁▂▂▅▆█▇█▇██▇▆▇▇▇▇▇█▇▇█▇▇▇▇▅▆▇▇▇▇▇█▇██▇▇
val/loss,██▇▆▄▃▂▂▂▂▁▂▁▂▂▁▁▂▁▂▁▂▂▁▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁▂

0,1
epoch,50.0
train/acc,0.80224
train/loss,0.44033
val/acc,0.84831
val/loss,0.41918


▶ run: activation=sigmoid, batch_size=32


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▃▃▄▄▅▇▇▇▇▇▇▇█▇▇▇▇█▇▇▇▇▇███▇▇▇██▇█▇██▇█▇
train/loss,█▆▆▆▅▃▃▂▂▃▂▁▂▂▂▁▂▁▂▂▂▂▂▁▂▁▁▂▂▁▁▁▂▁▁▁▁▂▁▁
val/acc,▂▁▁▂▂▆███▇▇█▆▇▆▇▇▅▇██▆▇▇▆▆▇▇▆▇▇▇▇▇█▇██▇▆
val/loss,███▇▇▅▃▃▂▂▂▁▂▂▂▂▁▂▁▁▂▁▂▂▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▂

0,1
epoch,50.0
train/acc,0.79243
train/loss,0.44182
val/acc,0.83146
val/loss,0.42515


▶ run: activation=sigmoid, batch_size=64


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▅▅▅▅▅▅▆▇▆▇▇██▇▇██▇████▇█▇█████▇██▇█████
train/loss,█▆▆▅▅▄▄▄▃▃▂▂▂▂▂▁▂▂▂▁▂▂▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁
val/acc,▂▂▂▁▁▂▂▃▆▆▇▆▇▇▇▆█▇█▇▆▆█▇▆█▇▇▆▇▇▆█▇▇▆▆█▆▇
val/loss,█▇▆▇▇▆▆▅▅▄▃▃▂▂▂▂▁▂▂▁▂▂▂▁▁▁▁▁▂▂▁▂▁▁▁▂▂▁▂▁

0,1
epoch,50.0
train/acc,0.79102
train/loss,0.45186
val/acc,0.84831
val/loss,0.38164


▶ run: activation=sigmoid, batch_size=128


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▆▆▅▆▆▆▆▆▆▆▆▇▇▇▇████████████████████████
train/loss,█▆▅▅▅▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/acc,▁▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇████████▇██▇▇▇████████
val/loss,█▇▆▆▆▆▆▆▆▆▆▅▅▅▄▄▃▃▂▂▂▁▂▂▁▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁

0,1
epoch,50.0
train/acc,0.80084
train/loss,0.44639
val/acc,0.8764
val/loss,0.37103


▶ run: activation=relu, batch_size=16


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▃▄▅▅▆▆▇▇▆▆██▇█▇█▇▇▇███▇▇█▇███▇▇██▇▇█▇▇█
train/loss,█▆▆▆▅▄▃▃▃▂▃▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▁▂▁▁▁▂▁▂▁▂▁▁▂▁
val/acc,▁▂▂▃▅▆▇▇▇████▆▆██▇▇▇██▆▇█▇▇▇▇▇▇▇█▇▇▅▇▇▇▆
val/loss,█▇▇▆▅▄▂▂▂▂▁▁▂▂▂▁▂▂▁▂▁▂▂▁▂▂▁▂▂▂▂▁▂▂▁▂▂▁▁▃

0,1
epoch,50.0
train/acc,0.81487
train/loss,0.40352
val/acc,0.82022
val/loss,0.43134


▶ run: activation=relu, batch_size=32


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▄▄▅▆▇▇▇▇▇█▇▇▇█▇█▇▇████▇█▇▇▇███▇████▇███
train/loss,█▆▆▅▅▄▄▃▄▃▃▃▂▂▂▂▂▁▂▂▂▂▂▁▂▁▂▂▁▂▁▂▁▂▂▂▂▁▁▁
val/acc,▁▁▂▃▄▅▅▆▆▆▇█▇▆█▇▆▇▇▇▇▆▇█▇▇▇▆▇▇▆▆▇▇▇▇▇▇▇▆
val/loss,█▇▆▆▅▅▄▄▃▃▂▁▁▃▂▂▁▂▁▂▂▁▁▁▁▁▂▂▁▁▂▁▂▁▁▁▁▁▂▃

0,1
epoch,50.0
train/acc,0.82468
train/loss,0.39225
val/acc,0.80899
val/loss,0.45239


▶ run: activation=relu, batch_size=64


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇████▇██▇█
train/loss,█▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▁▁▂▁▁▁▁▁▂▁▁▁▁
val/acc,▁▁▁▂▃▄▄▅▅▅▆▆▅▆▇▆▆██▇▇▆█▇▇▇█▇▇▇▇▇██▇█▆▇▇█
val/loss,█▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▁▁▁▂▁▂▁▃▁▁▂▁▁▂▂▁▁▂▁▁▁▁▁

0,1
epoch,50.0
train/acc,0.8317
train/loss,0.38434
val/acc,0.85955
val/loss,0.38822


▶ run: activation=relu, batch_size=128


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
train/acc,▁▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇█▇████████████
train/loss,█▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val/acc,▁▁▁▁▁▂▂▂▃▃▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▆▇▇█▇██▇██▇█▇
val/loss,██▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▃▂▁▁▂▁▂▁▂▁▁▂▁▁▁▁▁

0,1
epoch,50.0
train/acc,0.83731
train/loss,0.38318
val/acc,0.84831
val/loss,0.38721


▶ run: activation=elu, batch_size=16


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▂▃▅▅▇▇▇▆▇▇▆██▇▇▇▇█▇▇▆▇██▇█▇▇▇▇▇▆██▇████
train/loss,█▇▆▅▄▃▂▂▂▃▃▁▂▂▂▂▂▁▂▁▂▂▁▁▂▁▁▂▁▂▁▁▁▂▁▂▁▁▂▁
val/acc,▁▁▂▄▇▇▇██▇██▇█▇█▇▆███▇█▇███▆▇█▇██▇█▇███▇
val/loss,█▇▆▄▃▂▂▂▁▂▁▁▂▂▂▁▁▂▁▁▁▁▂▁▂▁▁▂▂▂▂▂▁▁▁▂▁▁▁▂

0,1
epoch,50.0
train/acc,0.80645
train/loss,0.42353
val/acc,0.85955
val/loss,0.39466


▶ run: activation=elu, batch_size=32


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▃▄▄▅▆▇▇▇▇▇▆▇▇▇▇▇▇█▇▇▇█▇█▇██▇▇██▇▇▇███▇▇
train/loss,█▆▅▅▅▃▂▃▂▃▂▂▂▂▂▂▂▁▂▁▂▂▂▁▂▁▁▂▁▁▁▂▁▁▁▂▂▁▂▁
val/acc,▁▁▁▃▄▆█▇▇▇▇█▆▇██▆▆█▇█▆▆▇▆▇▇█▅█▆▆█▇▇▆█▇▇▆
val/loss,█▇▆▆▅▄▂▂▁▂▁▁▂▁▂▁▂▂▁▁▂▂▁▂▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▂

0,1
epoch,50.0
train/acc,0.79383
train/loss,0.42395
val/acc,0.83708
val/loss,0.40343


▶ run: activation=elu, batch_size=64


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▄▅▅▅▆▆▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇▇▇██▇███████
train/loss,█▆▆▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▂▁▁▂▁▁▁▁▁
val/acc,▁▁▁▁▃▄▃▅▆▇▇▇▅█▆█▆▇▇▇▇▆█▇▇▇▆▇▇▇▇▇▇█▇█▇█▆█
val/loss,██▇▇▆▅▄▄▄▃▃▂▂▃▂▂▁▂▁▂▂▃▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁

0,1
epoch,50.0
train/acc,0.80365
train/loss,0.42734
val/acc,0.8764
val/loss,0.38093


▶ run: activation=elu, batch_size=128


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/acc,▁▅▅▅▅▆▆▆▇▇▇▇▇▇▇█▇███▇███▇███████████████
train/loss,█▆▆▆▅▅▅▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val/acc,▁▁▁▁▁▁▁▂▃▃▄▅▅▅▆▆▆▆▇▆▇█▇█▇▇▇▇▇▇▇▇▇██▇▇█▆█
val/loss,████▇▆▆▆▅▅▄▄▃▃▃▂▂▂▂▂▁▁▂▂▂▁▁▂▁▂▂▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
train/acc,0.81767
train/loss,0.4185
val/acc,0.8764
val/loss,0.37587


▶ run: activation=leaky_relu, batch_size=16


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/acc,▁▃▄▄▅▆▆▇▇▆▇▇█▇▇█▇▇█▇▇█▇▇█▇▇▇▇▇▇▇██▇▇█▇▇█
train/loss,█▆▆▆▅▄▃▃▃▂▃▃▂▂▂▂▂▁▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▂▂▂▂▂▁▂
val/acc,▁▃▃▅▇▇▇▇███▇▇██▇▆▇▇▇▇██▆▇▇▆▇▆▇▇█▇▆▇▆▆▇▇▆
val/loss,█▇▇▆▅▅▂▂▂▂▁▁▂▁▂▂▂▂▂▂▁▂▂▁▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂

0,1
epoch,50.0
train/acc,0.82328
train/loss,0.40224
val/acc,0.82022
val/loss,0.41078


▶ run: activation=leaky_relu, batch_size=32


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▄▄▅▅▆▆▇▇▇▇▇▇▇█▇█▇█████▇██▇██▇▇▇████▇███
train/loss,█▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▁▂▁▂▂▂▂▂▂▂▁▁▂▂▂▁▂▁▂▂▁▂▁
val/acc,▁▁▂▂▄▅▅▅▆▆▆██▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇█▆▇▇▇▇▇█▇▇▆
val/loss,█▇▆▆▆▅▄▄▃▃▂▁▃▂▁▁▃▂▁▁▁▂▁▁▁▁▁▂▁▁▂▁▂▁▁▁▁▁▁▃

0,1
epoch,50.0
train/acc,0.82048
train/loss,0.39124
val/acc,0.82022
val/loss,0.44993


▶ run: activation=leaky_relu, batch_size=64


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/acc,▁▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇█▇▇▇█████████▇█
train/loss,█▆▆▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▂▁▁▁
val/acc,▁▁▂▂▃▄▅▄▅▅▆▆▅▆▇▆▇█▇▇▇▇█▇▇█▇▇▇▇▇▇▇█▇█▇▇█▇
val/loss,██▇▆▆▆▅▅▅▅▅▄▄▄▃▃▂▃▂▁▁▂▁▂▁▁▁▁▂▁▂▂▁▁▁▁▁▁▁▁

0,1
epoch,50.0
train/acc,0.83029
train/loss,0.38568
val/acc,0.8427
val/loss,0.39351


▶ run: activation=leaky_relu, batch_size=128


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/acc,▁▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█▇█▇██▇▇█████▇█████
train/loss,█▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val/acc,▁▁▁▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▆▇██▆███▇▇▇▇
val/loss,███▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▃▃▃▂▃▃▂▂▁▁▂▁▁▁▂▁▁▁▂▁▁▁

0,1
epoch,50.0
train/acc,0.84432
train/loss,0.38334
val/acc,0.84831
val/loss,0.39078


총 소요시간: 128.6s


Unnamed: 0,activation,batch_size,best_epoch,val_acc,val_loss,train_acc,train_loss
0,elu,32,9,0.893258,0.410284,0.779804,0.467223
1,elu,16,13,0.88764,0.394448,0.796634,0.47252
2,elu,64,28,0.88764,0.38325,0.788219,0.457211
3,elu,128,28,0.88764,0.387593,0.799439,0.439278
4,leaky_relu,32,14,0.88764,0.381679,0.779804,0.452256
5,relu,32,14,0.88764,0.38814,0.781206,0.457049
6,sigmoid,128,44,0.88764,0.371792,0.809257,0.448236
7,leaky_relu,128,45,0.882022,0.398723,0.823282,0.390143
8,relu,16,13,0.882022,0.397881,0.795231,0.464672
9,sigmoid,64,24,0.882022,0.376557,0.786816,0.458726


Top-5 결과 저장 → ./outputs/grid_search_top5.csv


# 셀 4: 최고 구성 복원 및 submission 생성

In [None]:
assert len(df_results) > 0, "그리드 서치 결과가 없습니다."

# df_results is sorted best-first, so row 0 is the winning configuration.
best_act = df_results.loc[0, "activation"]
best_bs  = int(df_results.loc[0, "batch_size"])
print(f"BEST → activation={best_act}, batch_size={best_bs}, "
      f"val_acc={df_results.loc[0,'val_acc']:.4f}, epoch={int(df_results.loc[0,'best_epoch'])}")

# Look up the stored weights of that configuration.
best_state = [r["state"] for r in records if r["activation"]==best_act and r["batch_size"]==best_bs][0]

# Rebuild the same architecture and load the best weights.
best_model = MLP(in_dim=in_dim, hidden_dims=HIDDEN_DIMS, num_classes=2, act=best_act, dropout=DROPOUT).to(device)
best_model.load_state_dict(best_state)
best_model.eval()

# Predict on the test split. Predictions from every batch are collected, so
# the result stays correct even if the loader is later changed to yield more
# than one batch (previously only the last batch's predictions survived).
test_loader = DataLoader(test_ds, batch_size=len(test_ds), shuffle=False)
batch_preds = []
with torch.no_grad():
    for b in test_loader:
        x = b["input"].to(device)
        logits = best_model(x)
        batch_preds.append(torch.argmax(logits, dim=1).cpu())
pred = torch.cat(batch_preds).numpy()

# Write the submission file.
test_csv = pd.read_csv(TEST_CSV_PATH)
if len(test_csv) != len(pred):
    # Fail with an explicit message instead of pandas' generic length error.
    raise ValueError(
        f"test.csv has {len(test_csv)} rows but {len(pred)} predictions were produced"
    )
submission = pd.DataFrame({"PassengerId": test_csv["PassengerId"].values, "Survived": pred.astype(int)})
out_path = "./outputs/submission_best.csv"
# Create the output directory here too, so this cell does not depend on the
# directory having been created elsewhere.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
submission.to_csv(out_path, index=False)
print(f"Saved submission → {out_path}")


BEST → activation=elu, batch_size=32, val_acc=0.8933, epoch=9
Saved submission → ./outputs/submission_best.csv


![alt text](image.png)

# 숙제 후기

### 데이터 전처리의 중요성을 알게 되었습니다. 어떤 데이터를 사용하는지, 결측치가 있는지, 학습에 중요한 데이터인지를 파악하는 것이 모델의 성능을 올리는 가장 기본적인 방법인 것 같습니다.
### 학습 과제에 따라 적합한 활성화 함수와 배치 크기가 달라진다는 것을 알게 되었습니다. 다만 어떤 활성화 함수를 고르고 배치 크기를 얼마로 정해야 하는지는 아직 잘 모르겠습니다. 더 많이 실험해 봐야 알 수 있을 것 같습니다.
### WandB를 처음 사용해 보았는데, 시각화가 잘 되어 있어서 성능 비교가 편한 것 같습니다.