In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from pytorch_tabnet.tab_model import TabNetClassifier

df = pd.read_csv("./spea_data_nvme/spea_pass_fail_data.csv")
X = df.iloc[:, :-1].values.astype(np.float32)
y = df.iloc[:, -1].values.astype(np.int64)
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
device = "cuda" if torch.cuda.is_available() else "cpu"


In [8]:
np.set_printoptions(suppress=True, linewidth=120)
print("MEAN =", scaler.mean_.tolist())
print("STD  =", scaler.scale_.tolist()) # For normalize in Real System.(=Device)

MEAN = [1.0330383038436266, 0.9921870873230328, -5.986730383311507e-05, 0.999709199079134, 0.00018871763767504268, 1.0001191663871114, 0.00010791785095278968, -6.889660576601636e-06, 0.9860516957259265, 0.9729566646191812, -3.6380720025332668e-06, 1.0248069517773497, 0.9910081398957061, 1.0001973061100071, -0.0001358640079276542, -0.00018716975807537948, -0.00023357994728190492, 1.050537061310052, 0.00041538498954872777, 0.00012647459659548423, 0.99868456479531, 0.9927704092292291, 0.9914302523915063, 0.9999569777123373, 1.0097155053653102, 0.9598040141656471, 1.1414477781874402e-05, 1.0344419401524945]
STD  = [0.3652252280428611, 0.48739150905466955, 1.0076352657638126, 1.0493326559525138, 1.006322919804772, 1.4002176241388296, 1.0094259855862298, 1.0063325457549883, 0.505650927798952, 0.5253252364315946, 1.0060853076783716, 0.3809724692806453, 0.47504535280103805, 1.027789063737055, 1.0091309884792374, 1.0059060292420945, 1.0062963827082931, 0.16452628281899337, 1.0087259731914986, 1

In [None]:
class LinearStudent(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.fc = nn.Linear(in_dim, 1)
    def forward(self, x):
        return torch.sigmoid(self.fc(x))

class SmallMLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 64), nn.ReLU(),
            nn.Linear(64, 1), nn.Sigmoid()
        )
    def forward(self, x): return self.net(x)

students = {
    "linear": LinearStudent(X_train.shape[1]),
    "sml_mlp": SmallMLP(X_train.shape[1])
}

In [None]:
xgb = XGBClassifier()
xgb.load_model("./save/XGB/xgb.model")

class MLP(nn.Module):
    def __init__(self, in_dim=28):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid(), # BCE Loss 였음.
        )
    def forward(self, x):
        return self.net(x)

mlp_teacher = MLP(in_dim=X_train.shape[1])
mlp_teacher.load_state_dict(torch.load("./save/MLP/mlp.pt", map_location=device))
mlp_teacher.to(device).eval()

tab_teacher = TabNetClassifier()
tab_teacher.load_model("./save/TABNET/tabnet_best_model.zip")

with torch.no_grad():
    p_xgb = xgb.predict_proba(X_train)[:, 1]
    p_tab = tab_teacher.predict_proba(X_train)[:, 1]
    p_mlp = []
    for i in range(0, len(X_train), 2048):
        xb = torch.tensor(X_train[i:i+2048], device=device)
        p_mlp.append(mlp_teacher(xb).cpu().numpy())
    p_mlp = np.vstack(p_mlp).ravel()

T = 4.0
soft_logits = np.log(np.clip(np.stack([p_xgb, p_mlp, p_tab], 1), 1e-6, 1-1e-6))
soft_logits /= T
p_soft = torch.sigmoid(torch.tensor(soft_logits).mean(1)).numpy()

train_ds = TensorDataset(
    torch.tensor(X_train),
    torch.tensor(y_train),
    torch.tensor(p_soft, dtype=torch.float32)
)
val_ds = TensorDataset(torch.tensor(X_val),
                       torch.tensor(y_val, dtype=torch.float32))
train_loader = DataLoader(train_ds, batch_size=512, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=2048, shuffle=False)



In [None]:
a = 0.4
epochs = 100
patience_limit = 10

criterion_hard = nn.BCELoss()
criterion_soft = nn.BCELoss()

for name, model in students.items():
    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=3e-3)
    best_auc, patience = 0, 0

    for epoch in range(epochs):
        model.train()
        for xb, yb_hard, yb_soft in train_loader:
            xb = xb.to(device)
            yb_hard = yb_hard.float().unsqueeze(1).to(device)
            yb_soft = yb_soft.float().unsqueeze(1).to(device)

            opt.zero_grad()
            out = model(xb)
            loss = a*criterion_hard(out, yb_hard) + (1-a)*criterion_soft(out, yb_soft)
            loss.backward()
            opt.step()

        model.eval()
        preds = []
        with torch.no_grad():
            for xb, _ in val_loader:
                preds.append(model(xb.to(device)).cpu().numpy())
        preds = np.vstack(preds).ravel()
        auc = roc_auc_score(y_val, preds)
        if auc > best_auc:
            best_auc, patience = auc, 0
            torch.save(model.state_dict(), f"./save/STUDENT/{name}.pt")
        else:
            patience += 1
        if patience >= patience_limit:
            print(f"{name} early-stop @ {epoch}")
            break
    print(f"{name} best ROC-AUC: {best_auc:.4f}")


linear early-stop @ 26
linear best ROC-AUC: 0.6833
sml_mlp early-stop @ 35
sml_mlp best ROC-AUC: 0.8227


In [5]:
for name, cls in [("linear", LinearStudent), ("sml_mlp", SmallMLP)]:
    model = cls(X_train.shape[1]).to(device)
    model.load_state_dict(torch.load(f"./save/STUDENT/{name}.pt"))
    model.eval()
    with torch.no_grad():
        preds = []
        for xb, _ in val_loader:
            preds.append(model(xb.to(device)).cpu().numpy())
    preds = np.vstack(preds).ravel()
    acc = accuracy_score(y_val, preds>0.5)
    auc = roc_auc_score(y_val, preds)
    print(f"{name}: ACC {acc:.4f}, AUC {auc:.4f}")


linear: ACC 0.6137, AUC 0.6833
sml_mlp: ACC 0.7356, AUC 0.8227


In [None]:
import torch, torch.nn as nn
from torch.nn.utils import prune
from sklearn.metrics import accuracy_score, roc_auc_score

def evaluate(model, dataloader, y_true):
    model.eval()
    preds = []
    with torch.no_grad():
        for xb, _ in dataloader:
            preds.append(model(xb).cpu().numpy())
    preds = torch.vstack([torch.tensor(p) for p in preds]).numpy().ravel()
    acc = accuracy_score(y_true, preds > 0.5)
    auc = roc_auc_score(y_true, preds)
    return acc, auc

device = "cpu"
in_dim  = X_val.shape[1]

sml = SmallMLP(in_dim).to(device)
sml.load_state_dict(torch.load("./save/STUDENT/sml_mlp.pt", map_location=device))


for module in sml.modules():
    if isinstance(module, nn.Linear):
        prune.l1_unstructured(module, name="weight", amount=0.5)
        prune.remove(module, "weight")

pruned_acc, pruned_auc = evaluate(sml, val_loader, y_val)
print(f"[Pruned]  ACC {pruned_acc:.4f}  AUC {pruned_auc:.4f}")

torch.save(sml.state_dict(), "./save/STUDENT/sml_mlp_pruned.pt")

quantized = torch.quantization.quantize_dynamic(
    sml, {nn.Linear}, dtype=torch.qint8
)

q_acc, q_auc = evaluate(quantized, val_loader, y_val)
print(f"[Quantized] ACC {q_acc:.4f}  AUC {q_auc:.4f}")

torch.save(quantized.state_dict(), "./save/STUDENT/sml_mlp_pruned_int8.pt")

[Pruned]  ACC 0.6675  AUC 0.7668
[Quantized] ACC 0.6638  AUC 0.7620
