In [1]:
import os
import sys
import re

# --- Environment bootstrap for the local tenset/TVM checkout -----------------
# Exports the TVM locations and puts the in-tree Python package and the build
# directory on the import/search paths. Safe to re-run: every step below leaves
# exactly one copy of each path entry.
project_root = "/root/work/tenset"
_build_dir = f"{project_root}/build"
_python_dir = f"{project_root}/python"

os.environ["TVM_HOME"] = f"{project_root}"
os.environ["TVM_LIBRARY_PATH"] = _build_dir

# Put the in-tree package at the front of sys.path so it is searched first.
if _python_dir not in sys.path:
    sys.path.insert(0, _python_dir)

# Remove any earlier build-dir entries, then append a single fresh one.
sys.path = [p for p in sys.path if not p.startswith(_build_dir)]
sys.path.append(_build_dir)

# Prepend the build directory to LD_LIBRARY_PATH.
# Empty entries are dropped on purpose: the dynamic loader treats a zero-length
# entry (e.g. the trailing ":" produced when the variable was previously unset)
# as the current working directory. Existing copies of the build directory are
# dropped too, so re-running this cell does not grow the variable.
# NOTE(review): the loader normally reads LD_LIBRARY_PATH at process start, so
# this presumably matters mainly for subprocesses spawned from this kernel.
_ld_entries = [
    entry
    for entry in os.environ.get("LD_LIBRARY_PATH", "").split(":")
    if entry and entry != _build_dir
]
os.environ["LD_LIBRARY_PATH"] = ":".join([_build_dir] + _ld_entries)

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

class NpzRegressionDataset(Dataset):
    """In-memory regression dataset built from a pair of NumPy arrays.

    Both arrays are converted to float32 tensors once, at construction time.
    Indexing returns a ``(features, target)`` tuple and the length is the size
    of the first dimension of ``X``.

    Args:
        X: NumPy array of input features; the first axis indexes samples.
        y: NumPy array of targets; a 1-D array is stored as a column.
    """

    def __init__(self, X, y):
        features = torch.from_numpy(X).to(torch.float32)
        targets = torch.from_numpy(y).to(torch.float32)

        self.X = features
        # When y has shape (N,), turning it into (N, 1) is often more convenient.
        self.y = targets.unsqueeze(1) if targets.dim() == 1 else targets

    def __len__(self):
        return self.X.size(0)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        return sample, target

import torch.nn as nn



class MLPRegressor(nn.Module):
    """Fully connected ReLU network for regression.

    The network is ``num_hidden_layers`` blocks of ``Linear -> ReLU`` followed
    by a final ``Linear`` projection to ``out_dim``. With the default arguments
    the layer stack (and therefore the ``state_dict`` keys ``net.0``, ``net.2``,
    ``net.4``, ``net.6``) is the same as the previous fixed 256-wide,
    three-hidden-layer model.

    Args:
        in_dim: Size of each input feature vector.
        out_dim: Number of outputs per sample.
        hidden_dim: Width of every hidden layer. Defaults to 256.
        num_hidden_layers: Number of ``Linear -> ReLU`` blocks before the
            output layer. Defaults to 3; 0 yields a single linear layer.

    Raises:
        ValueError: If ``hidden_dim`` is < 1 or ``num_hidden_layers`` is < 0.
    """

    def __init__(self, in_dim, out_dim, hidden_dim=256, num_hidden_layers=3):
        super().__init__()
        if hidden_dim < 1:
            raise ValueError(f"hidden_dim must be >= 1, got {hidden_dim}")
        if num_hidden_layers < 0:
            raise ValueError(
                f"num_hidden_layers must be >= 0, got {num_hidden_layers}"
            )

        layers = []
        width = in_dim
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(width, hidden_dim))
            layers.append(nn.ReLU())
            width = hidden_dim
        layers.append(nn.Linear(width, out_dim))

        # Positional construction keeps the numeric sub-module names
        # (net.0, net.1, ...) so previously saved checkpoints still load.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for a batch ``x`` of input vectors."""
        return self.net(x)



In [6]:
import numpy as np
import torch

# Key names noted for the archive (only "diff_values" and "cost" are read here):
#   record_index, vector_index, diff_indices, diff_values, cost

# Small offset added before taking the log so that zero entries stay finite.
LOG_EPS = 1e-8

json_diffs = np.load("i_vectors_diffs.npz")

# Model inputs: natural log of the offset diff values.
# Raw-value alternative kept for reference:
#   input_data = json_diffs["diff_values"]
input_data = np.log(json_diffs["diff_values"] + LOG_EPS)

# Regression target: negative natural log of the cost.
cost = np.negative(np.log(json_diffs["cost"]))

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_val, y_train, y_val = train_test_split(
    input_data,
    cost,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Wrap the scaled arrays as tensor datasets.
train_dataset = NpzRegressionDataset(X_train_scaled, y_train)
val_dataset = NpzRegressionDataset(X_val_scaled, y_val)

# Only the training batches are shuffled.
BATCH_SIZE = 128
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [10]:
# --- Training configuration ---------------------------------------------------
num_epochs = 100
input_dim = X_train.shape[1]
output_dim = 1

# Seed PyTorch's global RNG before the model is built. This fixes the weight
# initialisation and the shuffle order of a DataLoader that was created without
# its own generator, matching the seeded train/validation split.
# NOTE(review): bit-exact repeatability on GPU may additionally require
# deterministic algorithms to be enabled.
torch.manual_seed(42)

model = MLPRegressor(input_dim, output_dim)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    # --- train ---
    model.train()
    train_loss = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        pred = model(xb)
        loss = criterion(pred, yb)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so the
        # epoch figure is a per-sample average even when the last batch is short.
        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_dataset)

    # --- validation ---
    model.eval()
    val_loss = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)

            pred = model(xb)
            loss = criterion(pred, yb)
            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_dataset)

    print(f"Epoch [{epoch+1}/{num_epochs}] "
          f"train_loss={train_loss:.4f} val_loss={val_loss:.4f}")


Epoch [1/100] train_loss=15.2328 val_loss=3.0228
Epoch [2/100] train_loss=2.4433 val_loss=1.3649
Epoch [3/100] train_loss=1.1367 val_loss=0.8493
Epoch [4/100] train_loss=0.8275 val_loss=0.6965
Epoch [5/100] train_loss=0.6799 val_loss=0.5936
Epoch [6/100] train_loss=0.5879 val_loss=0.5446
Epoch [7/100] train_loss=0.5061 val_loss=0.4729
Epoch [8/100] train_loss=0.4488 val_loss=0.4311
Epoch [9/100] train_loss=0.3924 val_loss=0.3994
Epoch [10/100] train_loss=0.3390 val_loss=0.3649
Epoch [11/100] train_loss=0.2992 val_loss=0.3420
Epoch [12/100] train_loss=0.2661 val_loss=0.3175
Epoch [13/100] train_loss=0.2425 val_loss=0.3178
Epoch [14/100] train_loss=0.2226 val_loss=0.2818
Epoch [15/100] train_loss=0.1989 val_loss=0.2775
Epoch [16/100] train_loss=0.1825 val_loss=0.2538
Epoch [17/100] train_loss=0.1653 val_loss=0.2441
Epoch [18/100] train_loss=0.1509 val_loss=0.2411
Epoch [19/100] train_loss=0.1435 val_loss=0.2279
Epoch [20/100] train_loss=0.1319 val_loss=0.2173
Epoch [21/100] train_loss=0.

In [1]:
import matplotlib.font_manager

# Print the file path of every entry in matplotlib's fontManager.ttflist.
font_paths = [entry.fname for entry in matplotlib.font_manager.fontManager.ttflist]
print(font_paths)

['/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumGothicBold.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumMyeongjo.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumPen.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/STIXGeneral.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymReg.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumBrush.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/STIXSizOneSymBol.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumMyeongjoEcoBold.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/mpl-data/fonts/ttf/NanumGothicLight.ttf', '/root/work/tenset/.venv/lib/python3.8/site-packages/matplotlib/