In [1]:
import os
import sys
import re

project_root = "/root/work/tenset"
os.environ["TVM_HOME"] = f"{project_root}"
os.environ["TVM_LIBRARY_PATH"] = f"{project_root}/build"
if f"{project_root}/python" not in sys.path:
    sys.path.insert(0, f"{project_root}/python")

sys.path = [p for p in sys.path if not p.startswith(f"{project_root}/build")]
sys.path.append(f"{project_root}/build")
os.environ["LD_LIBRARY_PATH"] = f"{project_root}/build:" + os.environ.get("LD_LIBRARY_PATH", "")

In [2]:
import sys
sys.path.append("/root/work/tenset/scripts")

from tvm import auto_scheduler
from print_programs import return_program
from tvm.auto_scheduler.feature import get_per_store_features_from_file
from make_dataset import load_and_register_tasks
import numpy as np

json_file = "/root/work/tenset/dataset/measure_records_tenset/k80/([9f4c6b76f51d20e5c3bfb1817edd446e,1,64,64,64,1,1,64,256,1,1,1,256,1,64,64,256],cuda).json"

tasks = load_and_register_tasks()
inputs, results = auto_scheduler.RecordReader(json_file).read_lines()
raw_features, raw_normalized_throughputs, task_ids, min_latency = get_per_store_features_from_file(json_file, 10000)

In [5]:
# records = {
#     "schedules": [],
#     "cost_mean": [],
#     "feature" : []
# }

# for i in range(len(inputs)):
#     state, cost = return_program(inputs[i], results[i])
#     # break
#     if state is not None:
#         cost_mean = np.mean([x.value for x in cost])
#         feature = raw_features[i]
#         records["feature"].append(feature)
#         records["schedules"].append(state)
#         records["cost_mean"].append(cost_mean)


# features = np.array(records["feature"], dtype=np.float32)
# costs = np.array(records["cost_mean"], dtype=np.float32) * 1000

# features = features.sum(axis=1)

In [3]:
features = []
costs = []
for feature, throughput in zip(raw_features, raw_normalized_throughputs):
    if throughput > 1.0e-10:
        features.append(feature)
        costs.append(-np.log(throughput))


features = np.array(features, dtype=np.float32)
costs = np.array(costs, dtype=np.float32)

# features = features.sum(axis=1)

In [4]:
import torch
import torch.nn as nn

class MLPRegressor(nn.Module):
    def __init__(self, in_dim, hidden_dim=256):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )

        self.net = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x):
        
        
        h = self.encoder(x)
        # torch N*4,D -> N,4,D
        h = h.view(-1, 4, h.size(-1))
        # 평균값 계산 N,4,D -> N,D
        h_mean = torch.mean(h, dim=1)
        return self.net(h_mean)



In [5]:
from torch.nn import functional as F
def pairwise_ranking_loss(cost_pred, cost_true, margin=0.1):
    """
    Pairwise ranking loss: 실제 cost 순서를 예측이 유지하도록.
    cost_true[i] < cost_true[j] 이면 cost_pred[i] < cost_pred[j] + margin
    """
    batch_size = cost_pred.size(0)
    if batch_size < 2:
        return torch.tensor(0.0, device=cost_pred.device)
    
    # 모든 쌍에 대해 ranking loss 계산
    idx = torch.arange(batch_size, device=cost_pred.device)
    i_idx, j_idx = torch.meshgrid(idx, idx, indexing='ij')
    mask = i_idx < j_idx  # upper triangular only
    
    pred_i = cost_pred[i_idx[mask]]
    pred_j = cost_pred[j_idx[mask]]
    true_i = cost_true[i_idx[mask]]
    true_j = cost_true[j_idx[mask]]
    
    # label: 1 if true_i < true_j, -1 otherwise
    labels = torch.sign(true_j - true_i).float()
    
    # Margin ranking loss
    loss = F.margin_ranking_loss(pred_j.view(-1), pred_i.view(-1), labels.view(-1), margin=margin)
    return loss

def reg_loss_fn(cost_pred, cost_true, loss_type='mse'):
    """
    기본 회귀 손실 (MSE 또는 MAE)
    """
    if loss_type == 'mse':
        return F.mse_loss(cost_pred, cost_true)
    else:  # mae
        return F.l1_loss(cost_pred, cost_true)
    
def total_loss_fn(cost_pred, cost_true, alpha=0.5, margin=0.1):
    """
    회귀 손실과 쌍별 순위 손실의 가중 합
    """
    reg_loss = reg_loss_fn(cost_pred, cost_true, loss_type='mse')
    rank_loss = pairwise_ranking_loss(cost_pred, cost_true, margin=margin)
    return alpha * reg_loss + rank_loss

In [6]:
def pair_accuracy(cost_pred, labels):
    """
    cost_pred, labels: (B,) 텐서
    """
    n_samples = min(1000, len(cost_pred))
    sample_indices = np.random.choice(len(cost_pred), n_samples, replace=False)

    with torch.no_grad():
        correct = 0
        total = 0
        for i in range(n_samples):
            for j in range(i + 1, n_samples):
                idx_i = sample_indices[i]
                idx_j = sample_indices[j]
                pred_diff = cost_pred[idx_i] - cost_pred[idx_j]
                true_diff = labels[idx_i] - labels[idx_j]
                if (pred_diff * true_diff) > 0:
                    correct += 1
                total += 1
        accuracy = correct / total if total > 0 else 0.0
    return accuracy

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import DataLoader, Dataset

class NpzRegressionDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X).float()
        self.y = torch.from_numpy(y).float()
        # y shape이 (N,)이면 (N,1)로 바꿔주는 게 편할 때가 많음
        if self.y.ndim == 1:
            self.y = self.y.unsqueeze(1)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_size = 64
random_indices = np.random.permutation(len(features))[:train_size]
features_64 = features[random_indices]
costs_64 = costs[random_indices]
X_train = features_64
y_train = costs_64
X_val = features[~np.isin(np.arange(len(features)), random_indices)]
y_val = costs[~np.isin(np.arange(len(costs)), random_indices)]

# X_train, X_val, y_train, y_val = train_test_split(
#     features, costs, test_size=0.2, random_state=42
# )

X_train_flat = X_train.reshape(X_train.shape[0] * X_train.shape[1], X_train.shape[2])
X_val_flat = X_val.reshape(X_val.shape[0] * X_val.shape[1], X_val.shape[2])

scaler = StandardScaler()
X_train_flat_scaled = scaler.fit_transform(X_train_flat)
X_val_flat_scaled = scaler.transform(X_val_flat)
X_train_scaled = X_train_flat_scaled.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2])
X_val_scaled = X_val_flat_scaled.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2])

train_dataset = NpzRegressionDataset(X_train_scaled, y_train)
val_dataset   = NpzRegressionDataset(X_val_scaled,   y_val)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_dataset,   batch_size=128, shuffle=False)

In [11]:
num_epochs = 200
input_dim = X_train.shape[-1]
model = MLPRegressor(input_dim, hidden_dim=256)


# criterion = nn.MarginRankingLoss(margin=0.01)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(1, num_epochs+1):
    # --- train ---
    model.train()
    train_loss = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        yb = yb.squeeze()
        # flatten input N,4,D -> N*4,D
        xb = xb.view(xb.size(0) * xb.size(1), xb.size(2))

        pred = model(xb)
        loss = total_loss_fn(pred, yb, alpha=0.01, margin=0.01)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_dataset)

    # --- validation ---
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            yb = yb.squeeze()
            xb = xb.view(xb.size(0) * xb.size(1), xb.size(2))

            pred = model(xb)
            loss = total_loss_fn(pred, yb, alpha=0.01, margin=0.01)
            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_dataset)

    if (epoch) % 50 == 0:
        print(f"Epoch [{epoch}/{num_epochs}] "
              f"train_loss={train_loss:.4f} val_loss={val_loss:.4f}")



  return F.mse_loss(cost_pred, cost_true)
  return F.mse_loss(cost_pred, cost_true)
  return F.mse_loss(cost_pred, cost_true)


Epoch [50/200] train_loss=0.1069 val_loss=0.1823
Epoch [100/200] train_loss=0.0962 val_loss=0.1771
Epoch [150/200] train_loss=0.0959 val_loss=0.1759
Epoch [200/200] train_loss=0.0959 val_loss=0.1758


In [12]:
rank_inputs = torch.from_numpy(features).float().to(device)
rank_inputs = rank_inputs.view(rank_inputs.size(0) * rank_inputs.size(1), rank_inputs.size(2))
rank_labels = torch.from_numpy(costs).float().to(device)
pred = model(rank_inputs)
rank_r2 = pair_accuracy(pred, rank_labels)
print(f"Final Pairwise Ranking Accuracy: {rank_r2:.4f}")

Final Pairwise Ranking Accuracy: 0.4006
