In [1]:
import os

# Remember where the kernel was started.
current_dir = os.getcwd()

# Build "<cwd>/.." and normalise it into the absolute path one level up.
_one_level_up = os.path.join(current_dir, os.pardir)
parent_dir = os.path.abspath(_one_level_up)

# Make the parent folder the working directory (presumably so the relative
# 'data/...' paths used further down resolve from there -- confirm layout).
# NOTE(review): every re-run of this cell climbs one more level, so it should
# be executed only once per kernel session.
os.chdir(parent_dir)

In [6]:
import numpy as np
import pandas as pd
import joblib
# Load the reaction table, then display only the rows whose last column is
# strictly greater than 80.
c_data = np.load('data/reaction.npy')
_last_col_above_80 = c_data[:, -1] > 80
c_data[_last_col_above_80]

array([[7.0e+02, 5.0e+01, 5.0e+01, ..., 4.0e+00, 6.0e+01, 8.1e+01],
       [7.5e+02, 5.0e+01, 5.0e+01, ..., 4.0e+00, 6.0e+01, 9.0e+01],
       [7.5e+02, 5.0e+01, 5.0e+01, ..., 4.0e+00, 6.0e+01, 9.1e+01],
       ...,
       [8.0e+02, 2.5e+01, 2.5e+01, ..., 5.0e-01, 1.2e+02, 9.5e+01],
       [8.5e+02, 2.5e+01, 2.5e+01, ..., 5.0e-01, 1.2e+02, 9.7e+01],
       [9.0e+02, 2.5e+01, 2.5e+01, ..., 5.0e-01, 1.2e+02, 9.8e+01]])

In [None]:
import os
import numpy as np
import joblib
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler,StandardScaler

# -----------------------
# 0) Load
# -----------------------
# Raw arrays are read from ./data relative to the current working directory.
# Per the original notes (not verified here): active.npy is (N, x_dim) and
# reaction.npy is (N, c_dim+1) with the label assumed to be the last column.
x_data = np.load('./data/active.npy')
c_all = np.load('./data/reaction.npy')
# Optional promoter features; concatenate them below if they are needed:
# s_data = np.load('./data/promoter.npy')

# -----------------------
# 1) Split c -> (c_feat, y)
# -----------------------
# c_all is expected to be 2-D with at least one condition column followed by
# the label column; fail loudly instead of silently producing empty features.
if c_all.ndim != 2 or c_all.shape[1] < 2:
    raise ValueError(
        f"reaction array must be 2-D with at least 2 columns, got shape {c_all.shape}"
    )

# Condition inputs: every column EXCEPT the last one. Previously the whole
# array (label column included) was used here, which leaked the target value
# into the conditioning features that are fed to the model.
# NOTE(review): this reduces the width of c_feat by one column, so the saved
# c_scaler and any model sized for the old width must be regenerated.
c_feat = c_all[:, :-1].astype(np.float32)

# Conversion label: the last column of the reaction array.
y_data = c_all[:, -1].astype(np.float32)

# Cast the primary inputs to float32 as well.
x_data = x_data.astype(np.float32)

# -----------------------
# 2) Train/Val/Test split (0.6 / 0.2 / 0.2)
#    (the labeled/unlabeled separation happens later, inside train only)
# -----------------------
# First cut: 60% train, 40% held out.
_first_cut = train_test_split(
    x_data, c_feat, y_data,
    test_size=0.4,
    random_state=21,
)
x_train, x_tmp, c_train, c_tmp, y_train, y_tmp = _first_cut

# Second cut: the held-out 40% is halved into validation and test.
_second_cut = train_test_split(
    x_tmp, c_tmp, y_tmp,
    test_size=0.5,
    random_state=21,
)
x_val, x_test, c_val, c_test, y_val, y_test = _second_cut

# -----------------------
# 3) Scaling (statistics are fitted on the training split only)
# -----------------------
x_scaler = MaxAbsScaler()
c_scaler = StandardScaler()

# Fit on train and transform it in the same call.
x_train_s = x_scaler.fit_transform(x_train)
c_train_s = c_scaler.fit_transform(c_train)

# Validation and test reuse the train-fitted scalers (no refitting).
x_val_s, x_test_s = (x_scaler.transform(part) for part in (x_val, x_test))
c_val_s, c_test_s = (c_scaler.transform(part) for part in (c_val, c_test))

# Persist both fitted scalers under ./torch, creating the folder if needed.
os.makedirs('./torch', exist_ok=True)
for _fitted, _pkl_path in (
    (x_scaler, './torch/25x_scaler.pkl'),
    (c_scaler, './torch/25c_scaler.pkl'),
):
    joblib.dump(_fitted, _pkl_path)

# -----------------------
# 4) Make labeled/unlabeled split INSIDE TRAIN
#    rule: y > 80  -> labeled
# -----------------------
thr = 80.0
labeled_mask = np.greater(y_train, thr)        # True where y is strictly above thr
unlabeled_mask = np.logical_not(labeled_mask)  # every remaining training row

# Labeled subset keeps its targets; y is left unscaled (add a dedicated
# scaler if target scaling is ever wanted).
xL, cL, yL = (
    x_train_s[labeled_mask],
    c_train_s[labeled_mask],
    y_train[labeled_mask],
)

# Unlabeled subset: inputs and conditions only, targets are not carried along.
xU, cU = x_train_s[unlabeled_mask], c_train_s[unlabeled_mask]

# -----------------------
# 5) Torch tensors
# -----------------------
def to_torch(a):
    """Return a new float32 ``torch.Tensor`` built from the array-like ``a``."""
    as_float32 = torch.tensor(a, dtype=torch.float32)
    return as_float32

# Labeled training tensors; the target gets a trailing axis -> (nL, 1).
xL_t = to_torch(xL)
cL_t = to_torch(cL)
yL_t = to_torch(yL).unsqueeze(1)

# Unlabeled training tensors (no target).
xU_t = to_torch(xU)
cU_t = to_torch(cU)

# Validation tensors, target as a column as well.
xV_t = to_torch(x_val_s)
cV_t = to_torch(c_val_s)
yV_t = to_torch(y_val).unsqueeze(1)

# Test tensors, same layout as validation.
xT_t = to_torch(x_test_s)
cT_t = to_torch(c_test_s)
yT_t = to_torch(y_test).unsqueeze(1)

# -----------------------
# 6) Datasets / Loaders
# -----------------------
batch_size = 64

# Training data: labeled samples yield (x, c, y), unlabeled ones only (x, c).
labeled_train_ds = TensorDataset(xL_t, cL_t, yL_t)
unlabeled_train_ds = TensorDataset(xU_t, cU_t)

# Validation and test keep y so that performance can be measured.
val_ds = TensorDataset(xV_t, cV_t, yV_t)
test_ds = TensorDataset(xT_t, cT_t, yT_t)

# Training loaders reshuffle each epoch and drop the final incomplete batch.
_train_opts = dict(batch_size=batch_size, shuffle=True, drop_last=True)
labeled_loader = DataLoader(labeled_train_ds, **_train_opts)
unlabeled_loader = DataLoader(unlabeled_train_ds, **_train_opts)

# Evaluation loaders keep the original order and every sample.
_eval_opts = dict(batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_ds, **_eval_opts)
test_loader = DataLoader(test_ds, **_eval_opts)

# -----------------------
# 7) Save loaders (saving the datasets would be the safer choice, but the
#    loaders themselves are stored as requested)
# -----------------------
for _loader_obj, _pt_path in (
    (labeled_loader, "./torch/labeled_train_loader_ar.pt"),
    (unlabeled_loader, "./torch/unlabeled_train_loader_ar.pt"),
    (val_loader, "./torch/val_loader_ar.pt"),
    (test_loader, "./torch/test_loader_ar.pt"),
):
    torch.save(_loader_obj, _pt_path)

# Report how many samples ended up in each subset.
print(f"[train] labeled: {len(labeled_train_ds)} / unlabeled: {len(unlabeled_train_ds)}")
print(f"[val]   {len(val_ds)}")
print(f"[test]  {len(test_ds)}")


[train] labeled: 694 / unlabeled: 2850
[val]   1182
[test]  1182
