In [None]:
import numpy as np, torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader, Subset
from laplace import Laplace
import matplotlib.pyplot as plt
import laplace
import pickle
import os
import random
import time

from utils.DataUtils import save_datasets, load_datasets, extract_samples_from_unlabeled, delete_samples_from_unlabeled, add_samples_to_labeled, add_and_extract_and_delete_samples
from utils.ModelUtils import initialize_model_weights, train_model, evaluate, count_parameters, save_model, load_model
from utils.LaplaceUtils import return_hessian_eigenvalues, compute_outcome_hessian_from_model, symmetric_matrix_sqrt, fast_jacobian, low_rank_updated_part
from utils.AlFunctions import DoptScore_per_sample, AoptScore_per_sample, ToptScore_per_sample, selection_AL_scores, AL_finetune_model


In [3]:
'''Load MNIST and build flattened, standardized train/test TensorDatasets.'''
import torchvision
import torchvision.transforms as transforms

np.random.seed(0)
torch.manual_seed(0)

# Per-sample transform handed to the torchvision datasets.
# NOTE(review): the arrays below are read straight from `.data` / `.targets`,
# so this pipeline does not appear to be applied to them — confirm whether it
# is still needed by a later cell.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # MNIST mean, std
])

# Download (if missing) and open the official train / test splits.
mnist_train = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_test = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Flatten each 28x28 image into a 784-vector; cast to float32 first, then
# divide by 255.0 so the arithmetic stays in float32.
Xtr = mnist_train.data.numpy().reshape(-1, 784).astype(np.float32) / 255.0
Xte = mnist_test.data.numpy().reshape(-1, 784).astype(np.float32) / 255.0
ytr = mnist_train.targets.numpy().astype(np.int64)
yte = mnist_test.targets.numpy().astype(np.int64)

# Per-feature standardization: statistics come from the training pixels only,
# and the same fitted scaler is then applied to the test pixels.
scaler = StandardScaler()
Xtr = scaler.fit_transform(Xtr)
Xte = scaler.transform(Xte)

# NumPy arrays -> torch tensors (labels as int64 / long).
Xtr_t, Xte_t = torch.from_numpy(Xtr), torch.from_numpy(Xte)
ytr_t, yte_t = torch.from_numpy(ytr).long(), torch.from_numpy(yte).long()

# Pair features with labels.
train_set = TensorDataset(Xtr_t, ytr_t)
test_set = TensorDataset(Xte_t, yte_t)

# Short summary of what was loaded.
print("✅ MNIST loaded")
print(f"   Train set: {train_set}, n_train = {len(train_set)}")
print(f"   Test set: {test_set}, n_test = {len(test_set)}")
print(f"   Input dim: {Xtr_t.shape[1]}")
print(f"   Output classes: {len(torch.unique(ytr_t))}")

100%|██████████| 9.91M/9.91M [00:15<00:00, 631kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 153kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 926kB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 2.22MB/s]


✅ MNIST loaded
   Train set: <torch.utils.data.dataset.TensorDataset object at 0x0000021A1714AD70>, n_train = 60000
   Test set: <torch.utils.data.dataset.TensorDataset object at 0x0000021A17149090>, n_test = 10000
   Input dim: 784
   Output classes: 10


In [None]:
# Seed every RNG this cell relies on. `random.shuffle` below draws from Python's
# built-in `random` module, which np.random.seed / torch.manual_seed do not
# touch, so it has to be seeded explicitly for the labeled/unlabeled split to
# be reproducible across kernel restarts.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Data preparation: scikit-learn digits dataset.
# NOTE(review): this rebinds Xtr/Xte/ytr/yte, scaler, the *_t tensors and
# test_set, which the MNIST cell above also assigns — whichever cell ran last
# wins.
digits = load_digits()
x, y = digits.data.astype(np.float32), digits.target.astype(np.int64)
Xtr, Xte, ytr, yte = train_test_split(x, y, test_size=0.25, stratify=y, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(Xtr)
Xtr, Xte = scaler.transform(Xtr), scaler.transform(Xte)

Xtr_t = torch.from_numpy(Xtr)          # (N_train, D)
ytr_t = torch.from_numpy(ytr).long()   # (N_train,)
Xte_t = torch.from_numpy(Xte)
yte_t = torch.from_numpy(yte).long()   # explicit long, consistent with ytr_t

# Initial active-learning pool: the first `num_pretrain` shuffled indices are
# treated as labeled, the remainder as unlabeled.
num_pretrain = 30
indices = list(range(len(Xtr_t)))
random.shuffle(indices)
labeled_indices = indices[:num_pretrain]       # positions into full_train_dataset
unlabeled_indices = indices[num_pretrain:]

# One TensorDataset over the whole training split ...
full_train_dataset = TensorDataset(Xtr_t, ytr_t)

# ... with labeled / unlabeled pools expressed as Subset views over it
# (index lists only; no numpy arrays are passed to TensorDataset).
train_set_Labeled = Subset(full_train_dataset, labeled_indices)
train_set_Unlabeled = Subset(full_train_dataset, unlabeled_indices)
test_set = TensorDataset(Xte_t, yte_t)


(10000, 784)