In [213]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from scipy.linalg import sqrtm
import numpy as np
from math import sqrt
import random
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from torchvision.datasets import MNIST, CIFAR10
import sys
from tqdm import tqdm
import torch.nn.functional as F
from torch.distributions.multivariate_normal import MultivariateNormal
import copy
import gc
import warnings
warnings.filterwarnings("ignore")
import torchvision.models as models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [214]:
resnet18 = models.resnet18(pretrained=True)
resnet18.fc = torch.nn.Identity()
resnet18.to(device).eval()
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [215]:
# penalty = 4.282
penalty = 100
eps = 0.01
# width = 0.15
# gap = 0.05
# MCsample = 500
# sigma = 1e-1

In [216]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# normalize = transforms.Normalize(mean=[0.559, 0.571, 0.586],
#                                  std=[0.230, 0.226, 0.249])

preprocess = transforms.Compose([
    transforms.ToTensor(),
    normalize
])
labels = np.load('../CIFAR-10-C/labels.npy')
selected_indices_0 = labels == 0
selected_indices_1 = labels == 1

def data_preprocess(images):
    images_0 = torch.stack([preprocess(image) for image in images[selected_indices_0]]).to(device)
    images_1 = torch.stack([preprocess(image) for image in images[selected_indices_1]]).to(device)
    with torch.no_grad():
        embedding_0 = resnet18(images_0)
        embedding_1 = resnet18(images_1)
        embedding_0 = torch.concatenate([embedding_0, torch.ones(embedding_0.size()[0],1).to(device)], dim=1)
        embedding_1 = torch.concatenate([embedding_1, torch.ones(embedding_1.size()[0],1).to(device)], dim=1)
        perm = torch.randperm(len(embedding_0))
        embedding_0 = embedding_0[perm]
        embedding_1 = embedding_1[perm]
        # print(embedding_0.size(), embedding_1.size())
    return embedding_0, embedding_1


images = np.load('../CIFAR-10-C/brightness.npy')
images_a_0_embedding, images_a_1_embedding = data_preprocess(images)
images = np.load('../CIFAR-10-C/contrast.npy')
images_b_0_embedding, images_b_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/defocus_blur.npy')
# images_c_0_embedding, images_c_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/elastic_transform.npy')
# images_d_0_embedding, images_d_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/fog.npy')
# images_e_0_embedding, images_e_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/frost.npy')
# images_f_0_embedding, images_f_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/gaussian_blur.npy')
# images_g_0_embedding, images_g_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/gaussian_noise.npy')
# images_h_0_embedding, images_h_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/glass_blur.npy')
# images_i_0_embedding, images_i_1_embedding = data_preprocess(images)
# images = np.load('../CIFAR-10-C/impulse_noise.npy')
# images_j_0_embedding, images_j_1_embedding = data_preprocess(images)
# Assuming the embeddings are loaded as numpy arrays
datasets = [
    images_a_0_embedding, 
    images_a_1_embedding, 
    images_b_0_embedding, 
    images_b_1_embedding
]

# Concatenate all datasets to calculate the mean and standard deviation
combined_data = torch.cat(datasets, dim=0)  # Shape: (20000, 513)

# Compute mean and std across the feature dimension (dim=0)
mean = combined_data.mean(dim=0, keepdim=True)
std = combined_data.std(dim=0, keepdim=True)

# Normalize each dataset using the same mean and std
normalized_datasets = [(dataset - mean) / (std + 1e-6) for dataset in datasets]

# Unpack the normalized datasets
images_a_0_embedding, images_a_1_embedding, images_b_0_embedding, images_b_1_embedding = normalized_datasets

P_x = torch.concatenate([
    images_a_0_embedding[4000:],
    images_b_0_embedding[4000:],
    # images_c_0_embedding[4000:],
    # images_d_0_embedding[4000:],
    # images_e_0_embedding[4000:],
    # images_f_0_embedding[4000:],
    # images_g_0_embedding[4000:],
    # images_h_0_embedding[4000:],
    # images_i_0_embedding[4000:],
    # images_j_0_embedding[4000:],
    images_a_1_embedding[4000:],
    images_b_1_embedding[4000:],
    # images_c_1_embedding[4000:],
    # images_d_1_embedding[4000:],
    # images_e_1_embedding[4000:],
    # images_f_1_embedding[4000:],
    # images_g_1_embedding[4000:],
    # images_h_1_embedding[4000:],
    # images_i_1_embedding[4000:],
    # images_j_1_embedding[4000:]
])
# P_y = torch.concatenate([torch.zeros(10000), torch.ones(10000)]).to(device)
P_y = torch.concatenate([torch.zeros(2000), torch.ones(2000)]).to(device)
Q_x = torch.concatenate([
    images_a_0_embedding[:4000],
    images_b_0_embedding[:4000],
    # images_c_0_embedding[:4000],
    # images_d_0_embedding[:4000],
    # images_e_0_embedding[:4000],
    # images_f_0_embedding[:4000],
    # images_g_0_embedding[:4000],
    # images_h_0_embedding[:4000],
    # images_i_0_embedding[:4000],
    # images_j_0_embedding[:4000],
    images_a_1_embedding[:4000],
    images_b_1_embedding[:4000],
    # images_c_1_embedding[:4000],
    # images_d_1_embedding[:4000],
    # images_e_1_embedding[:4000],
    # images_f_1_embedding[:4000],
    # images_g_1_embedding[:4000],
    # images_h_1_embedding[:4000],
    # images_i_1_embedding[:4000],
    # images_j_1_embedding[:4000]
])
# Q_y = torch.concatenate([torch.zeros(40000), torch.ones(40000)]).to(device)
Q_y = torch.concatenate([torch.zeros(8000), torch.ones(8000)]).to(device)
# del resnet18
allocated_memory = torch.cuda.memory_allocated()
print(f"Already allocated: {allocated_memory / 1024 ** 2} MB")
print(P_x[0].sum(), P_y.size())

Already allocated: 226.572265625 MB
tensor(227.4364, device='cuda:0') torch.Size([4000])


In [217]:
class dataset:
    def __init__(self, X, y, num, score, base_loss, base_acc, noise, bias, bias_loss, bias_acc):
        self.X = X
        self.y = y
        self.num = num
        self.score = score
        self.base_loss = base_loss
        self.base_acc = base_acc
        self.noise = noise
        self.bias = bias
        self.bias_loss = bias_loss
        self.bias_acc = bias_acc

In [218]:
def sigmoid(z):
    return 1/(1 + torch.exp(-z))

def generate_data_cifar10(indices, dataset, label, noise):
    X = dataset[indices]
    y = torch.ones(X.size()[0]) * label
    y = torch.tensor(y, dtype=torch.int)
    y = y.to(device)
    y[:int(noise*X.size()[0])] = 1 - y[:int(noise*X.size()[0])]
    # print(y)
    return X, y, X.size()[0]

# def generate_dataset_cifar10(sample_indices, noise):
    # perm_indices = torch.randperm(80000).to(device)
    # sample_indices = perm_indices[:N]
    a_0 = sample_indices[(sample_indices >= 0) & (sample_indices < 4000)]
    b_0 = sample_indices[(sample_indices >= 4000) & (sample_indices < 8000)] - 4000
    c_0 = sample_indices[(sample_indices >= 8000) & (sample_indices < 12000)] - 8000
    d_0 = sample_indices[(sample_indices >= 12000) & (sample_indices < 16000)] - 12000
    e_0 = sample_indices[(sample_indices >= 16000) & (sample_indices < 20000)] - 16000
    f_0 = sample_indices[(sample_indices >= 20000) & (sample_indices < 24000)] - 20000
    g_0 = sample_indices[(sample_indices >= 24000) & (sample_indices < 28000)] - 24000
    h_0 = sample_indices[(sample_indices >= 28000) & (sample_indices < 32000)] - 28000
    i_0 = sample_indices[(sample_indices >= 32000) & (sample_indices < 36000)] - 32000
    j_0 = sample_indices[(sample_indices >= 36000) & (sample_indices < 40000)] - 36000
    a_1 = sample_indices[(sample_indices >= 40000) & (sample_indices < 44000)] - 40000
    b_1 = sample_indices[(sample_indices >= 44000) & (sample_indices < 48000)] - 44000
    c_1 = sample_indices[(sample_indices >= 48000) & (sample_indices < 52000)] - 48000
    d_1 = sample_indices[(sample_indices >= 52000) & (sample_indices < 56000)] - 52000
    e_1 = sample_indices[(sample_indices >= 56000) & (sample_indices < 60000)] - 56000
    f_1 = sample_indices[(sample_indices >= 60000) & (sample_indices < 64000)] - 60000
    g_1 = sample_indices[(sample_indices >= 64000) & (sample_indices < 68000)] - 64000
    h_1 = sample_indices[(sample_indices >= 68000) & (sample_indices < 72000)] - 68000
    i_1 = sample_indices[(sample_indices >= 72000) & (sample_indices < 76000)] - 72000
    j_1 = sample_indices[(sample_indices >= 76000) & (sample_indices < 80000)] - 76000
    a_X_0, a_y_0, a_num_0 = generate_data_cifar10(a_0, images_a_0_embedding[:4000], 0, noise)
    a_X_1, a_y_1, a_num_1 = generate_data_cifar10(a_1, images_a_1_embedding[:4000], 1, noise)
    b_X_0, b_y_0, b_num_0 = generate_data_cifar10(b_0, images_b_0_embedding[:4000], 0, noise)
    b_X_1, b_y_1, b_num_1 = generate_data_cifar10(b_1, images_b_1_embedding[:4000], 1, noise)
    c_X_0, c_y_0, c_num_0 = generate_data_cifar10(c_0, images_c_0_embedding[:4000], 0, noise)
    c_X_1, c_y_1, c_num_1 = generate_data_cifar10(c_1, images_c_1_embedding[:4000], 1, noise)
    d_X_0, d_y_0, d_num_0 = generate_data_cifar10(d_0, images_d_0_embedding[:4000], 0, noise)
    d_X_1, d_y_1, d_num_1 = generate_data_cifar10(d_1, images_d_1_embedding[:4000], 1, noise)
    e_X_0, e_y_0, e_num_0 = generate_data_cifar10(e_0, images_e_0_embedding[:4000], 0, noise)
    e_X_1, e_y_1, e_num_1 = generate_data_cifar10(e_1, images_e_1_embedding[:4000], 1, noise)
    f_X_0, f_y_0, f_num_0 = generate_data_cifar10(f_0, images_f_0_embedding[:4000], 0, noise)
    f_X_1, f_y_1, f_num_1 = generate_data_cifar10(f_1, images_f_1_embedding[:4000], 1, noise)
    g_X_0, g_y_0, g_num_0 = generate_data_cifar10(g_0, images_g_0_embedding[:4000], 0, noise)
    g_X_1, g_y_1, g_num_1 = generate_data_cifar10(g_1, images_g_1_embedding[:4000], 1, noise)
    h_X_0, h_y_0, h_num_0 = generate_data_cifar10(h_0, images_h_0_embedding[:4000], 0, noise)
    h_X_1, h_y_1, h_num_1 = generate_data_cifar10(h_1, images_h_1_embedding[:4000], 1, noise)
    i_X_0, i_y_0, i_num_0 = generate_data_cifar10(i_0, images_i_0_embedding[:4000], 0, noise)
    i_X_1, i_y_1, i_num_1 = generate_data_cifar10(i_1, images_i_1_embedding[:4000], 1, noise)
    j_X_0, j_y_0, j_num_0 = generate_data_cifar10(j_0, images_j_0_embedding[:4000], 0, noise)
    j_X_1, j_y_1, j_num_1 = generate_data_cifar10(j_1, images_j_1_embedding[:4000], 1, noise)
    X = torch.concatenate([
        a_X_0,
        b_X_0,
        c_X_0,
        d_X_0,
        e_X_0,
        f_X_0,
        g_X_0,
        h_X_0,
        i_X_0,
        j_X_0,
        a_X_1,
        b_X_1,
        c_X_1,
        d_X_1,
        e_X_1,
        f_X_1,
        g_X_1,
        h_X_1,
        i_X_1,
        j_X_1
    ], axis = 0)
    y = torch.concatenate([
        a_y_0,
        b_y_0,
        c_y_0,
        d_y_0,
        e_y_0,
        f_y_0,
        g_y_0,
        h_y_0,
        i_y_0,
        j_y_0,
        a_y_1,
        b_y_1,
        c_y_1,
        d_y_1,
        e_y_1,
        f_y_1,
        g_y_1,
        h_y_1,
        i_y_1,
        j_y_1
    ], axis = 0)
    bias = torch.tensor([
        a_num_0+a_num_1,
        b_num_0+b_num_1,
        c_num_0+c_num_1,
        d_num_0+d_num_1,
        e_num_0+e_num_1,
        f_num_0+f_num_1,
        g_num_0+g_num_1,
        h_num_0+h_num_1,
        i_num_0+i_num_1,
        j_num_0+j_num_1
    ]).to(device)
    return bias, X, y

def generate_train_cifar10(train_size_a_0, train_size_a_1, train_size_b_0, train_size_b_1, train_number, noise_level):
    train_dataset = []
    bias = torch.tensor([
        train_size_a_0, train_size_b_0,
        train_size_a_1, train_size_b_1
    ]).to(device)
    for i in range(train_number):
        perm_indices_a_0 = torch.randperm(4000).to(device)
        sample_indices_a_0 = perm_indices_a_0[:train_size_a_0]
        perm_indices_a_1 = torch.randperm(4000).to(device)
        sample_indices_a_1 = perm_indices_a_1[:train_size_a_1]
        perm_indices_b_0 = torch.randperm(4000).to(device)
        sample_indices_b_0 = perm_indices_b_0[:train_size_b_0]
        perm_indices_b_1 = torch.randperm(4000).to(device)
        sample_indices_b_1 = perm_indices_b_1[:train_size_b_1]
        train_X_a_0, train_y_a_0, _ = generate_data_cifar10(sample_indices_a_0, images_a_0_embedding[:4000], 0, eps * noise_level)
        train_X_a_1, train_y_a_1, _ = generate_data_cifar10(sample_indices_a_1, images_a_1_embedding[:4000], 1, eps * noise_level)
        train_X_b_0, train_y_b_0, _ = generate_data_cifar10(sample_indices_b_0, images_b_0_embedding[:4000], 0, eps * noise_level)
        train_X_b_1, train_y_b_1, _ = generate_data_cifar10(sample_indices_b_1, images_b_1_embedding[:4000], 1, eps * noise_level)
        train_X = torch.concatenate([train_X_a_0, train_X_b_0, train_X_a_1, train_X_b_1])
        train_y = torch.concatenate([train_y_a_0, train_y_b_0, train_y_a_1, train_y_b_1])
        train_dataset.append(dataset(train_X, train_y, train_X.size()[0], 0, 0, 0, noise_level*eps, bias, [], []))
    return train_dataset

def subsample(X, y, size):
    perm = torch.randperm(len(y))
    sample_X = X[perm[:size]]
    sample_y = y[perm[:size]]
    return sample_X, sample_y

def compute_hessian(mu, X):
    sigm = sigmoid(X @ mu.t())
    diag_sigm = (sigm * (1 - sigm)).flatten()
    res = torch.eye(X.size(1), device=device)/penalty
    res += (X.t() * diag_sigm) @ X
    return res

def compute_score(mu0, Q0, lg0, mu1, Q1, lg1, mu2, Q2, lg2):
    Q = Q1 + Q2 - Q0
    Q_t_L = torch.linalg.cholesky(Q)
    Q_t_L_inv = torch.linalg.solve_triangular(Q_t_L, torch.eye(Q_t_L.size(0), device=device), upper=False)
    Q_inv = Q_t_L_inv.T @ Q_t_L_inv
    mu = torch.matmul(Q_inv, torch.matmul(Q1, mu1) + torch.matmul(Q2, mu2) - torch.matmul(Q0, mu0))

    lg12 = 2 * torch.sum(torch.log(torch.diagonal(Q_t_L)))

    lg = lg1+lg2-lg12-lg0

    sqr = torch.matmul(mu.T, torch.matmul(Q, mu)) - torch.matmul(mu1.T, torch.matmul(Q1, mu1)) - torch.matmul(mu2.T, torch.matmul(Q2, mu2)) + torch.matmul(mu0.T, torch.matmul(Q0, mu0))

    score = 0.5 * (lg + sqr)
    # print(lg1,lg2,lg12,lg0,sqr)
    return score.item()

def compute_data_score_err(mu_test, Q_test, test_X, test_y, train_X, train_y, lg2, bias):
    test_N = test_y.size()[0]
    M = test_X.size()[1]
    test_size_a_0, test_size_b_0, test_size_a_1, test_size_b_1 = bias

    mu0 = torch.zeros((1, M))
    mu0 = mu0.to(device)
    Q0 = torch.eye(M)/penalty
    Q0 = Q0.to(device)
    lg0 = -M * torch.log(torch.tensor(penalty))
    
    train = LogisticRegression(fit_intercept = False, C = penalty, max_iter=5000).fit(train_X.cpu(), train_y.cpu())
    # print(train.score(torch.cat([Q_x[1000:2000], Q_x[5000:6000], Q_x[9000:10000], Q_x[13000:14000]]).cpu(), torch.cat([Q_y[1000:2000], Q_y[5000:6000], Q_y[9000:10000], Q_y[13000:14000]]).cpu()))
    # print(train.score(P_x.cpu(), P_y.cpu()))
    mu_train = torch.tensor(train.coef_, dtype=torch.float32, device=device)
    # mu_train_numpy = mu_train.detach().squeeze().cpu().numpy()

    Q_train = compute_hessian(mu_train, train_X)
    Q_train_L = torch.linalg.cholesky(Q_train)
    # Q_train_inverse = Q_train_L_inv.T @ Q_train_L_inv
    # Q_train_inverse = torch.inverse(Q_train)
    # Q_numpy = Q_train_inverse.detach().cpu().numpy()

    lg1 = 2 * torch.sum(torch.log(torch.diagonal(Q_train_L)))

    score = compute_score(mu0.t(), Q0, lg0, mu_train.t(), Q_train, lg1, mu_test.t(), Q_test, lg2)

    test_y = test_y.float()
    criterion = nn.BCELoss()

    base_predictive = sigmoid(torch.matmul(test_X, mu_train.t())).squeeze()
    base_predictions = (base_predictive >= 0.5).float()
    base_loss = criterion(base_predictive, test_y)
    base_acc = (base_predictions == test_y).float().mean()

    base_predictive_0 = sigmoid(torch.matmul(test_X[:test_size_a_0], mu_train.t())).squeeze()
    base_predictions_0 = (base_predictive_0 >= 0.5).float()
    base_loss_0 = criterion(base_predictive_0, test_y[:test_size_a_0])
    base_acc_0 = (base_predictions_0 == test_y[:test_size_a_0]).float().mean()

    base_predictive_1 = sigmoid(torch.matmul(test_X[test_size_a_0:test_size_a_0+test_size_b_0], mu_train.t())).squeeze()
    base_predictions_1 = (base_predictive_1 >= 0.5).float()
    base_loss_1 = criterion(base_predictive_1, test_y[test_size_a_0:test_size_a_0+test_size_b_0])
    base_acc_1 = (base_predictions_1 == test_y[test_size_a_0:test_size_a_0+test_size_b_0]).float().mean()

    base_predictive_2 = sigmoid(torch.matmul(test_X[test_size_a_0+test_size_b_0:test_size_a_0+test_size_b_0+test_size_a_1], mu_train.t())).squeeze()
    base_predictions_2 = (base_predictive_2 >= 0.5).float()
    base_loss_2 = criterion(base_predictive_2, test_y[test_size_a_0+test_size_b_0:test_size_a_0+test_size_b_0+test_size_a_1])
    base_acc_2 = (base_predictions_2 == test_y[test_size_a_0+test_size_b_0:test_size_a_0+test_size_b_0+test_size_a_1]).float().mean()

    base_predictive_3 = sigmoid(torch.matmul(test_X[test_size_a_0+test_size_b_0+test_size_a_1:], mu_train.t())).squeeze()
    base_predictions_3 = (base_predictive_3 >= 0.5).float()
    base_loss_3 = criterion(base_predictive_3, test_y[test_size_a_0+test_size_b_0+test_size_a_1:])
    base_acc_3 = (base_predictions_3 == test_y[test_size_a_0+test_size_b_0+test_size_a_1:]).float().mean()
    
    return score, base_loss.item(), base_acc.item(), np.array([base_loss_0.item(), base_loss_1.item(), base_loss_2.item(), base_loss_3.item()]), np.array([base_acc_0.item(), base_acc_1.item(), base_acc_2.item(), base_acc_3.item()])

def get_err_score(train_data, test_X, test_y, train_number, test_bias):
    test = LogisticRegression(fit_intercept = False, C = penalty, max_iter=5000).fit(test_X.cpu(), test_y.cpu())
    mu_test = torch.tensor(test.coef_, dtype=torch.float32, device=device)
    Q_test = compute_hessian(mu_test, test_X)

    L = torch.linalg.cholesky(Q_test)
    lg2 = 2 * torch.sum(torch.log(torch.diagonal(L)))

    for i in range(train_number):
        train_data[i].score, train_data[i].base_loss, train_data[i].base_acc, train_data[i].bias_loss, train_data[i].bias_acc = compute_data_score_err(mu_test, Q_test, test_X, test_y, train_data[i].X, train_data[i].y, lg2, test_bias)


# def random_copy(train_data, copy_num, num_candidate, train_size):
#     new_train_data = []
#     for i in range(num_candidate):
#         perm_indices = torch.randperm(train_size)
#         sample_indices = perm_indices[:copy_num]
#         new_train_X = torch.concatenate([train_data[i].X, train_data[i].X[sample_indices]])
#         new_train_y = torch.concatenate([train_data[i].y, train_data[i].y[sample_indices]])
#         new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, 0, 0, 0, train_data[i].noise_ratio, train_data[i].label_ratio, train_data[i].bias_ratio))
#     return new_train_data

def mimic_label_copy(train_data, num_candidate, test_ratio):
    new_train_data = []
    test_a_label_ratio = test_ratio[0]/test_ratio[2]
    test_b_label_ratio = test_ratio[1]/test_ratio[3]
    for i in range(num_candidate):
        train_size_a_0, train_size_b_0, train_size_a_1, train_size_b_1 = train_data[i].bias
        target_size_a_0 = int(train_size_a_1 * test_a_label_ratio)
        target_size_b_0 = int(train_size_b_1 * test_b_label_ratio)

        if train_size_a_0 < target_size_a_0:
            num_extra = int(target_size_a_0/train_size_a_0) - 1
            # extra_indices = torch.randint(0, train_size_a_0, (num_extra,))
            indices = torch.range(0,train_size_a_0-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_a_0 = torch.cat([train_data[i].X[:train_size_a_0], train_data[i].X[:train_size_a_0][extra_indices]])
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
            new_train_y_a_0 = torch.cat([train_data[i].y[:train_size_a_0], train_data[i].y[:train_size_a_0][extra_indices]])
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
        else:
            target_size_a_1 = int(train_size_a_0 / test_a_label_ratio)
            num_extra = int(target_size_a_1/train_size_a_1) - 1
            # extra_indices = torch.randint(0, train_size_a_1, (num_extra,))
            indices = torch.range(0,train_size_a_1-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_a_1 = torch.cat([train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1], train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][extra_indices]])
            new_train_X_a_0 = train_data[i].X[:train_size_a_0]
            new_train_y_a_1 = torch.cat([train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1], train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][extra_indices]])
            new_train_y_a_0 = train_data[i].y[:train_size_a_0]

        if train_size_b_0 < target_size_b_0:
            num_extra = int(target_size_b_0/train_size_b_0) - 1
            # extra_indices = torch.randint(0, train_size_b_0, (num_extra,))
            indices = torch.range(0,train_size_b_0-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_b_0 = torch.cat([train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0], train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0][extra_indices]])
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:]
            new_train_y_b_0 = torch.cat([train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0], train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0][extra_indices]])
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:]
        else:
            target_size_b_1 = int(train_size_b_0 / test_b_label_ratio)
            num_extra = int(target_size_b_1/train_size_b_1) - 1
            # extra_indices = torch.randint(0, train_size_b_1, (num_extra,))
            indices = torch.range(0,train_size_b_1-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_b_1 = torch.cat([train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:], train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:][extra_indices]])
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0]
            new_train_y_b_1 = torch.cat([train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:], train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:][extra_indices]])
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0]

        new_train_X = torch.concatenate([new_train_X_a_0, new_train_X_b_0, new_train_X_a_1, new_train_X_b_1])
        new_train_y = torch.concatenate([new_train_y_a_0, new_train_y_b_0, new_train_y_a_1, new_train_y_b_1])
        new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, train_data[i].noise, torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device), [], []))
        # print(torch.tensor([new_train_X_a_0.size(), new_train_X_b_0.size(), new_train_X_a_1.size(), new_train_X_b_1.size()]).to(device))
    return new_train_data

def mimic_label_delete(train_data, num_candidate, test_ratio):
    new_train_data = []
    test_a_label_ratio = test_ratio[0]/test_ratio[2]
    test_b_label_ratio = test_ratio[1]/test_ratio[3]
    for i in range(num_candidate):
        train_size_a_0, train_size_b_0, train_size_a_1, train_size_b_1 = train_data[i].bias
        target_size_a_0 = int(train_size_a_1 * test_a_label_ratio)
        target_size_b_0 = int(train_size_b_1 * test_b_label_ratio)

        if train_size_a_0 > target_size_a_0:
            residual_indices = torch.randperm(train_size_a_0)[:target_size_a_0]
            new_train_X_a_0 = train_data[i].X[:train_size_a_0][residual_indices]
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
            new_train_y_a_0 = train_data[i].y[:train_size_a_0][residual_indices]
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
        else:
            target_size_a_1 = int(train_size_a_0 / test_a_label_ratio)
            residual_indices = torch.randperm(train_size_a_1)[:target_size_a_1]
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][residual_indices]
            new_train_X_a_0 = train_data[i].X[:train_size_a_0]
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][residual_indices]
            new_train_y_a_0 = train_data[i].y[:train_size_a_0]

        if train_size_b_0 > target_size_b_0:
            residual_indices = torch.randperm(train_size_b_0)[:target_size_b_0]
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0][residual_indices]
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:]
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0][residual_indices]
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:]
        else:
            target_size_b_1 = int(train_size_b_0 / test_b_label_ratio)
            residual_indices = torch.randperm(train_size_b_1)[:target_size_b_1]
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:][residual_indices]
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0]
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:][residual_indices]
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0]

        new_train_X = torch.concatenate([new_train_X_a_0, new_train_X_b_0, new_train_X_a_1, new_train_X_b_1])
        new_train_y = torch.concatenate([new_train_y_a_0, new_train_y_b_0, new_train_y_a_1, new_train_y_b_1])
        new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, train_data[i].noise, torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device), [], []))
        # print(torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device))
    return new_train_data

def mimic_bias_copy(train_data, num_candidate, test_ratio):
    new_train_data = []
    test_0_bias_ratio = test_ratio[0]/test_ratio[1]
    test_1_bias_ratio = test_ratio[2]/test_ratio[3]
    for i in range(num_candidate):
        train_size_a_0, train_size_b_0, train_size_a_1, train_size_b_1 = train_data[i].bias
        target_size_a_0 = int(train_size_b_0 * test_0_bias_ratio)
        target_size_a_1 = int(train_size_b_1 * test_1_bias_ratio)

        if train_size_a_0 < target_size_a_0:
            num_extra = int(target_size_a_0/train_size_a_0) - 1
            # extra_indices = torch.randint(0, train_size_a_0, (num_extra,))
            indices = torch.range(0,train_size_a_0-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_a_0 = torch.cat([train_data[i].X[:train_size_a_0], train_data[i].X[:train_size_a_0][extra_indices]])
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0]
            new_train_y_a_0 = torch.cat([train_data[i].y[:train_size_a_0], train_data[i].y[:train_size_a_0][extra_indices]])
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0]
        else:
            target_size_b_0 = int(train_size_a_0 / test_0_bias_ratio)
            num_extra = int(target_size_b_0/train_size_b_0) - 1
            # extra_indices = torch.randint(0, train_size_b_0, (num_extra,))
            indices = torch.range(0,train_size_b_0-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_b_0 = torch.cat([train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0], train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0][extra_indices]])
            new_train_X_a_0 = train_data[i].X[:train_size_a_0]
            new_train_y_b_0 = torch.cat([train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0], train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0][extra_indices]])
            new_train_y_a_0 = train_data[i].y[:train_size_a_0]

        if train_size_a_1 < target_size_a_1:
            num_extra = int(target_size_a_1/train_size_a_1) - 1
            # extra_indices = torch.randint(0, train_size_a_1, (num_extra,))
            indices = torch.range(0,train_size_a_1-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_a_1 = torch.cat([train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1], train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][extra_indices]])
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:]
            new_train_y_a_1 = torch.cat([train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1], train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][extra_indices]])
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:]
        else:
            target_size_b_1 = int(train_size_a_1 / test_1_bias_ratio)
            num_extra = int(target_size_b_1/train_size_b_1) - 1
            # extra_indices = torch.randint(0, train_size_b_1, (num_extra,))
            indices = torch.range(0,train_size_b_1-1)
            extra_indices = torch.tensor([])
            for g in range(num_extra):
                extra_indices = torch.cat([extra_indices, indices])
            extra_indices = extra_indices.to(torch.long)
            new_train_X_b_1 = torch.cat([train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:], train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:][extra_indices]])
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
            new_train_y_b_1 = torch.cat([train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:], train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:][extra_indices]])
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]

        new_train_X = torch.concatenate([new_train_X_a_0, new_train_X_b_0, new_train_X_a_1, new_train_X_b_1])
        new_train_y = torch.concatenate([new_train_y_a_0, new_train_y_b_0, new_train_y_a_1, new_train_y_b_1])
        new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, train_data[i].noise, torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device), [], []))
        # print(torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device))
    return new_train_data

def mimic_bias_delete(train_data, num_candidate, test_ratio):
    new_train_data = []
    test_0_bias_ratio = test_ratio[0]/test_ratio[1]
    test_1_bias_ratio = test_ratio[2]/test_ratio[3]
    for i in range(num_candidate):
        train_size_a_0, train_size_b_0, train_size_a_1, train_size_b_1 = train_data[i].bias
        target_size_a_0 = int(train_size_b_0 * test_0_bias_ratio)
        target_size_a_1 = int(train_size_b_1 * test_1_bias_ratio)

        if train_size_a_0 > target_size_a_0:
            residual_indices = torch.randperm(train_size_a_0)[:target_size_a_0]
            new_train_X_a_0 = train_data[i].X[:train_size_a_0][residual_indices]
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0]
            new_train_y_a_0 = train_data[i].y[:train_size_a_0][residual_indices]
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0]
        else:
            target_size_b_0 = int(train_size_a_0 / test_0_bias_ratio)
            residual_indices = torch.randperm(train_size_b_0)[:target_size_b_0]
            new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0][residual_indices]
            new_train_X_a_0 = train_data[i].X[:train_size_a_0]
            new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0][residual_indices]
            new_train_y_a_0 = train_data[i].y[:train_size_a_0]

        if train_size_a_1 > target_size_a_1:
            residual_indices = torch.randperm(train_size_a_1)[:target_size_a_1]
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][residual_indices]
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:]
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][residual_indices]
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:]
        else:
            target_size_b_1 = int(train_size_a_1 / test_1_bias_ratio)
            residual_indices = torch.randperm(train_size_b_1)[:target_size_b_1]
            new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:][residual_indices]
            new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]
            new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:][residual_indices]
            new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1]

        new_train_X = torch.concatenate([new_train_X_a_0, new_train_X_b_0, new_train_X_a_1, new_train_X_b_1])
        new_train_y = torch.concatenate([new_train_y_a_0, new_train_y_b_0, new_train_y_a_1, new_train_y_b_1])
        new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, train_data[i].noise, torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device), [], []))
        # print(torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device))
    return new_train_data

def data_denoise(train_data, num_candidate, ratio = 0.5):
    new_train_data = []
    for i in range(num_candidate):
        train_size_a_0, train_size_b_0, train_size_a_1, train_size_b_1 = train_data[i].bias
        noise = train_data[i].noise
        new_train_X_a_0 = train_data[i].X[:train_size_a_0][int(ratio*noise*train_size_a_0):]
        new_train_X_b_0 = train_data[i].X[train_size_a_0:train_size_a_0+train_size_b_0][int(ratio*noise*train_size_b_0):]
        new_train_X_a_1 = train_data[i].X[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][int(ratio*noise*train_size_a_1):]
        new_train_X_b_1 = train_data[i].X[train_size_a_0+train_size_b_0+train_size_a_1:][int(ratio*noise*train_size_b_1):]
        new_train_y_a_0 = train_data[i].y[:train_size_a_0][int(ratio*noise*train_size_a_0):]
        new_train_y_b_0 = train_data[i].y[train_size_a_0:train_size_a_0+train_size_b_0][int(ratio*noise*train_size_b_0):]
        new_train_y_a_1 = train_data[i].y[train_size_a_0+train_size_b_0:train_size_a_0+train_size_b_0+train_size_a_1][int(ratio*noise*train_size_a_1):]
        new_train_y_b_1 = train_data[i].y[train_size_a_0+train_size_b_0+train_size_a_1:][int(ratio*noise*train_size_b_1):]
        new_train_X = torch.concatenate([new_train_X_a_0, new_train_X_b_0, new_train_X_a_1, new_train_X_b_1])
        new_train_y = torch.concatenate([new_train_y_a_0, new_train_y_b_0, new_train_y_a_1, new_train_y_b_1])
        new_train_data.append(dataset(new_train_X, new_train_y, new_train_X.size()[0], 0, 0, 0, train_data[i].noise*(1-ratio), torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device), [], []))
        # print(torch.tensor([new_train_X_a_0.size()[0], new_train_X_b_0.size()[0], new_train_X_a_1.size()[0], new_train_X_b_1.size()[0]]).to(device))
    return new_train_data

In [219]:
T = 1
D = 100
train_size_a_0 = 20
train_size_a_1 = 40
train_size_b_0 = 40
train_size_b_1 = 20
num_candidate = 1
test_size_a_0 = 40
test_size_a_1 = 20
test_size_b_0 = 20
test_size_b_1 = 40
noise_levels = [5]

criterion = nn.BCELoss()

test_label_ratio = (test_size_a_0 + test_size_b_0)/(test_size_a_1 + test_size_b_1)
test_bias_ratio = (test_size_a_0 + test_size_a_1)/(test_size_b_0 + test_size_b_1)
test_size = test_size_a_0 + test_size_a_1 + test_size_b_0 + test_size_b_1
test_ratio = (test_size_a_0, test_size_b_0, test_size_a_1, test_size_b_1)
for noise_level in noise_levels:
    pre_score = 0
    mimic_label_copy_score = 0
    mimic_label_delete_socre = 0
    mimic_bias_copy_score = 0
    mimic_bias_delete_score = 0
    data_denoise_score = 0
    pre_loss = 0
    mimic_label_copy_loss = 0
    mimic_label_delete_loss = 0
    mimic_bias_copy_loss = 0
    mimic_bias_delete_loss = 0
    data_denoise_loss = 0
    pre_acc = 0
    mimic_label_copy_acc = 0
    mimic_label_delete_acc = 0
    mimic_bias_copy_acc = 0
    mimic_bias_delete_acc = 0
    data_denoise_acc = 0
    pre_bias_loss = np.array([0.,0.,0.,0.])
    mimic_label_copy_bias_loss = np.array([0.,0.,0.,0.])
    mimic_label_delete_bias_loss = np.array([0.,0.,0.,0.])
    mimic_bias_copy_bias_loss = np.array([0.,0.,0.,0.])
    mimic_bias_delete_bias_loss = np.array([0.,0.,0.,0.])
    data_denoise_bias_loss = np.array([0.,0.,0.,0.])
    pre_bias_acc = np.array([0.,0.,0.,0.])
    mimic_label_copy_bias_acc = np.array([0.,0.,0.,0.])
    mimic_label_delete_bias_acc = np.array([0.,0.,0.,0.])
    mimic_bias_copy_bias_acc = np.array([0.,0.,0.,0.])
    mimic_bias_delete_bias_acc = np.array([0.,0.,0.,0.])
    data_denoise_bias_acc = np.array([0.,0.,0.,0.])
    for d in tqdm(range(D)):
        test_X = P_x
        test_y = P_y
        # penalty = 10000
    #     best_penalty = 1
    #     best_loss = 100
    #     for c in range(1, 11):
    #         loss = 0
    #         for i in range(30):
    #             sub_test_X, sub_test_y = subsample(Q_x, Q_y, 200)
    #             test = LogisticRegression(fit_intercept = False, C = c, max_iter=50000).fit(sub_test_X.cpu(), sub_test_y.cpu())
    #             # acc += test.score(test_X.cpu(), test_y.cpu())
    #             mu = torch.tensor(test.coef_, dtype=torch.float32, device=device)
    #             predictive = sigmoid(torch.matmul(test_X, mu.t())).squeeze()
    #             predictions = (predictive >= 0.5).float()
    #             loss += criterion(predictive, test_y)
    #         if loss < best_loss:
    #             penalty = c
    #             best_loss = loss
    #         print(loss)
    #     print(penalty)
    #     for t in range(30):
    #         loss_1 = 0
    #         loss_2 = 0
    #         for i in range(30):
    #             sub_test_X, sub_test_y = subsample(Q_x, Q_y, 200)
    #             test = LogisticRegression(fit_intercept = False, C = penalty+0.1, max_iter=50000).fit(sub_test_X.cpu(), sub_test_y.cpu())
    #             # acc_1 += test.score(test_X.cpu(), test_y.cpu())
    #             mu = torch.tensor(test.coef_, dtype=torch.float32, device=device)
    #             predictive = sigmoid(torch.matmul(test_X, mu.t())).squeeze()
    #             predictions = (predictive >= 0.5).float()
    #             loss_1 += criterion(predictive, test_y)
    #         for i in range(30):
    #             sub_test_X, sub_test_y = subsample(Q_x, Q_y, 200)
    #             test = LogisticRegression(fit_intercept = False, C = penalty-0.1, max_iter=50000).fit(sub_test_X.cpu(), sub_test_y.cpu())
    #             # acc_2 += test.score(test_X.cpu(), test_y.cpu())
    #             mu = torch.tensor(test.coef_, dtype=torch.float32, device=device)
    #             predictive = sigmoid(torch.matmul(test_X, mu.t())).squeeze()
    #             predictions = (predictive >= 0.5).float()
    #             loss_2 += criterion(predictive, test_y)
    #         # test = LogisticRegression(fit_intercept = False, C = penalty, max_iter=50000).fit(Q_x.cpu(), Q_y.cpu())
    #         # acc_1 = test.score(test_X.cpu(), test_y.cpu())
    #         # test = LogisticRegression(fit_intercept = False, C = penalty-0.1, max_iter=50000).fit(Q_x.cpu(), Q_y.cpu())
    #         # acc_2 = test.score(test_X.cpu(), test_y.cpu())
    #         if loss_1 < best_loss:
    #             best_loss = loss_1
    #             best_penalty = penalty+0.1
    #         if loss_2 < best_loss:
    #             best_loss = loss_2
    #             best_penalty = penalty-0.1
    #         penalty = penalty - (loss_1.item()-loss_2.item())/8
    #         print(loss_1, loss_2, penalty)
    #     # print(acc)
    #     print(best_penalty, best_loss, penalty)
    #     sys.exit()

        train_data = generate_train_cifar10(train_size_a_0, train_size_a_1, train_size_b_0, train_size_b_1, num_candidate, noise_level)
        mimic_label_copy_train_data = mimic_label_copy(train_data, num_candidate, test_ratio)
        mimic_label_delete_train_data = mimic_label_delete(train_data, num_candidate, test_ratio)
        mimic_bias_copy_train_data = mimic_bias_copy(train_data, num_candidate, test_ratio)
        mimic_bias_delete_train_data = mimic_bias_delete(train_data, num_candidate, test_ratio)
        data_denoise_train_data = data_denoise(train_data, num_candidate, ratio=1)

        # for t in range(T):
        sample_test_X_a_0, sample_test_y_a_0 = subsample(images_a_0_embedding[4000:], torch.zeros(1000).to(device), test_size_a_0)
        sample_test_X_a_1, sample_test_y_a_1 = subsample(images_a_1_embedding[4000:], torch.ones(1000).to(device), test_size_a_1)
        sample_test_X_b_0, sample_test_y_b_0 = subsample(images_b_0_embedding[4000:], torch.zeros(1000).to(device), test_size_b_0)
        sample_test_X_b_1, sample_test_y_b_1 = subsample(images_b_1_embedding[4000:], torch.ones(1000).to(device), test_size_b_1)
        sample_test_X = torch.concatenate([sample_test_X_a_0, sample_test_X_b_0, sample_test_X_a_1, sample_test_X_b_1])
        sample_test_y = torch.concatenate([sample_test_y_a_0, sample_test_y_b_0, sample_test_y_a_1, sample_test_y_b_1])

        get_err_score(train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)
        get_err_score(mimic_label_copy_train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)
        get_err_score(mimic_label_delete_train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)
        get_err_score(mimic_bias_copy_train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)
        get_err_score(mimic_bias_delete_train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)
        get_err_score(data_denoise_train_data, sample_test_X, sample_test_y, num_candidate, test_ratio)

        for i in range(num_candidate):
            pre_score += train_data[i].score
            mimic_label_copy_score += mimic_label_copy_train_data[i].score
            mimic_label_delete_socre += mimic_label_delete_train_data[i].score
            mimic_bias_copy_score += mimic_bias_copy_train_data[i].score
            mimic_bias_delete_score += mimic_bias_delete_train_data[i].score
            data_denoise_score += data_denoise_train_data[i].score
            pre_loss += train_data[i].base_loss
            mimic_label_copy_loss += mimic_label_copy_train_data[i].base_loss
            mimic_label_delete_loss += mimic_label_delete_train_data[i].base_loss
            mimic_bias_copy_loss += mimic_bias_copy_train_data[i].base_loss
            mimic_bias_delete_loss += mimic_bias_delete_train_data[i].base_loss
            data_denoise_loss += data_denoise_train_data[i].base_loss
            pre_acc += train_data[i].base_acc
            mimic_label_copy_acc += mimic_label_copy_train_data[i].base_acc
            mimic_label_delete_acc += mimic_label_delete_train_data[i].base_acc
            mimic_bias_copy_acc += mimic_bias_copy_train_data[i].base_acc
            mimic_bias_delete_acc += mimic_bias_delete_train_data[i].base_acc
            data_denoise_acc += data_denoise_train_data[i].base_acc
            pre_bias_loss += train_data[i].bias_loss
            mimic_label_copy_bias_loss += mimic_label_copy_train_data[i].bias_loss
            mimic_label_delete_bias_loss += mimic_label_delete_train_data[i].bias_loss
            mimic_bias_copy_bias_loss += mimic_bias_copy_train_data[i].bias_loss
            mimic_bias_delete_bias_loss += mimic_bias_delete_train_data[i].bias_loss
            data_denoise_bias_loss += data_denoise_train_data[i].bias_loss
            pre_bias_acc += train_data[i].bias_acc
            mimic_label_copy_bias_acc += mimic_label_copy_train_data[i].bias_acc
            mimic_label_delete_bias_acc += mimic_label_delete_train_data[i].bias_acc
            mimic_bias_copy_bias_acc += mimic_bias_copy_train_data[i].bias_acc
            mimic_bias_delete_bias_acc += mimic_bias_delete_train_data[i].bias_acc
            data_denoise_bias_acc += data_denoise_train_data[i].bias_acc
    pre_score /= (T*D*num_candidate)
    mimic_label_copy_score /= (T*D*num_candidate)
    mimic_label_delete_socre /= (T*D*num_candidate)
    mimic_bias_copy_score /= (T*D*num_candidate)
    mimic_bias_delete_score /= (T*D*num_candidate)
    data_denoise_score /= (T*D*num_candidate)
    pre_loss /= (T*D*num_candidate)
    mimic_label_copy_loss /= (T*D*num_candidate)
    mimic_label_delete_loss /= (T*D*num_candidate)
    mimic_bias_copy_loss /= (T*D*num_candidate)
    mimic_bias_delete_loss /= (T*D*num_candidate)
    data_denoise_loss /= (T*D*num_candidate)
    pre_acc /= (T*D*num_candidate)
    mimic_label_copy_acc /= (T*D*num_candidate)
    mimic_label_delete_acc /= (T*D*num_candidate)
    mimic_bias_copy_acc /= (T*D*num_candidate)
    mimic_bias_delete_acc /= (T*D*num_candidate)
    data_denoise_acc /= (T*D*num_candidate)
    pre_bias_loss /= (T*D*num_candidate)
    mimic_label_copy_bias_loss /= (T*D*num_candidate)
    mimic_label_delete_bias_loss /= (T*D*num_candidate)
    mimic_bias_copy_bias_loss /= (T*D*num_candidate)
    mimic_bias_delete_bias_loss /= (T*D*num_candidate)
    data_denoise_bias_loss /= (T*D*num_candidate)
    pre_bias_acc /= (T*D*num_candidate)
    mimic_label_copy_bias_acc /= (T*D*num_candidate)
    mimic_label_delete_bias_acc /= (T*D*num_candidate)
    mimic_bias_copy_bias_acc /= (T*D*num_candidate)
    mimic_bias_delete_bias_acc /= (T*D*num_candidate)
    data_denoise_bias_acc /= (T*D*num_candidate)
    print("original score: ", '%.4f'%pre_score, ", mimic label copy: ", '%.4f'%(mimic_label_copy_score - pre_score), ", mimic label delete: ", '%.4f'%(mimic_label_delete_socre - pre_score), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_score - pre_score), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_score - pre_score), ", data denoise: ", '%.4f'%(data_denoise_score - pre_score))
    print("original loss: ", '%.4f'%pre_loss, ", mimic label copy: ", '%.4f'%(mimic_label_copy_loss - pre_loss), ", mimic label delete: ", '%.4f'%(mimic_label_delete_loss - pre_loss), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_loss - pre_loss), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_loss - pre_loss), ", data denoise: ", '%.4f'%(data_denoise_loss - pre_loss))
    print("original acc: ", '%.4f'%pre_acc, ", mimic label copy: ", '%.4f'%(mimic_label_copy_acc - pre_acc), ", mimic label delete: ", '%.4f'%(mimic_label_delete_acc - pre_acc), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_acc - pre_acc), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_acc - pre_acc), ", data denoise: ", '%.4f'%(data_denoise_acc - pre_acc))
    print("&", "{}\\%".format(noise_level),"&", "PMI", "&", '%.4f'%pre_score, "&", '%.4f'%(mimic_label_copy_score - pre_score), "&", '%.4f'%(mimic_label_delete_socre - pre_score), "&", '%.4f'%(mimic_bias_copy_score - pre_score), "&", '%.4f'%(mimic_bias_delete_score - pre_score), "&", '%.4f'%(data_denoise_score - pre_score), "\\\\")
    print("&", "&", "Loss", "&", '%.4f'%pre_loss, "&", '%.4f'%(mimic_label_copy_loss - pre_loss), "&", '%.4f'%(mimic_label_delete_loss - pre_loss), "&", '%.4f'%(mimic_bias_copy_loss - pre_loss), "&", '%.4f'%(mimic_bias_delete_loss - pre_loss), "&", '%.4f'%(data_denoise_loss - pre_loss), "\\\\")
    print("&", "&", "Acc", "&", '%.4f'%pre_acc, "&", '%.4f'%(mimic_label_copy_acc - pre_acc), "&", '%.4f'%(mimic_label_delete_acc - pre_acc), "&", '%.4f'%(mimic_bias_copy_acc - pre_acc), "&", '%.4f'%(mimic_bias_delete_acc - pre_acc), "&", '%.4f'%(data_denoise_acc - pre_acc), "\\\\")
    print("original loss for cluster a0: ", '%.4f'%pre_bias_loss[0], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_loss[0] - pre_bias_loss[0]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_loss[0] - pre_bias_loss[0]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_loss[0] - pre_bias_loss[0]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_loss[0] - pre_bias_loss[0]), ", data denoise: ", '%.4f'%(data_denoise_bias_loss[0] - pre_bias_loss[0]))
    print("original loss for cluster b0: ", '%.4f'%pre_bias_loss[1], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_loss[1] - pre_bias_loss[1]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_loss[1] - pre_bias_loss[1]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_loss[1] - pre_bias_loss[1]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_loss[1] - pre_bias_loss[1]), ", data denoise: ", '%.4f'%(data_denoise_bias_loss[1] - pre_bias_loss[1]))
    print("original loss for cluster a1: ", '%.4f'%pre_bias_loss[2], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_loss[2] - pre_bias_loss[2]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_loss[2] - pre_bias_loss[2]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_loss[2] - pre_bias_loss[2]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_loss[2] - pre_bias_loss[2]), ", data denoise: ", '%.4f'%(data_denoise_bias_loss[2] - pre_bias_loss[2]))
    print("original loss for cluster b1: ", '%.4f'%pre_bias_loss[3], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_loss[3] - pre_bias_loss[3]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_loss[3] - pre_bias_loss[3]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_loss[3] - pre_bias_loss[3]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_loss[3] - pre_bias_loss[3]), ", data denoise: ", '%.4f'%(data_denoise_bias_loss[3] - pre_bias_loss[3]))
    print("original acc for cluster a0: ", '%.4f'%pre_bias_acc[0], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_acc[0] - pre_bias_acc[0]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_acc[0] - pre_bias_acc[0]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_acc[0] - pre_bias_acc[0]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_acc[0] - pre_bias_acc[0]), ", data denoise: ", '%.4f'%(data_denoise_bias_acc[0] - pre_bias_acc[0]))
    print("original acc for cluster b0: ", '%.4f'%pre_bias_acc[1], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_acc[1] - pre_bias_acc[1]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_acc[1] - pre_bias_acc[1]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_acc[1] - pre_bias_acc[1]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_acc[1] - pre_bias_acc[1]), ", data denoise: ", '%.4f'%(data_denoise_bias_acc[1] - pre_bias_acc[1]))
    print("original acc for cluster a1: ", '%.4f'%pre_bias_acc[2], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_acc[2] - pre_bias_acc[2]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_acc[2] - pre_bias_acc[2]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_acc[2] - pre_bias_acc[2]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_acc[2] - pre_bias_acc[2]), ", data denoise: ", '%.4f'%(data_denoise_bias_acc[2] - pre_bias_acc[2]))
    print("original acc for cluster b1: ", '%.4f'%pre_bias_acc[3], ", mimic label copy: ", '%.4f'%(mimic_label_copy_bias_acc[3] - pre_bias_acc[3]), ", mimic label delete: ", '%.4f'%(mimic_label_delete_bias_acc[3] - pre_bias_acc[3]), ", mimic bias copy: ", '%.4f'%(mimic_bias_copy_bias_acc[3] - pre_bias_acc[3]), ", mimic bias delete: ", '%.4f'%(mimic_bias_delete_bias_acc[3] - pre_bias_acc[3]), ", data denoise: ", '%.4f'%(data_denoise_bias_acc[3] - pre_bias_acc[3]))
            # for i in range(num_candidate):
            #     print("original score: ", train_data[i].score, ", change of score: ", new_train_data[i].score - train_data[i].score)
            #     print("original base loss: ", train_data[i].base_loss, ", change of base loss: ", new_train_data[i].base_loss - train_data[i].base_loss)
            #     # print("original post loss: ", train_data[i].post_loss, ", change of post loss: ", new_train_data[i].post_loss - train_data[i].post_loss)
            #     # print("original smooth loss: ", train_data[i].smooth, ", change of smooth loss: ", new_train_data[i].smooth - train_data[i].smooth)5
            #     print("original base acc: ", train_data[i].base_acc, ", change of base acc: ", new_train_data[i].base_acc - train_data[i].base_acc)
            #     # print("original post acc: ", train_data[i].post_acc, ", change of post acc: ", new_train_data[i].post_acc - train_data[i].post_acc)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [01:24<00:00,  1.18it/s]

original score:  9.0650 , mimic label copy:  0.7016 , mimic label delete:  -2.0716 , mimic bias copy:  0.7016 , mimic bias delete:  -2.0323 , data denoise:  0.1374
original loss:  0.4296 , mimic label copy:  -0.0171 , mimic label delete:  -0.0234 , mimic bias copy:  -0.0171 , mimic bias delete:  -0.0272 , data denoise:  -0.0385
original acc:  0.8078 , mimic label copy:  0.0188 , mimic label delete:  0.0104 , mimic bias copy:  0.0188 , mimic bias delete:  0.0118 , data denoise:  0.0197
& 5\% & PMI & 9.0650 & 0.7016 & -2.0716 & 0.7016 & -2.0323 & 0.1374 \\
& & Loss & 0.4296 & -0.0171 & -0.0234 & -0.0171 & -0.0272 & -0.0385 \\
& & Acc & 0.8078 & 0.0188 & 0.0104 & 0.0188 & 0.0118 & 0.0197 \\
original loss for cluster a0:  0.4579 , mimic label copy:  -0.0315 , mimic label delete:  -0.0975 , mimic bias copy:  -0.0315 , mimic bias delete:  -0.1077 , data denoise:  -0.0408
original loss for cluster b0:  0.2469 , mimic label copy:  0.0662 , mimic label delete:  0.2104 , mimic bias copy:  0.0662




In [220]:
# sample_test_X_a_0, sample_test_y_a_0 = images_a_0_embedding[4000:4400], torch.zeros(400).to(device)
# sample_test_X_a_1, sample_test_y_a_1 = images_a_1_embedding[4000:4400], torch.ones(400).to(device)
# sample_test_X_b_0, sample_test_y_b_0 = images_b_0_embedding[4000:4400], torch.zeros(400).to(device)
# sample_test_X_b_1, sample_test_y_b_1 = images_b_1_embedding[4000:4400], torch.ones(400).to(device)
# sample_test_X = torch.concatenate([sample_test_X_a_0, sample_test_X_a_1, sample_test_X_b_0, sample_test_X_b_1])
# sample_test_y = torch.concatenate([sample_test_y_a_0, sample_test_y_a_1, sample_test_y_b_0, sample_test_y_b_1])

# test = LogisticRegression(fit_intercept = False, C = penalty, max_iter=5000).fit(sample_test_X.cpu(), sample_test_y.cpu())
# print(test.score(P_x.cpu(), P_y.cpu()))
# mu_test = torch.tensor(test.coef_, dtype=torch.float32, device=device)
# Q_test = compute_hessian(mu_test, sample_test_X)

# L = torch.linalg.cholesky(Q_test)
# lg2 = 2 * torch.sum(torch.log(torch.diagonal(L)))
# train_data_X = torch.concatenate([
#     images_a_0_embedding[:1600],
#     images_b_0_embedding[:400],
#     images_a_1_embedding[:1600],
#     images_b_1_embedding[:400],
#     ])
# train_data_y = torch.concatenate([torch.zeros(1440), torch.ones(160), torch.zeros(360), torch.ones(40+1440), torch.zeros(160), torch.ones(360), torch.zeros(40)]).to(device)
# # train_data_y = torch.concatenate([torch.zeros(2000), torch.ones(2000)]).to(device)
# print(test.score(train_data_X.cpu(), train_data_y.cpu()))
# score, _, _ = compute_data_score_err(mu_test, Q_test, sample_test_X, sample_test_y, train_data_X, train_data_y, lg2)
# print(score)

In [221]:
# sample_test_X_a_0, sample_test_y_a_0 = subsample(images_a_0_embedding[4000:], torch.zeros(1000).to(device), 80)
# sample_test_X_a_1, sample_test_y_a_1 = subsample(images_a_1_embedding[4000:], torch.ones(1000).to(device), 80)
# sample_test_X_b_0, sample_test_y_b_0 = subsample(images_b_0_embedding[4000:], torch.zeros(1000).to(device), 80)
# sample_test_X_b_1, sample_test_y_b_1 = subsample(images_b_1_embedding[4000:], torch.ones(1000).to(device), 80)
# sample_test_X = torch.concatenate([sample_test_X_a_0, sample_test_X_a_1, sample_test_X_b_0, sample_test_X_b_1])
# sample_test_y = torch.concatenate([sample_test_y_a_0, sample_test_y_a_1, sample_test_y_b_0, sample_test_y_b_1])

# test = LogisticRegression(fit_intercept = False, C = penalty, max_iter=5000).fit(sample_test_X.cpu(), sample_test_y.cpu())
# mu_test = torch.tensor(test.coef_, dtype=torch.float32, device=device)
# Q_test = compute_hessian(mu_test, sample_test_X)

# L = torch.linalg.cholesky(Q_test)
# lg2 = 2 * torch.sum(torch.log(torch.diagonal(L)))
# scores = []
# for i in tqdm(range(1000)):
#     train_data_X = torch.concatenate([
#     images_a_0_embedding[:i+1],
#     images_b_0_embedding[:i+1],
#     images_a_1_embedding[:i+1],
#     images_b_1_embedding[:i+1],
#     ])
#     train_data_y = torch.concatenate([torch.zeros(2*i+2), torch.ones(2*i+2)]).to(device)
#     score, _, _ = compute_data_score_err(mu_test, Q_test, sample_test_X, sample_test_y, train_data_X, train_data_y, lg2)
#     scores.append(score)
# # Prepare X-axis values (4 * i)
# x_values = [4 * i for i in range(1000)]

# # Plot scores
# plt.figure(figsize=(8, 6))
# plt.plot(x_values, scores, marker='o', linestyle='-', color='b', label='Score')
# plt.xlabel('Training Data Size')
# plt.ylabel('Score')
# plt.title('Scores vs. Training Data Size')
# plt.legend()
# plt.grid(True)
# plt.show()

In [222]:
# # from pyinstrument import Profiler

# # profiler = Profiler()
# # profiler.start()
# x_label_all = []

# single_smooth_post_L_all = []
# single_smooth_post_H_all = []
# single_score_post_L_all = []
# single_score_post_H_all = []
# single_post_base_loss_L_all = []
# single_post_base_loss_H_all = []
# single_post_base_acc_L_all = []
# single_post_base_acc_H_all = []

# smooth_post_L_all = []
# smooth_post_H_all = []
# score_post_L_all = []
# score_post_H_all = []
# post_base_loss_L_all = []
# post_base_loss_H_all = []
# post_base_acc_L_all = []
# post_base_acc_H_all = []

# single_acc_all_all = []
# acc_all_all = []
# single_loss_all_all = []
# loss_all_all = []
# noise_ratio_all_all = []
# label_mae_all_all = []
# bias_mae_all_all = []

# for test_size in test_size_num:
#     x_label = []

#     single_smooth_post_L = []
#     single_smooth_post_H = []
#     single_score_post_L = []
#     single_score_post_H = []
#     single_post_base_loss_L = []
#     single_post_base_loss_H = []
#     single_post_base_acc_L = []
#     single_post_base_acc_H = []

#     smooth_post_L = []
#     smooth_post_H = []
#     score_post_L = []
#     score_post_H = []
#     post_base_loss_L = []
#     post_base_loss_H = []
#     post_base_acc_L = []
#     post_base_acc_H = []

#     single_acc_all = []
#     acc_all = []
#     single_loss_all = []
#     loss_all = []
#     noise_ratio_all = []
#     label_mae_all = []
#     bias_mae_all = []

#     # for num_candidate in range(candidate_L, candidate_H+1, candidate_step):
#     for train_size in train_size_num:
#         single_total_summary = []
#         total_summary = []
#         total_noise_ratio = []
#         total_label_mae = []
#         total_bias_mae = []
#         for i in range(len(top_candidate)):
#             single_total_summary.append(summary([0,0], [0,0], [0,0], [0,0], result(0,0,0,0,0,0,0,0,0,0,0,0)))
#             total_summary.append(summary([0,0], [0,0], [0,0], [0,0], result(0,0,0,0,0,0,0,0,0,0,0,0)))
#             total_noise_ratio.append(np.zeros(6))
#             total_label_mae.append(np.zeros(6))
#             total_bias_mae.append(np.zeros(6))
#         for d in tqdm(range(D)):
#             test_X = P_x
#             test_y = P_y
#             # test = LogisticRegression(fit_intercept = False, C = penalty, max_iter=50000).fit(test_X.cpu(), test_y.cpu())
#             # acc = test.score(test_X.cpu(), test_y.cpu())
#             # print(acc)
#             # sys.exit()
#             num_candidate = candidate_num
#             train_data = generate_train_cifar10(train_size, num_candidate)
#             single_total_results = []
#             total_results = []
#             single_results = []
#             results = []
#             for i in range(len(top_candidate)):
#                 single_total_results.append(result(0,0,0,0,0,0,0,0,0,0,0,0))
#                 total_results.append(result(0,0,0,0,0,0,0,0,0,0,0,0))
#                 single_results.append([])
#                 results.append([])
#             for t in range(T):
#                 not_found = True
#                 while not_found:
#                     sample_test_X, sample_test_y = subsample(test_X, test_y, test_size)
#                     if torch.sum(sample_test_y)>0 and torch.sum(sample_test_y)<test_size:
#                         not_found = False
                
#                 get_err_score(train_data, sample_test_X, sample_test_y, num_candidate)
#                 sorted_score = sorted(train_data, key=lambda x: x.score, reverse = True)
#                 sorted_post_loss = sorted(train_data, key=lambda x: x.post_loss, reverse = False)
#                 sorted_base_loss = sorted(train_data, key=lambda x: x.base_loss, reverse = False)
#                 sorted_smooth = sorted(train_data, key=lambda x: x.smooth, reverse = False)
#                 sorted_post_acc = sorted(train_data, key=lambda x: x.post_acc, reverse = True)
#                 sorted_base_acc = sorted(train_data, key=lambda x: x.base_acc, reverse = True)

#                 for i in range(len(top_candidate)):
#                     for j in range(top_candidate[i]):
#                         total_noise_ratio[i] += np.array([sorted_score[j].noise_ratio, sorted_post_loss[j].noise_ratio, sorted_base_loss[j].noise_ratio, sorted_smooth[j].noise_ratio, sorted_post_acc[j].noise_ratio, sorted_base_acc[j].noise_ratio])
#                         total_label_mae[i] += np.array([sorted_score[j].label_mae, sorted_post_loss[j].label_mae, sorted_base_loss[j].label_mae, sorted_smooth[j].label_mae, sorted_post_acc[j].label_mae, sorted_base_acc[j].label_mae])
#                         total_bias_mae[i] += np.array([sorted_score[j].bias_mae, sorted_post_loss[j].bias_mae, sorted_base_loss[j].bias_mae, sorted_smooth[j].bias_mae, sorted_post_acc[j].bias_mae, sorted_base_acc[j].bias_mae])

#                 score_X = sorted_score[0].X
#                 score_y = sorted_score[0].y
#                 post_loss_X = sorted_post_loss[0].X
#                 post_loss_y = sorted_post_loss[0].y
#                 base_loss_X = sorted_base_loss[0].X
#                 base_loss_y = sorted_base_loss[0].y
#                 smooth_X = sorted_smooth[0].X
#                 smooth_y = sorted_smooth[0].y
#                 post_acc_X = sorted_post_acc[0].X
#                 post_acc_y = sorted_post_acc[0].y
#                 base_acc_X = sorted_base_acc[0].X
#                 base_acc_y = sorted_base_acc[0].y

#                 for j in range(1, top_candidate[-1]):
#                     score_X = torch.concatenate((score_X, sorted_score[j].X), axis = 0)
#                     score_y = torch.concatenate((score_y, sorted_score[j].y), axis = 0)
#                     post_loss_X = torch.concatenate((post_loss_X, sorted_post_loss[j].X), axis = 0)
#                     post_loss_y = torch.concatenate((post_loss_y, sorted_post_loss[j].y), axis = 0)
#                     base_loss_X = torch.concatenate((base_loss_X, sorted_base_loss[j].X), axis = 0)
#                     base_loss_y = torch.concatenate((base_loss_y, sorted_base_loss[j].y), axis = 0)
#                     smooth_X = torch.concatenate((smooth_X, sorted_smooth[j].X), axis = 0)
#                     smooth_y = torch.concatenate((smooth_y, sorted_smooth[j].y), axis = 0)
#                     post_acc_X = torch.concatenate((post_acc_X, sorted_post_acc[j].X), axis = 0)
#                     post_acc_y = torch.concatenate((post_acc_y, sorted_post_acc[j].y), axis = 0)
#                     base_acc_X = torch.concatenate((base_acc_X, sorted_base_acc[j].X), axis = 0)
#                     base_acc_y = torch.concatenate((base_acc_y, sorted_base_acc[j].y), axis = 0)

#                 for i in range(len(top_candidate)):
#                     single_score_loss, single_score_acc, score_loss, score_acc = compute_data_err_acc(test_X, test_y, score_X[:int(top_candidate[i]*train_size)], score_y[:int(top_candidate[i]*train_size)])
#                     single_post_loss_loss, single_post_loss_acc, post_loss_loss, post_loss_acc = compute_data_err_acc(test_X, test_y, post_loss_X[:int(top_candidate[i]*train_size)], post_loss_y[:int(top_candidate[i]*train_size)])
#                     single_base_loss_loss, single_base_loss_acc, base_loss_loss, base_loss_acc = compute_data_err_acc(test_X, test_y, base_loss_X[:int(top_candidate[i]*train_size)], base_loss_y[:int(top_candidate[i]*train_size)])
#                     single_smooth_loss, single_smooth_acc, smooth_loss, smooth_acc = compute_data_err_acc(test_X, test_y, smooth_X[:int(top_candidate[i]*train_size)], smooth_y[:int(top_candidate[i]*train_size)])
#                     single_post_acc_loss, single_post_acc_acc, post_acc_loss, post_acc_acc = compute_data_err_acc(test_X, test_y, post_acc_X[:int(top_candidate[i]*train_size)], post_acc_y[:int(top_candidate[i]*train_size)])
#                     single_base_acc_loss, single_base_acc_acc, base_acc_loss, base_acc_acc = compute_data_err_acc(test_X, test_y, base_acc_X[:int(top_candidate[i]*train_size)], base_acc_y[:int(top_candidate[i]*train_size)])

#                     single_resi = result(single_score_loss, single_score_acc, single_post_loss_loss, single_post_loss_acc, single_base_loss_loss, single_base_loss_acc, single_smooth_loss, single_smooth_acc, single_post_acc_loss, single_post_acc_acc, single_base_acc_loss, single_base_acc_acc)
#                     resi = result(score_loss, score_acc, post_loss_loss, post_loss_acc, base_loss_loss, base_loss_acc, smooth_loss, smooth_acc, post_acc_loss, post_acc_acc, base_acc_loss, base_acc_acc)
#                     single_results[i].append(single_resi)
#                     results[i].append(resi)
#                     single_total_results[i].add(single_resi)
#                     total_results[i].add(resi)

#                     # print(single_resi.getacc(), single_resi.getloss(), resi.getacc(), resi.getloss())
#                     # sys.exit()
#             for i in range(len(top_candidate)):
#                 single_total_results[i].divide(T)
#                 total_results[i].divide(T)
#                 single_summ = summary([0, 0], [0, 0], [0, 0], [0, 0], single_total_results[i])
#                 summ = summary([0, 0], [0, 0], [0, 0], [0, 0], total_results[i])

#                 for t in range(T):
#                     if single_results[i][t].smooth_acc >= single_results[i][t].post_loss_acc:
#                         single_summ.percentage_smooth_post[1] += 1/T*100
#                     if single_results[i][t].smooth_acc > single_results[i][t].post_loss_acc:
#                         single_summ.percentage_smooth_post[0] += 1/T*100
#                     if single_results[i][t].score_acc >= single_results[i][t].post_loss_acc:
#                         single_summ.percentage_score_post[1] += 1/T*100
#                     if single_results[i][t].score_acc > single_results[i][t].post_loss_acc:
#                         single_summ.percentage_score_post[0] += 1/T*100
#                     if single_results[i][t].post_loss_acc >= single_results[i][t].base_loss_acc:
#                         single_summ.percentage_post_base_loss[1] += 1/T*100
#                     if single_results[i][t].post_loss_acc > single_results[i][t].base_loss_acc:
#                         single_summ.percentage_post_base_loss[0] += 1/T*100
#                     if single_results[i][t].post_acc_acc >= single_results[i][t].base_acc_acc:
#                         single_summ.percentage_post_base_acc[1] += 1/T*100
#                     if single_results[i][t].post_acc_acc > single_results[i][t].base_acc_acc:
#                         single_summ.percentage_post_base_acc[0] += 1/T*100

#                     if results[i][t].smooth_acc >= results[i][t].post_loss_acc:
#                         summ.percentage_smooth_post[1] += 1/T*100
#                     if results[i][t].smooth_acc > results[i][t].post_loss_acc:
#                         summ.percentage_smooth_post[0] += 1/T*100
#                     if results[i][t].score_acc >= results[i][t].post_loss_acc:
#                         summ.percentage_score_post[1] += 1/T*100
#                     if results[i][t].score_acc > results[i][t].post_loss_acc:
#                         summ.percentage_score_post[0] += 1/T*100
#                     if results[i][t].post_loss_acc >= results[i][t].base_loss_acc:
#                         summ.percentage_post_base_loss[1] += 1/T*100
#                     if results[i][t].post_loss_acc > results[i][t].base_loss_acc:
#                         summ.percentage_post_base_loss[0] += 1/T*100
#                     if results[i][t].post_acc_acc >= results[i][t].base_acc_acc:
#                         summ.percentage_post_base_acc[1] += 1/T*100
#                     if results[i][t].post_acc_acc > results[i][t].base_acc_acc:
#                         summ.percentage_post_base_acc[0] += 1/T*100
#                 single_total_summary[i].add(single_summ)
#                 total_summary[i].add(summ)
#         for i in range(len(top_candidate)):
#             single_total_summary[i].divide(D)
#             total_summary[i].divide(D)
#             total_noise_ratio[i] = total_noise_ratio[i] / (T*D*top_candidate[i])
#             total_label_mae[i] = total_label_mae[i] / (T*D*top_candidate[i])
#             total_bias_mae[i] = total_bias_mae[i] / (T*D*top_candidate[i])
#             label = str(top_candidate[i]) + ", " + str(test_size) + ", " + str(num_candidate)
#             x_label.append(label)
#             single_acc = single_total_summary[i].getacc()
#             acc = total_summary[i].getacc()
#             single_loss = single_total_summary[i].getloss()
#             loss = total_summary[i].getloss()
#             print("Acc " + "& $"+label+"$ & " + "{:.4f}".format(single_acc[0]) + " & " + "{:.4f}".format(single_acc[1]) + " & " + "{:.4f}".format(single_acc[2]) + " & " + "{:.4f}".format(single_acc[3]) + " & " + "{:.4f}".format(single_acc[4]) + " & " + "{:.4f}".format(single_acc[5]) +" \\\\ " )
#             print("Aver. Acc " + "& $"+label+"$ & " + "{:.4f}".format(acc[0]) + " & " + "{:.4f}".format(acc[1]) + " & " + "{:.4f}".format(acc[2]) + " & " + "{:.4f}".format(acc[3]) + " & " + "{:.4f}".format(acc[4]) + " & " + "{:.4f}".format(acc[5]) +" \\\\ " )
#             # print("$"+label+"$ & " + "{:.4f}".format(single_acc[0]-acc[0]) + " & " + "{:.4f}".format(single_acc[1]-acc[1]) + " & " + "{:.4f}".format(single_acc[2]-acc[2]) + " & " + "{:.4f}".format(single_acc[3]-acc[3]) + " & " + "{:.4f}".format(single_acc[4]-acc[4]) + " & " + "{:.4f}".format(single_acc[5]-acc[5]) + " \\\\ " )
#             print("Loss " + "& $"+label+"$ & " + "{:.4f}".format(single_loss[0]) + " & " + "{:.4f}".format(single_loss[1]) + " & " + "{:.4f}".format(single_loss[2]) + " & " + "{:.4f}".format(single_loss[3]) + " & " + "{:.4f}".format(single_loss[4]) + " & " + "{:.4f}".format(single_loss[5]) +" \\\\ " )
#             print("Aver. Loss " + "& $"+label+"$ & " + "{:.4f}".format(loss[0]) + " & " + "{:.4f}".format(loss[1]) + " & " + "{:.4f}".format(loss[2]) + " & " + "{:.4f}".format(loss[3]) + " & " + "{:.4f}".format(loss[4]) + " & " + "{:.4f}".format(loss[5]) +" \\\\ " )
#             print("Noise Ratio " + "& $" + label + "$ & " + "{:.4f}".format(total_noise_ratio[i][0]) + " & " + "{:.4f}".format(total_noise_ratio[i][1]) + " & " + "{:.4f}".format(total_noise_ratio[i][2]) + " & " + "{:.4f}".format(total_noise_ratio[i][3]) + " & " + "{:.4f}".format(total_noise_ratio[i][4]) + " & " + "{:.4f}".format(total_noise_ratio[i][5]) +" \\\\ " )
#             print("Label MAE " + "& $" + label + "$ & " + "{:.4f}".format(total_label_mae[i][0]) + " & " + "{:.4f}".format(total_label_mae[i][1]) + " & " + "{:.4f}".format(total_label_mae[i][2]) + " & " + "{:.4f}".format(total_label_mae[i][3]) + " & " + "{:.4f}".format(total_label_mae[i][4]) + " & " + "{:.4f}".format(total_label_mae[i][5]) +" \\\\ " + " \\hline")
#             print("Bias MAE " + "& $" + label + "$ & " + "{:.4f}".format(total_bias_mae[i][0]) + " & " + "{:.4f}".format(total_bias_mae[i][1]) + " & " + "{:.4f}".format(total_bias_mae[i][2]) + " & " + "{:.4f}".format(total_bias_mae[i][3]) + " & " + "{:.4f}".format(total_bias_mae[i][4]) + " & " + "{:.4f}".format(total_bias_mae[i][5]) +" \\\\ " + " \\hline")
#             # print("$"+label+"$ & " + "{:.4f}".format(single_loss[0]-loss[0]) + " & " + "{:.4f}".format(single_loss[1]-loss[1]) + " & " + "{:.4f}".format(single_loss[2]-loss[2]) + " & " + "{:.4f}".format(single_loss[3]-loss[3]) + " & " + "{:.4f}".format(single_loss[4]-loss[4]) + " & " + "{:.4f}".format(single_loss[5]-loss[5]) + " \\\\ " )
#             single_smooth_post_L.append(single_total_summary[i].percentage_smooth_post[0])
#             single_smooth_post_H.append(single_total_summary[i].percentage_smooth_post[1])
#             single_score_post_L.append(single_total_summary[i].percentage_score_post[0])
#             single_score_post_H.append(single_total_summary[i].percentage_score_post[1])
#             single_post_base_loss_L.append(single_total_summary[i].percentage_post_base_loss[0])
#             single_post_base_loss_H.append(single_total_summary[i].percentage_post_base_loss[1])
#             single_post_base_acc_L.append(single_total_summary[i].percentage_post_base_acc[0])
#             single_post_base_acc_H.append(single_total_summary[i].percentage_post_base_acc[1])

#             smooth_post_L.append(total_summary[i].percentage_smooth_post[0])
#             smooth_post_H.append(total_summary[i].percentage_smooth_post[1])
#             score_post_L.append(total_summary[i].percentage_score_post[0])
#             score_post_H.append(total_summary[i].percentage_score_post[1])
#             post_base_loss_L.append(total_summary[i].percentage_post_base_loss[0])
#             post_base_loss_H.append(total_summary[i].percentage_post_base_loss[1])
#             post_base_acc_L.append(total_summary[i].percentage_post_base_acc[0])
#             post_base_acc_H.append(total_summary[i].percentage_post_base_acc[1])

#             single_acc_all.append(single_acc)
#             acc_all.append(acc)
#             single_loss_all.append(single_loss)
#             loss_all.append(loss)
#             noise_ratio_all.append(total_noise_ratio[i])
#             label_mae_all.append(total_label_mae[i])
#             bias_mae_all.append(total_bias_mae[i])
#         # profiler.stop()

#         # profiler.print()
#         # sys.exit()
    
#     x_label_all += x_label
#     single_smooth_post_L_all += single_smooth_post_L
#     single_smooth_post_H_all += single_smooth_post_H
#     single_score_post_L_all += single_score_post_L
#     single_score_post_H_all += single_score_post_H
#     single_post_base_loss_L_all += single_post_base_loss_L
#     single_post_base_loss_H_all += single_post_base_loss_H
#     single_post_base_acc_L_all += single_post_base_acc_L
#     single_post_base_acc_H_all += single_post_base_acc_H

#     smooth_post_L_all += smooth_post_L
#     smooth_post_H_all += smooth_post_H
#     score_post_L_all += score_post_L
#     score_post_H_all += score_post_H
#     post_base_loss_L_all += post_base_loss_L
#     post_base_loss_H_all += post_base_loss_H
#     post_base_acc_L_all += post_base_acc_L
#     post_base_acc_H_all += post_base_acc_H

#     single_acc_all_all += single_acc_all
#     acc_all_all += acc_all
#     single_loss_all_all += single_loss_all
#     loss_all_all += loss_all
#     noise_ratio_all_all += noise_ratio_all
#     label_mae_all_all += label_mae_all
#     bias_mae_all_all += bias_mae_all

#     generate_plot(x_label, single_smooth_post_L, single_smooth_post_H, 'Posterior Predictive', 'Small Smooth', 'single_smooth_post')
#     generate_plot(x_label, single_score_post_L, single_score_post_H, 'Posterior Predictive', 'PMI', 'single_score_post')
#     generate_plot(x_label, single_post_base_loss_L, single_post_base_loss_H, 'Cross Entropy', 'Average Cross Entropy', 'single_post_base_loss')
#     generate_plot(x_label, single_post_base_acc_L, single_post_base_acc_H, 'Accuracy', 'Average Accuracy', 'single_post_base_acc')
#     generate_plot(x_label, smooth_post_L, smooth_post_H, 'Posterior Predictive', 'Small Smooth', 'smooth_post')
#     generate_plot(x_label, score_post_L, score_post_H, 'Posterior Predictive', 'PMI', 'score_post')
#     generate_plot(x_label, post_base_loss_L, post_base_loss_H, 'Cross Entropy', 'Average Cross Entropy', 'post_base_loss')
#     generate_plot(x_label, post_base_acc_L, post_base_acc_H, 'Accuracy', 'Average Accuracy', 'post_base_acc')

# # profiler.stop()

# # profiler.print()
