In [1]:
import os

import torch
import numpy as np

In [2]:
from torchvision import datasets, transforms
from torchvision.transforms import InterpolationMode
# Per-channel statistics handed to Normalize in both pipelines below.
mean = (0.4914, 0.4822, 0.4465)
std = (0.247, 0.243, 0.262)

# Plain pipeline: tensor conversion followed by normalization.
transform_train_proper = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

# Degraded pipeline: resize to 8 and then back to 32 (bilinear, antialias=None)
# between the tensor conversion and the normalization.
transform_blurred = transforms.Compose(
    [transforms.ToTensor()]
    + [
        transforms.Resize(size, interpolation=InterpolationMode.BILINEAR, antialias=None)
        for size in (8, 32)
    ]
    + [transforms.Normalize(mean, std)]
)

In [3]:
from src.data.datasets import get_cifar10

In [4]:
from src import utils
DATASET_NAME = 'cifar10'

def get_held_out_data(dataset_name, nb_samples=50, seed=None):
    """Draw a class-balanced sample from the training split and save it under ``data/``.

    Args:
        dataset_name: Prefix used for the saved file names only; the samples
            always come from ``get_cifar10()``.
        nb_samples: Total number of samples requested. It is floor-divided by
            the number of classes, so the result holds
            ``(nb_samples // num_classes) * num_classes`` samples.
        seed: Optional seed for a private ``np.random.RandomState``. When
            ``None`` (the default) the global NumPy random state is used.

    Returns:
        Tuple ``(x_data, y_data)`` of NumPy arrays with the selected samples
        and their labels, ordered class by class (label 0 first).

    Side effects:
        Writes ``data/{dataset_name}_held_out_x.npy`` and
        ``data/{dataset_name}_held_out_y.npy``, creating ``data/`` if needed.
    """
    train_dataset, _, _ = get_cifar10()
    x_data = np.array(train_dataset.data)
    y_data = np.array(train_dataset.targets)
    num_classes = len(np.unique(y_data))
    nb_samples_per_class = nb_samples // num_classes

    # Both the `np.random` module and a RandomState instance expose `.choice`,
    # so the unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    idxs = np.concatenate([
        rng.choice(np.where(y_data == i)[0], size=nb_samples_per_class, replace=False)
        for i in range(num_classes)
    ])
    x_data = x_data[idxs]
    y_data = y_data[idxs]

    # Creating the directory this way avoids the check-then-create race.
    os.makedirs('data', exist_ok=True)
    np.save(f'data/{dataset_name}_held_out_x.npy', x_data)
    np.save(f'data/{dataset_name}_held_out_y.npy', y_data)
    return x_data, y_data

In [5]:
# Sample the held-out set (nb_samples=1000, split evenly across classes);
# the helper also writes the arrays to data/.
x_data, y_data = get_held_out_data(DATASET_NAME, nb_samples=1000)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Run every held-out image through the plain pipeline and stack the results
# into a single tensor; labels are wrapped as a tensor as well.
x_data_proper = torch.stack(list(map(transform_train_proper, x_data)))
y_data_proper = torch.from_numpy(y_data)

# Persist both tensors under data/.
torch.save(x_data_proper, f'data/{DATASET_NAME}_held_out_proper_x.pt')
torch.save(y_data_proper, f'data/{DATASET_NAME}_held_out_proper_y.pt')

In [None]:
# Same conversion as for the plain pipeline, but using the blurred transform.
x_data_blurred = torch.stack(tuple(transform_blurred(image) for image in x_data))
y_data_blurred = torch.from_numpy(y_data)

# Persist both tensors under data/.
torch.save(x_data_blurred, f'data/{DATASET_NAME}_held_out_blurred_x.pt')
torch.save(y_data_blurred, f'data/{DATASET_NAME}_held_out_blurred_y.pt')

In [None]:
from src.utils.prepare import prepare_loaders
# Batch size reused in the loader keyword arguments below.
batch_size = 500
loaders_params = {
    'batch_size': batch_size,
    'pin_memory': True,
    'num_workers': 8,
}
# NOTE(review): the empty dict is presumably the dataset-parameters argument —
# confirm against prepare_loaders.
loaders = prepare_loaders(DATASET_NAME, {}, loaders_params)

In [None]:
# Pull the first batch from the 'train' loader for inspection.
x, y = next(iter(loaders['train']))

In [None]:
import matplotlib.pyplot as plt
# Histogram of the `y` values in the sampled batch (presumably class labels).
plt.hist(y.cpu().numpy())

# Clustering

In [None]:
# Flatten every held-out image into one row vector. `reshape` may return a view
# of `x_data_proper`, so the rows are normalized out of place: an in-place `/=`
# would silently rescale `x_data_proper` too. New names are used so the raw
# `x_data` array stays available and earlier cells remain re-runnable.
x_flat = x_data_proper.reshape(x_data_proper.size(0), -1)
x_unit = x_flat / x_flat.norm(dim=1, keepdim=True)

# Pairwise cosine similarity between all held-out samples.
sim_m = x_unit @ x_unit.T

In [None]:
def retrieve_info(cluster_labels, y_train):
    """Relabel clusters by majority vote and measure how impure they are.

    Each cluster is assigned the ground-truth label that occurs most often
    among its members (ties resolve to the smallest label, as with
    ``argmax``).

    Args:
        cluster_labels: 1-D array-like of cluster ids, one per sample.
        y_train: 1-D array-like of non-negative integer ground-truth labels,
            aligned with ``cluster_labels``.

    Returns:
        Tuple ``(proper_labels, unsolicited_ratio)`` where ``proper_labels`` is
        a NumPy array holding the majority label of each sample's cluster and
        ``unsolicited_ratio`` is the fraction of samples whose ground-truth
        label differs from that majority label (``0.0`` for empty input).
    """
    cluster_labels = np.asarray(cluster_labels)
    y_train = np.asarray(y_train)

    reference_labels = {}
    nb_minority = 0
    # Iterate over the cluster ids that actually occur rather than assuming
    # they are exactly 0..num_classes-1: an empty cluster would make
    # bincount/argmax fail and an id outside that range would never be mapped.
    for label in np.unique(cluster_labels):
        dist = np.bincount(y_train[cluster_labels == label])
        reference_labels[label] = dist.argmax()
        nb_minority += dist.sum() - dist.max()

    proper_labels = np.array([reference_labels[label] for label in cluster_labels])
    total = cluster_labels.size
    unsolicited_ratio = nb_minority / total if total else 0.0
    return proper_labels, unsolicited_ratio

In [None]:
sim_m

In [None]:
from sklearn.cluster import SpectralClustering 

similarity_matrix_ = sim_m.cpu().numpy()
# The similarities are shifted by +1 before being used as a precomputed
# affinity, which keeps the entries non-negative (up to rounding) when sim_m
# holds cosine similarities. random_state is pinned so that the cluster
# assignment is reproducible across runs.
# NOTE(review): per the scikit-learn docs n_init only applies to
# assign_labels='kmeans' — confirm whether it is still needed here.
labels_pred = SpectralClustering(
    n_clusters=10,
    affinity='precomputed',
    n_init=100,
    assign_labels='discretize',
    random_state=0,
).fit_predict(1 + similarity_matrix_)

In [None]:
# Map every cluster id to a class label via retrieve_info and keep the ratio it reports.
# NOTE(review): labels_pred is overwritten with the mapped labels, so re-running
# this cell on its own feeds already-mapped labels back into retrieve_info.
labels_pred, unsolicited_ratio = retrieve_info(labels_pred, y_data)
unsolicited_ratio

In [None]:
labels_pred

In [None]:
y_data_proper

In [None]:
# Fraction of samples whose mapped cluster label equals the ground-truth label.
acc = np.mean(labels_pred == y_data_proper.numpy())
acc

In [None]:
(labels_pred == y_data_proper.numpy())

In [11]:
torch.tensor([[1, 2, 3], [4, 5, 6]]).max(dim=0)

torch.return_types.max(
values=tensor([4, 5, 6]),
indices=tensor([1, 1, 1]))

In [12]:
import os

import torch
import torchvision
import torchvision.transforms as transforms

In [13]:
# Tensor conversion followed by per-channel normalization with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

batch_size = 4

# Training split: shuffled batches, read from the directory named by CIFAR10_PATH.
trainset = torchvision.datasets.CIFAR10(
    root=os.environ['CIFAR10_PATH'],
    train=True,
    download=False,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Test split: same settings, but iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root=os.environ['CIFAR10_PATH'],
    train=False,
    download=False,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [17]:
# Pull the first batch from the test loader for inspection.
x,y = next(iter(testloader))

In [1]:
2.5e-2

0.025

In [3]:
import numpy as np

np.linspace(0, 800, 6)

array([  0., 160., 320., 480., 640., 800.])

In [None]:
# Stray, incomplete input (`fr`) removed: it referenced an undefined name and
# would raise NameError when the notebook is run top to bottom.

# Half: class-balanced subset with half of the training set

In [4]:
from src.data.datasets import get_cifar10
DATASET_NAME = 'cifar10'

def get_held_out_data(dataset_name, nb_samples=50, seed=None):
    """Sample class-balanced training indices and save them to ``data/{dataset_name}_idxs.npy``.

    NOTE(review): this redefines the ``get_held_out_data`` declared earlier in
    the notebook, which returned ``(x_data, y_data)``; this version only
    writes the indices to disk and returns ``None``.

    Args:
        dataset_name: Prefix used for the saved file name only; the labels
            always come from ``get_cifar10()``.
        nb_samples: Total number of indices requested. It is floor-divided by
            the number of classes, so ``(nb_samples // num_classes) *
            num_classes`` indices are saved.
        seed: Optional seed for a private ``np.random.RandomState``. When
            ``None`` (the default) the global NumPy random state is used.
    """
    train_dataset, _, _ = get_cifar10()
    y_data = np.array(train_dataset.targets)
    print(len(y_data))
    num_classes = len(np.unique(y_data))
    nb_samples_per_class = nb_samples // num_classes

    # Both the `np.random` module and a RandomState instance expose `.choice`,
    # so the unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    idxs = np.concatenate([
        rng.choice(np.where(y_data == i)[0], size=nb_samples_per_class, replace=False)
        for i in range(num_classes)
    ])

    # Creating the directory this way avoids the check-then-create race.
    os.makedirs('data', exist_ok=True)
    np.save(f'data/{dataset_name}_idxs.npy', idxs)

In [5]:
# Save indices for a class-balanced subset of 50000 // 2 = 25000 samples.
get_held_out_data(DATASET_NAME, nb_samples=50000//2)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
50000


In [10]:
# Index file written by get_held_out_data when dataset_name is 'cifar10'.
subset_path = 'data/cifar10_idxs.npy'

In [9]:
from src.data.datasets import get_cifar10
# NOTE(review): the recorded run of this cell ended in
# "UnpicklingError: STACK_GLOBAL requires str"; presumably get_cifar10 reads
# subset_path with a pickle-based loader while the file is a NumPy .npy — confirm.
# NOTE(review): on success this rebinds `datasets`, shadowing the torchvision
# `datasets` module imported in other cells of this notebook.
dataset_params = {'dataset_path': None, 'whether_aug': True, 'proper_normalization': True, 'subset_path': 'data/cifar10_idxs.npy'}
datasets = get_cifar10(**dataset_params)

Files already downloaded and verified


UnpicklingError: STACK_GLOBAL requires str

In [15]:
from torchvision import datasets, transforms
# Load the full training split from the directory named by CIFAR10_PATH.
dataset_path = os.environ['CIFAR10_PATH']
train_dataset = datasets.CIFAR10(root=dataset_path, train=True, download=True)

# Restrict the dataset to the indices stored in the .npy file at subset_path.
selected_indices = torch.tensor(np.load(subset_path))
train_dataset = torch.utils.data.Subset(dataset=train_dataset, indices=selected_indices)

Files already downloaded and verified


In [13]:
selected_indices.shape

torch.Size([25000])

In [16]:
train_dataset

<torch.utils.data.dataset.Subset at 0x7f2521c99a50>

In [17]:
len(train_dataset)

25000

# ResNet-18

In [1]:
from src.utils.prepare import prepare_model



In [2]:
# Backbone options forwarded to prepare_model inside model_params.
model_config = {
    'backbone_type': 'resnet18',
    'only_features': False,
    'batchnorm_layers': True,
    'width_scale': 1.0,
    'skips': True,
    'modify_resnet': True,
}
model_params = {
    'model_config': model_config,
    'num_classes': 10,
    'dataset_name': 'cifar10',
}

# Build the 'resnet_tunnel' model from the parameters above.
model = prepare_model('resnet_tunnel', model_params=model_params)

In [3]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), p