In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from rac.pred_models import CustomTensorDataset, ACCNet
import numpy as np
np.random.seed(42)

In [2]:
transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
        ])

batch_size = 256

trainset = torchvision.datasets.CIFAR10(root='../datasets/cifar10_original_data', train=True,
                                        download=True, transform=transform)
#trainset.data = trainset.data
#trainset.targets = trainset.targets
X_train = trainset.data
y_train = trainset.targets
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=8)

testset = torchvision.datasets.CIFAR10(root='../datasets/cifar10_original_data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1024, num_workers=8)

X_test = testset.data
y_test = testset.targets

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    

Files already downloaded and verified
Files already downloaded and verified


In [5]:
import numpy as np
X_train = np.load("X_train_cifar.npy")
y_train = np.load("Y_train_cifar.npy")
X_test = np.load("X_test_cifar.npy")
y_test = np.load("Y_test_cifar.npy")

In [122]:
from sklearn.datasets import make_classification
X, Y = make_classification(
    n_samples=5000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    n_repeated=0,
    n_classes=10,
    n_clusters_per_class=1,
    weights=None,
    flip_y=0,
    class_sep=1.0,
    hypercube=True,
    shift=0.0,
    scale=1.0,
    shuffle=True,
    random_state=42)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [129]:
y_train.shape

(4000,)

In [86]:
#X = np.load("../datasets/cifar10_data/cifar10_embedding.npy")
#Y = np.load("../datasets/cifar10_data/cifar10_labels.npy")

In [87]:
#from sklearn import preprocessing
#X = preprocessing.StandardScaler().fit_transform(X)
#from sklearn.decomposition import PCA
#pca = PCA(n_components=100)
#X = pca.fit_transform(X)

In [88]:
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [89]:
#X.shape

In [123]:
def clustering_from_clustering_solution(clustering_solution):
    num_clusters = np.max(clustering_solution) + 1
    clustering = [[] for _ in range(num_clusters)]
    for i in range(len(clustering_solution)):
        clustering[clustering_solution[i]].append(i)
    return clustering, num_clusters

def sim_matrix_from_clustering(clustering, N):
    pairwise_similarities = -np.ones((N, N))
    for cind in clustering:
        pairwise_similarities[np.ix_(cind, cind)] = 1
    return pairwise_similarities

In [124]:
train_sol = clustering_from_clustering_solution(y_train[:10000])
train_sim_matrix = sim_matrix_from_clustering(train_sol[0], len(y_train[:10000]))

test_sol = clustering_from_clustering_solution(y_test[:10000])
test_sim_matrix = sim_matrix_from_clustering(test_sol[0], len(y_test[:10000]))

In [125]:
train_sim_matrix.shape

(4000, 4000)

In [131]:
def get_pairs(prop_pos, prop_neg, sim_matrix, data):
    N = sim_matrix.shape[0]
    lower_triangle_indices = np.tril_indices(N, -1)
    ind_pos = np.where(sim_matrix[lower_triangle_indices] == 1)[0]
    ind_neg = np.where(sim_matrix[lower_triangle_indices] == -1)[0]
    num_pos = int(len(ind_pos)*prop_pos)
    num_neg = int(len(ind_neg)*prop_neg)
    print("num_pos: ", num_pos)
    print("num_neg: ", num_neg)
    ind_pos = np.random.choice(ind_pos, num_pos)
    ind_neg = np.random.choice(ind_neg, num_neg)
    if num_pos < num_neg:
        indices = np.concatenate([ind_neg, ind_pos])
    else:
        indices = np.concatenate([ind_pos, ind_neg])
    ind1, ind2 = lower_triangle_indices[0][indices], lower_triangle_indices[1][indices]
    x1 = data[ind1]
    x2 = data[ind2]
    y = sim_matrix[ind1, ind2]
    lab1 = np.where(y >= 0)
    lab2 = np.where(y < 0)
    y[lab1] = 1.0
    y[lab2] = 0.0
    return x1, x2, y

In [139]:
x1_train, x2_train, y_train_pairs = get_pairs(0.005, 0.005, train_sim_matrix, X_train)
x1_test, x2_test, y_test_pairs = get_pairs(0.04, 0.04, test_sim_matrix, X_test)

num_pos:  3991
num_neg:  35998
num_pos:  1994
num_neg:  17985


In [742]:
cifar_training_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

cifar_test_transform = transforms.Compose([
    #transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = CustomTensorDataset(x1_train, x2_train, y_train_pairs, train=True, transform=cifar_training_transform)
test_dataset = CustomTensorDataset(x1_test, x2_test, y_test_pairs, transform=cifar_test_transform)  

In [140]:
train_dataset = CustomTensorDataset(torch.Tensor(x1_train), torch.Tensor(x2_train), torch.Tensor(y_train_pairs), train=True, transform=None)
test_dataset = CustomTensorDataset(torch.Tensor(x1_test), torch.Tensor(x2_test), torch.Tensor(y_test_pairs), transform=None)

In [141]:
batch_size = 16
#class_sample_count = [10, 1, 20, 3, 4] # dataset has 10 class-1 samples, 1 class-2 samples, etc.
class_sample_count = np.unique(y_train_pairs, return_counts=True)[1].tolist()
#print(class_sample_count)
weights = 1/torch.Tensor(class_sample_count)
weights = weights[y_train_pairs]
#print(weights)
#sampler = torch.utils.data.sampler.WeightedRandomSampler(weights=weights, num_samples=len(weights), replacement=True)
#train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, batch_size=1024)
#trainloader = data_utils.DataLoader(train_dataset, batch_size = batch_size, shuffle=True, sampler = sampler)

In [142]:
#for x1, x2, y in train_loader:
    #print(np.unique(y, return_counts=True))

In [143]:
len(train_dataset)

71996

In [144]:
criterion = nn.BCEWithLogitsLoss()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [145]:
device

device(type='cuda', index=0)

In [146]:
net = ACCNet(base_net="three_layer_net", siamese=True, input_dim=10, p=0.5).to(device)

In [147]:
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)
#optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
#optimizer = torch.optim.SGD(net.parameters(), lr=0.0005, momentum=0.9)
#optimizer = torch.optim.SGD(net.parameters(), lr=0.0005, momentum=0.9, weight_decay=5e-4)
print("training...")
print(len(train_dataset))
net.train()
for epoch in range(150):  # loop over the dataset multiple times
    print(epoch)
    running_loss = 0.0
    step = 0
    for i, data in enumerate(train_loader, 0):
        #print(np.unique(data[2], return_counts=True))
        #print(data[2])
        # get the inputs; data is a list of [inputs, labels]
        x1, x2, labels = data[0].to(device), data[1].to(device), data[2].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(x1, x2)
        outputs = outputs.reshape((outputs.shape[0]))
        #labels = labels.reshape((labels.shape[0], 1))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        step += 1
    print("loss: ", running_loss/step)
    step = 0
    running_loss = 0.0

training...
71996
0
loss:  0.6439411264591747
1
loss:  0.4942533004548815
2
loss:  0.36534165697958737
3
loss:  0.27353165545976826
4
loss:  0.21810425380244852
5
loss:  0.17567392035796203
6
loss:  0.14667808479802785
7
loss:  0.1244002818390727
8
loss:  0.10896450776987088
9
loss:  0.09496620143197167
10
loss:  0.08579845904098733
11
loss:  0.0781878107434669
12
loss:  0.07047317224656904
13
loss:  0.06480762602030882
14
loss:  0.061026785379657264
15
loss:  0.055864155458131184
16
loss:  0.052307698093522856
17
loss:  0.04817258003979643
18
loss:  0.046511807226831375
19
loss:  0.04513697671993739
20
loss:  0.0424493223636285
21
loss:  0.0395495519141627
22
loss:  0.037830579976161564
23
loss:  0.035706712724073844
24
loss:  0.03339906605039343
25
loss:  0.032666830238965806
26
loss:  0.03101576941893331
27
loss:  0.031044589327617965
28
loss:  0.028582480365423383
29
loss:  0.02704673020204614
30
loss:  0.027018757194244042
31
loss:  0.026801318595744306
32
loss:  0.025270354269596

In [148]:
from scipy.stats import entropy as scipy_entropy
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
probs = torch.zeros([len(test_dataset), 2]).to(device)
net.eval()
with torch.no_grad():
    evaluated_instances = 0
    for data in test_loader:
        x1, x2, labels = data[0].to(device), data[1].to(device), data[2].to(device)
        # calculate outputs by running images through the network
        outputs = net(x1, x2)
        pred = nn.Sigmoid()(outputs)
        #print(pred)
        pred = pred.cpu().numpy()
        pred = pred.reshape(pred.shape[0])
        prob = torch.Tensor([1-pred, pred]).T.to(device)
        start_slice = evaluated_instances
        end_slice = start_slice + x1.shape[0]
        probs[start_slice:end_slice] = prob
        evaluated_instances = end_slice
        #entropy = scipy_entropy(prob)
        #pred[pred >= 0.5] = 1
        #pred[pred < 0.5] = 0
        #print(torch.max(pred.data, 1)[1])
        # the class with the highest energy is what we choose as prediction
        #_, predicted = torch.max(pred.data, 1)
probs = probs.cpu().numpy()
preds = np.argmax(probs, axis=1)
entropys = scipy_entropy(probs.T)

In [149]:
# Ensure model is on right device and is in TRAIN mode.
# Train mode is needed to activate randomness in dropout modules.
n_drop = 10
net.train()
# Create a tensor to hold probabilities
num_classes = 2
probs_mc = torch.zeros([len(test_dataset), 2]).to(device)
# Create a dataloader object to load the dataset

with torch.no_grad():
    # Repeat n_drop number of times to obtain n_drop dropout samples per data instance
    for i in range(n_drop):
        evaluated_instances = 0
        for batch_idx, data in enumerate(test_loader):
            # Calculate softmax (probabilities) of predictions
            x1, x2, labels = data[0].to(device), data[1].to(device), data[2].to(device)
            outputs = net(x1, x2)
            pred = nn.Sigmoid()(outputs)
            pred = pred.cpu().numpy()
            pred = pred.reshape(pred.shape[0])
            prob = torch.Tensor([1-pred, pred]).T.to(device)
        
            # Accumulate the calculated batch of probabilities into the tensor to return
            start_slice = evaluated_instances
            end_slice = start_slice + x1.shape[0]
            probs_mc[start_slice:end_slice] += prob
            evaluated_instances = end_slice

# Divide through by n_drop to get average prob.
probs_mc /= n_drop
probs_mc = probs_mc.cpu().numpy()
preds_mc = np.argmax(probs_mc, axis=1)
entropys_mc = scipy_entropy(probs_mc.T)


In [180]:
#threshold = 0.01
threshold = 1e-13


In [181]:
inds = np.where(entropys <= threshold)[0]
preds_new = preds[inds]
y_test_pairs_new = y_test_pairs[inds]


In [182]:
inds = np.where(entropys_mc <= threshold)[0]
preds_new_mc = preds_mc[inds]
y_test_pairs_new_mc = y_test_pairs[inds]

In [183]:
from sklearn.metrics import classification_report
print(classification_report(y_test_pairs_new, preds_new))

              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      9407
         1.0       0.94      0.99      0.96       836

    accuracy                           0.99     10243
   macro avg       0.97      0.99      0.98     10243
weighted avg       0.99      0.99      0.99     10243



In [184]:
from sklearn.metrics import classification_report
print(classification_report(y_test_pairs_new_mc, preds_new_mc))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4182
         1.0       1.00      1.00      1.00       220

    accuracy                           1.00      4402
   macro avg       1.00      1.00      1.00      4402
weighted avg       1.00      1.00      1.00      4402

