In [1]:
import sys
sys.path.insert(0, './../../../Models')
from sphere_points import generate_points

import numpy as np
np.random.seed(0)
from sklearn import metrics
import matplotlib.pyplot as plt

import torch
torch.manual_seed(0)
import torch.nn as nn
from tqdm import tqdm
from torch.optim import Adam
import torch.nn.functional as F
from torch.nn.functional import normalize, one_hot
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate

import torchvision
from torchvision.transforms import *
# from torchvision import transforms

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
print(device)

cuda


In [3]:
batch_size = 64
# load the dataset
transform = transforms.Compose(
    [transforms.ToTensor(), 
    transforms.Resize((64, 64))]) # this normalizes to [0,1]
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, 
                                           collate_fn = lambda x: tuple(x_.to(device) for x_ in default_collate(x)))
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, 
                                          collate_fn = lambda x: tuple(x_.to(device) for x_ in default_collate(x)))

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# model_loss = nn.CrossEntropyLoss()
num_classes = 10

In [5]:
def classifier_head_train(inp_embedding, classifier_weights, labels):
    inp_embedding = normalize(inp_embedding, p=2, dim=-1)
    classifier_output = torch.mm(inp_embedding, classifier_weights)
    classifier_output = classifier_output * one_hot(labels, num_classes = num_classes).type(torch.float32)
    theta = 1
    loss = torch.mean(torch.log(2 - (theta * torch.sum(classifier_output,1))))
    # classifier_output = torch.softmax(classifier_output, dim=-1)
    # loss = model_loss(classifier_output, one_hot(labels, num_classes = num_classes).type(torch.float32))
    return loss

In [6]:
def classifier_head_test(inp_embedding, classifier_weights, labels):
    inp_embedding = normalize(inp_embedding, p=2, dim=-1)
    classifier_output = torch.mm(inp_embedding, classifier_weights)
    # classifier_output = 1 - (torch.acos(classifier_output)/np.pi)
    # classifier_output = torch.softmax(classifier_output, dim=-1)
    # loss = model_loss(classifier_output, one_hot(labels, num_classes = num_classes).type(torch.float32))
    return torch.argmax(classifier_output, dim=1).tolist()

In [7]:
num_classes = 10
# theta = 1
# cos_similarity = nn.CosineSimilarity(dim = 1, eps = 1e-6)
mse_loss = nn.MSELoss()
initial = None

# def loss_layer(y_pos, y_neg):
#     return torch.mean(torch.log(2 - (theta * cos_similarity(y_pos, y_neg))))

In [8]:
# (num_data, num_features) => no dimension for batch size please
class Layer(nn.Linear):
    def __init__(self, in_features, out_features, dropout_prob, bias, device, lr):
        super().__init__(in_features, out_features, bias, device)
        self.out_features = out_features
        self.bias_flag = bias
        self.lr = lr
        self.dropout_prob = dropout_prob
        self.dropout = nn.Dropout(p = self.dropout_prob)
        self.num_classes = 10
        self.dimension = out_features
        self.leaky_relu = nn.PReLU(init = 0.001)
        self.opt = Adam(self.parameters(), lr = self.lr)
        global initial
        nn.init.kaiming_normal_(self.weight, mode='fan_in')
        # fc1_limit = np.sqrt(6.0 / in_features)
        # torch.nn.init.uniform_(self.weight, a=-fc1_limit, b=fc1_limit)
        self.directions = generate_points(self.num_classes, self.dimension, steps = 10000, initial_points = initial)
        initial = np.array(self.directions)
        self.directions = [torch.tensor(t, dtype = torch.float32).to(device) for t in self.directions]
        self.direction_weights = torch.zeros((len(self.directions[0]), len(self.directions)), device=device)
        for i in range(len(self.directions)):
            self.direction_weights[:, i] = normalize(self.directions[i], p = 2, dim=-1)
    
    def forward(self, x):
        x_direction = normalize(x, p = 2, dim = 1)
        if self.bias_flag:
            return self.leaky_relu(self.dropout(torch.mm(x_direction, self.weight.T) + self.bias.unsqueeze(0)))
        else:
            return self.leaky_relu(self.dropout(torch.mm(x_direction, self.weight.T)))

    def train(self, x, labels):
        # opt = Adam(self.parameters(), lr = self.lr)
        y = self.forward(x) # shape: (num_data, out_features)
        y = normalize(y, p = 2, dim = 1)
        '''
        directions = torch.zeros_like(y)
        for i in range(y.shape[0]):
            directions[i, :] = self.directions[label[i]].reshape(1, -1)
        loss = loss_layer(y, directions)
        '''
        loss = classifier_head_train(y, self.direction_weights, labels)
        self.opt.zero_grad()
        loss.backward(retain_graph = True)
        self.opt.step()

        return loss.item(), y
    
    def test(self, x, labels):
        with torch.no_grad():
            y = self.forward(x)
            # max_idx_list 
            '''
            for dat in range(y.shape[0]):
                max = -np.inf
                max_idx = 0
                for i in range(self.num_classes):
                    cos_sim = cos_similarity(y[dat, :].unsqueeze(0), self.directions[i].reshape(1, -1))
                    if cos_sim > max:
                        max = cos_sim
                        max_idx = i
                max_idx_list.append(max_idx)
            '''
            max_idx_list = []
            max_idx_list = classifier_head_test(y, self.direction_weights, labels)
        return torch.tensor(max_idx_list), y

class Layer_Net(nn.Module):
    def __init__(self, dims_list, dropout_list, bias, epochs, lr, device):
        super(Layer_Net, self).__init__()
        self.dims_list = dims_list
        self.dropout_list = dropout_list
        self.bias = bias
        self.epochs = epochs
        self.lr = lr
        self.device = device
        self.layers = []
        self.sigmoid = nn.Sigmoid()
        global initial
        for d in range(len(self.dims_list) - 1):
            self.layers += [Layer(self.dims_list[d], self.dims_list[d + 1], self.dropout_list[d], 
                                  self.bias, self.device, self.lr).to(self.device)]
        
    def train(self, data_loader):
        layer_loss_list = []
        for i in range(len(self.layers)):
            layer_loss_list.append([])
        pbar = tqdm(total = self.epochs * len(data_loader) * len(self.layers), desc = f"Training", position = 0, leave = True)
        for epoch in range(self.epochs):
            loss_agg = [0] * len(self.layers)
            for dat in data_loader:
                x = dat[0]
                label = dat[1]
                for i in range(len(self.layers)):
                    # with torch.no_grad():
                    #     y = self.layers[i].forward(x)
                    loss, y = self.layers[i].train(x, label)
                    x = y.detach()
                    loss_agg[i] += loss / len(data_loader)
                    # self.layers[i].zero_grad(set_to_none=True)
                    pbar.update(1)
            pbar.set_postfix(epoch = epoch + 1, loss = loss_agg)
            for i in range(len(self.layers)):
                layer_loss_list[i].append(loss_agg[i])
        pbar.close()
        return layer_loss_list

        
    def test(self, data_loader):
        with torch.no_grad():
            correct = 0
            total = 0
            cm_preds = []
            cm_labels = []
            for dat in tqdm(data_loader, desc = "Testing"):
                x = dat[0]
                label = dat[1]
                preds = []
                for i in range(len(self.layers)):
                    pred, x = self.layers[i].test(x, label)
                    preds.append(pred)
                correct += (preds[-1] == label.cpu()).sum().item()
                cm_preds += preds[-1].cpu().numpy().tolist()
                cm_labels += label.cpu().numpy().tolist()
                total += label.shape[0]
        return correct / total, [cm_preds, cm_labels]

In [9]:
init_kernel_classes = None

# Takes input, size of kernel and gives output dimension of cuboid
def gen_size_lists(h_init_image, w_init_image, kernel_size_list, pooling_kernels, padding_list = None, stride_list = None):
    h_list = [h_init_image]
    w_list = [w_init_image]

    new_kernel_size_list = []
    k = len(kernel_size_list) + len(pooling_kernels)
    for i in range(k):
        if i % 2 == 0:
            new_kernel_size_list.append(kernel_size_list[i // 2])
        else:
            new_kernel_size_list.append(pooling_kernels[i // 2])
    
    if stride_list is None:
        stride_list = [1] * len(new_kernel_size_list)
    if padding_list is None:
        padding_list = [0] * len(new_kernel_size_list)
    
    for i in range(k):
        if i % 2 == 1:
            if new_kernel_size_list[i] is not None:
                stride_list[i] = new_kernel_size_list[i][0]

    for i in range(len(new_kernel_size_list)):
        if new_kernel_size_list[i] is None:
            h_list.append(int(h_list[i]))
            w_list.append(int(w_list[i]))
        else:
            h_list.append(int((h_list[i] + 2 * padding_list[i] - new_kernel_size_list[i][0]) / stride_list[i] + 1))
            w_list.append(int((w_list[i] + 2 * padding_list[i] - new_kernel_size_list[i][1]) / stride_list[i] + 1))
    
    h_list = h_list[1 : ]
    w_list = w_list[1 : ]

    final_h_list, final_w_list = [], []

    for i in range(len(h_list) // 2):
        if new_kernel_size_list[2 * i + 1] is not None:
            final_h_list.append(h_list[2 * i + 1])
            final_w_list.append(w_list[2 * i + 1])
        else:
            final_h_list.append(h_list[2 * i])
            final_w_list.append(w_list[2 * i])
            
    return final_h_list, final_w_list, new_kernel_size_list

class Conv_Layer(nn.Conv2d, nn.Module):
    def __init__(self, 
                 in_channels, 
                 out_channels, 
                 kernel_size,
                 batch_norm_flag,  
                 pooling_size, 
                 pooling_type, 
                 bias, 
                 device, 
                 lr, 
                 h_out, 
                 w_out, 
                 symm_vector_dim, 
                 n_symm_vectors):
        super(Conv_Layer, self).__init__(in_channels, out_channels, kernel_size)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.batch_norm_flag = batch_norm_flag
        if self.batch_norm_flag:
            self.batch_norm = nn.BatchNorm2d(self.out_channels)
        else:
            self.batch_norm = nn.Identity()
        
        self.pooling_size = pooling_size
        self.pooling_type = pooling_type
        self.bias_flag = bias
        self.device = device
        self.lr = lr
        self.h_out = h_out
        self.w_out = w_out
        self.D = self.h_out * self.w_out
        self.symm_vector_dim = symm_vector_dim
        self.n_symm_vectors = n_symm_vectors
        # Initialize the projection matrix such that each kernel has a trainable projection matrix 
        # size of input is (batch_size x num_kernels x (h_out * w_out) 
        # output is (batch_size x num_kernels x symm_vector_dim)
        self.mlp_weights = nn.Parameter(torch.normal(0, 1, size = (self.out_channels, self.D, self.symm_vector_dim)).to(self.device), requires_grad = True)

        self.cos_similarity_2 = nn.CosineSimilarity(dim = 2, eps = 1e-6)
        self.cos_similarity_3 = nn.CosineSimilarity(dim = 3, eps = 1e-6)

        global num_classes
        self.num_classes = num_classes
        
        global init_kernel_classes
        if init_kernel_classes is None:
            # Assign the classes to each kernel
            self.kernel_classes = []
            for i in range(self.out_channels):
                group_number = list(range(self.n_symm_vectors[i]))
                flag = True
                while flag:
                    group_list = np.random.choice(group_number, self.num_classes, replace = True).tolist()
                    if len(set(group_list)) == self.n_symm_vectors[i]:
                        flag = False
                self.kernel_classes.append(group_list)
            init_kernel_classes = self.kernel_classes
        else:
            self.kernel_classes = init_kernel_classes

        self.leaky_relu = nn.LeakyReLU(negative_slope = 0.0001)
        
        self.pool = None
        if self.pooling_size is not None and self.pooling_type is not None:
            if self.pooling_type == "max":
                self.pool = nn.MaxPool2d(self.pooling_size[0])
            elif self.pooling_type == "avg":
                self.pool = nn.AvgPool2d(self.pooling_size[0])

        # self.fac = nn.Parameter(torch.tensor(10.0, dtype = torch.float32).to(self.device), requires_grad = True)
        
        self.opt = Adam(self.parameters(), lr = self.lr)

        self.tmp_directions = []
        for i in range(self.out_channels):
            self.tmp_directions.append(generate_points(self.n_symm_vectors[i], self.symm_vector_dim, steps = 10000, initial_points = None))
        self.tmp_directions = [torch.tensor(np.stack(t), dtype = torch.float32).to(self.device) for t in self.tmp_directions]
        
        # Shape of kernel_directions: (out_channels, n_symm_vectors, symm_vector_dim)
        # Broadcast to shape of: (out_channels, num_classes, symm_vector_dim)
        
        self.kernel_directions = torch.zeros((self.out_channels, self.num_classes, self.symm_vector_dim))
        for i in range(self.out_channels):
            for j in range(self.num_classes):
                self.kernel_directions[i, j, :] = self.tmp_directions[i][self.kernel_classes[i][j], :]
        
        self.test_kernel_directions = self.kernel_directions.unsqueeze(0).repeat(batch_size, 1, 1, 1).to(self.device)     
        self.kernel_directions = torch.swapaxes(self.kernel_directions, 0, 1).to(self.device)
        self.all_ones = torch.ones([batch_size, self.out_channels]).to(self.device)

    def _conv_forward(self, input, weight, bias):
        return F.conv2d(input, weight, bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input):
        out = self._conv_forward(input, self.weight, self.bias)
        out = self.batch_norm(out)
        out = self.leaky_relu(out)
        if self.pooling_size is not None:
            out = self.pool(out)
        return out
    
    def train(self, x, labels):
        # opt = Adam(self.parameters(), lr = self.lr)
        # Shape of y: (num_data, out_channels, h_out, w_out)
        y = self.forward(x)
        # New Shape of y: (num_data, out_channels, h_out * w_out)
        z = torch.flatten(y, start_dim = 2)
        proj_y = torch.matmul(z.unsqueeze(2), self.mlp_weights).squeeze(2)
        proj_y = normalize(proj_y, p = 2, dim = 2)
        cos_sim = self.cos_similarity_2(proj_y, self.kernel_directions[labels, :, :])
        loss = mse_loss(self.all_ones[:cos_sim.shape[0], :], cos_sim)
        self.opt.zero_grad()
        loss.backward(retain_graph = True)
        self.opt.step()

        return loss.item(), y

    def test(self, x):
        with torch.no_grad():
            y = self.forward(x)
            z = torch.flatten(y, start_dim = 2)
            proj_y = torch.matmul(z.unsqueeze(2), self.mlp_weights).squeeze(2)
            proj_y = normalize(proj_y, p = 2, dim = 2)
            proj_y = proj_y.unsqueeze(2).repeat(1, 1, self.num_classes, 1)
            cos_sim = self.cos_similarity_3(proj_y, self.test_kernel_directions[ : proj_y.shape[0], :, :, :])
            cos_sim = torch.mean(cos_sim, dim = 1).cpu()
        return torch.argmax(cos_sim, dim = 1), y

# dims list will be of the form [(in_features_1, out_features_1), (in_features_2, out_features_2), ...]
class Conv_Net(nn.Module):
    def __init__(self, 
                 conv_list, 
                 kernel_size_list, 
                 batch_norm_list, 
                 pooling_size_list, 
                 pooling_type, 
                 epochs, bias, 
                 device, lr, 
                 h_list, 
                 w_list, 
                 symm_vector_dim_list, 
                 n_symm_vectors, 
                 group_layers):
        super().__init__()
        self.conv_list = conv_list
        self.kernel_size_list = kernel_size_list
        self.batch_norm_list = batch_norm_list
        self.pooling_size_list = pooling_size_list
        self.pooling_type = pooling_type
        self.epochs = epochs
        self.bias_flag = bias
        self.lr = lr
        self.device = device
        self.h_list = h_list
        self.w_list = w_list
        self.symm_vector_dim_list = symm_vector_dim_list
        self.n_symm_vectors = n_symm_vectors
        self.group_layers = group_layers
        self.layers = nn.ModuleList()

        global init_kernel_classes
        
        for i in range(len(self.kernel_size_list)):
            if i == 0:
                init_kernel_classes = None
            elif self.group_layers[i] != self.group_layers[i - 1]:
                init_kernel_classes = None

            self.layers.append(Conv_Layer(self.conv_list[i][0], 
                                          self.conv_list[i][1], 
                                          self.kernel_size_list[i],
                                          self.batch_norm_list[i], 
                                          self.pooling_size_list[i],
                                          self.pooling_type[i],
                                          self.bias_flag, 
                                          self.device, 
                                          self.lr, 
                                          self.h_list[i], 
                                          self.w_list[i], 
                                          self.symm_vector_dim_list[i],
                                          self.n_symm_vectors[i]))
        
        print(self.layers)
    
    def train(self, data_loader):
       
        layer_loss_list = []
        for i in range(len(self.layers)):
            layer_loss_list.append([])
        pbar = tqdm(total = self.epochs * len(data_loader) * len(self.layers), desc = f"Training", position = 0, leave = True)
        for epoch in range(self.epochs):
            loss_agg = [0] * len(self.layers)
            for dat in data_loader:
                x, label = dat
                for i in range(len(self.layers)):
                    # with torch.no_grad():
                    #     y = self.layers[i].forward(x)
                    loss, y = self.layers[i].train(x, label)
                    # self.layers[i].zero_grad(set_to_none=True)
                    x = y.detach()
                    loss_agg[i] += loss / len(data_loader)
                    pbar.update(1)
            pbar.set_postfix(epoch = epoch + 1, loss = loss_agg)
            for i in range(len(self.layers)):
                layer_loss_list[i].append(loss_agg[i])
        pbar.close()
        return layer_loss_list
    
    def forward_pass(self, x):
        with torch.no_grad():
            for i in range(len(self.layers)):
                x = self.layers[i].forward(x)
        return x

    def test(self, data_loader):
        with torch.no_grad():
            correct = 0
            total = 0
            cm_preds = []
            cm_labels = []
            acc_list = [0] * len(self.layers)
            for dat in tqdm(data_loader, desc = "Testing"):
                x, label = dat
                preds = []
                for i in range(len(self.layers)):
                    pred, x = self.layers[i].test(x)
                    preds.append(pred)
                
                for i, lst in enumerate(preds):
                    acc_list[i] += (lst == label.cpu()).sum().item()
                
                correct += (preds[-1] == label.cpu()).sum().item()
                cm_preds += preds[-1].cpu().numpy().tolist()
                cm_labels += label.cpu().numpy().tolist()
                total += label.shape[0]
            acc_list = [acc / total for acc in acc_list]
        return correct / total, [cm_preds, cm_labels], acc_list

In [10]:
def new_dataloader(data_loader, conv_layer, device):
    new_data, new_label = [], []
    for (data, label) in tqdm(data_loader, desc = "New Dataloader"):
        data = conv_layer.forward_pass(data).cpu()
        batch_size = data.shape[0]
        new_data.append(torch.flatten(data, start_dim = 1))
        new_label.append(label.cpu())
    
    data = torch.cat(new_data, dim = 0)
    label = torch.cat(new_label, dim = 0)
    new_data_loader = DataLoader(list(zip(data, label)), 
                                 batch_size = batch_size, shuffle = True,
                                 collate_fn = lambda x: tuple(x_.to(device) for x_ in default_collate(x)))

    return new_data_loader

In [11]:
class Full_Net(nn.Module):
    def __init__(self, 
                 conv_list, 
                 dims_list, 
                 dropout_list, 
                 kernel_size_list, 
                 batch_norm_list, 
                 pooling_size_list, 
                 pooling_type, 
                 epoch_list, 
                 bias, 
                 device, 
                 learning_rate_list, 
                 h_list, 
                 w_list, 
                 symm_vector_dim_list, 
                 n_symm_vectors,
                 group_layers
                ):
        super().__init__()
        self.network = Conv_Net(conv_list, 
                                kernel_size_list,
                                batch_norm_list,  
                                pooling_size_list, 
                                pooling_type, 
                                epoch_list[0], 
                                bias, 
                                device, 
                                learning_rate_list[0], 
                                h_list, 
                                w_list, 
                                symm_vector_dim_list, 
                                n_symm_vectors,
                                group_layers).to(device)
        self.layers = Layer_Net(dims_list, 
                                dropout_list, 
                                bias, 
                                epoch_list[1], 
                                learning_rate_list[1], 
                                device).to(device)
        
        self.train_dataloader = None
        self.test_dataloader = None
        self.device = device
        self.cm_train_preds = []
        self.cm_test_preds = []

    def train(self, dataloader):
        layer_loss_list = []
        layer_loss_list = self.network.train(dataloader)
        # Testing on CNNs
        print(f"\nInference from CNNs")
        acc, tmp, layerwise_acc_train = self.network.test(dataloader)
        print(f"Train Accuracy: {acc * 100}%\n")
        self.cm_train_preds.append(tmp)
        print("Layerwise Train Accuracy:")
        for i, acc in enumerate(layerwise_acc_train):
            print(f"Layer {i + 1}: {acc * 100}%")
        
        # have to create a new dataloader with the output of the network
        self.train_dataloader = new_dataloader(dataloader, self.network, self.device)
        layer_loss_list += self.layers.train(self.train_dataloader)

        print(f"\nInference from End-to-End Network")
        acc, tmp = self.layers.test(self.train_dataloader)
        self.cm_train_preds.append(tmp)
        print(f"Train Accuracy: {acc * 100}%")
        return layer_loss_list
    
    def test(self, dataloader, flag = False):
        print("Testing on CNNs")
        cnn_test_acc, tmp, layerwise_acc_test = self.network.test(dataloader)
        print(f"Test Accuracy: {cnn_test_acc * 100}%\n")
        self.cm_test_preds.append(tmp)
        print("Layerwise Test Accuracy:")
        for i, acc in enumerate(layerwise_acc_test):
            print(f"Layer {i + 1}: {acc * 100}%")
            
        modified_dataloader = new_dataloader(dataloader, self.network, self.device)
        if flag:
            self.test_dataloader = modified_dataloader
        
        print("Testing on End-to-End Network")
        mlp_test_acc, tmp = self.layers.test(modified_dataloader)
        print(f"Test Accuracy: {mlp_test_acc * 100}%")
        self.cm_test_preds.append(tmp)
        
        return layerwise_acc_test, cnn_test_acc, mlp_test_acc

In [12]:
num_classes = 10

In [None]:
num_runs = 5
noise_percentage_list = []
train_acc = []
test_acc = []
layer_wise_test = []

for _ in range(num_runs):
# for noise_percentage in noise_percentage_list:
        
    in_channels = [3]
    num_classes = 10
    kernel_size_list = [(5, 5), (5, 5)]
    # kernel_size_list = [(5, 5)]
    batch_norm_list = [True, True]
    # batch_norm_list = [True]
    pooling_size_list = [(2,2), (2,2)]
    # pooling_size_list = [(2,2)]
    pooling_type = ['max', 'max']
    # pooling_type = ['max']
    h_list, w_list, new_kernel_size_list = gen_size_lists(64, 64, kernel_size_list, pooling_size_list)
    symm_vector_dim_list = [100, 100]
    # symm_vector_dim_list = [100]
    kernels = [128, 128]
    # kernels = [32]
    tmp = np.random.randint(low = 2, high = 10, size = kernels[0]).tolist()
    n_symm_vectors = [tmp for _ in kernels]
    group_layers = [0, 0]
    #group_layers = [0]
    kernels = in_channels + kernels
    
    in_out_kernel_list = []
    for i in range(len(new_kernel_size_list)):
        if i % 2 == 0:
            in_out_kernel_list.append((kernels[i // 2], kernels[(i // 2) + 1]))
    
    mlp_list = [h_list[-1] * w_list[-1] * kernels[-1], 10]
    dropout_list = [0, 0]
    
    bias_flag = True
    
    epoch_list = [15, 10]
    learning_rate_list = [0.1, 0.1]
    
    net = Full_Net(in_out_kernel_list,
                   mlp_list,
                   dropout_list,
                   kernel_size_list,
                   batch_norm_list, 
                   pooling_size_list,
                   pooling_type, 
                   epoch_list, 
                   bias_flag, 
                   device, 
                   learning_rate_list,
                   h_list, 
                   w_list, 
                   symm_vector_dim_list, 
                   n_symm_vectors, 
                   group_layers).to(device)

    # trainloader = noisy_dataloader(train_loader, noise_percentage, device=device)
    net.train(train_loader)

    layerwise_acc_test, cnn_train_accuracy, mlp_train_accuracy = net.test(train_loader, flag = True)
    print(f"CNN Train Accuracy: {cnn_train_accuracy * 100}%")
    print(f"MLP Train Accuracy: {mlp_train_accuracy * 100}%")
    train_acc.append([cnn_train_accuracy, mlp_train_accuracy])

    # testloader = noisy_dataloader(test_loader, noise_percentage, device=device)
    layerwise_acc_test, cnn_test_accuracy, mlp_test_accuracy = net.test(test_loader, flag = True)
    
    print(f"CNN Test Accuracy: {cnn_test_accuracy * 100}%")
    print(f"MLP Test Accuracy: {mlp_test_accuracy * 100}%")
    test_acc.append([cnn_test_accuracy, mlp_test_accuracy])
    layer_wise_test.append(layerwise_acc_test)
print(np.shape(test_acc))

ModuleList(
  (0): Conv_Layer(
    3, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Conv_Layer(
    128, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 23460/23460 [14:51<00:00, 26.31it/s, epoch=15, loss=[0.6355059135447976, 0.5749208255649527]]



Inference from CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:30<00:00, 25.59it/s]


Train Accuracy: 50.79%

Layerwise Train Accuracy:
Layer 1: 44.376%
Layer 2: 50.79%


New Dataloader: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:08<00:00, 96.50it/s]
Training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31250/31250 [00:26<00:00, 1178.05it/s, epoch=10, loss=[0.30551424348831174]]



Inference from End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2747.47it/s]


Train Accuracy: 73.35000000000001%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.03it/s]


Test Accuracy: 50.983999999999995%

Layerwise Test Accuracy:
Layer 1: 44.28%
Layer 2: 50.983999999999995%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 109.46it/s]


Testing on End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2744.32it/s]


Test Accuracy: 73.116%
CNN Train Accuracy: 50.983999999999995%
MLP Train Accuracy: 73.116%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:05<00:00, 27.13it/s]


Test Accuracy: 49.05%

Layerwise Test Accuracy:
Layer 1: 40.949999999999996%
Layer 2: 49.05%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 111.13it/s]


Testing on End-to-End Network


Testing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 625/625 [00:00<00:00, 2769.14it/s]


Test Accuracy: 65.02%
CNN Test Accuracy: 49.05%
MLP Test Accuracy: 65.02%
ModuleList(
  (0): Conv_Layer(
    3, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Conv_Layer(
    128, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 23460/23460 [15:09<00:00, 25.80it/s, epoch=15, loss=[0.6381936688404868, 0.5796711956677235]]



Inference from CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:28<00:00, 27.07it/s]


Train Accuracy: 51.598%

Layerwise Train Accuracy:
Layer 1: 44.214%
Layer 2: 51.598%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 108.14it/s]
Training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31250/31250 [00:26<00:00, 1184.06it/s, epoch=10, loss=[0.30346183584690095]]



Inference from End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2662.40it/s]


Train Accuracy: 73.342%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:29<00:00, 26.84it/s]


Test Accuracy: 51.80200000000001%

Layerwise Test Accuracy:
Layer 1: 44.226%
Layer 2: 51.80200000000001%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 106.09it/s]


Testing on End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2633.66it/s]


Test Accuracy: 73.202%
CNN Train Accuracy: 51.80200000000001%
MLP Train Accuracy: 73.202%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:05<00:00, 26.89it/s]


Test Accuracy: 49.74%

Layerwise Test Accuracy:
Layer 1: 40.81%
Layer 2: 49.74%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 107.46it/s]


Testing on End-to-End Network


Testing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 625/625 [00:00<00:00, 2390.71it/s]


Test Accuracy: 65.36%
CNN Test Accuracy: 49.74%
MLP Test Accuracy: 65.36%
ModuleList(
  (0): Conv_Layer(
    3, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Conv_Layer(
    128, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


Training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 23460/23460 [15:09<00:00, 25.79it/s, epoch=15, loss=[0.638406306276541, 0.5814666638288966]]



Inference from CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:33<00:00, 23.29it/s]


Train Accuracy: 49.748%

Layerwise Train Accuracy:
Layer 1: 43.84%
Layer 2: 49.748%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 101.29it/s]
Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31250/31250 [00:36<00:00, 850.78it/s, epoch=10, loss=[0.3031764376640314]]



Inference from End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2055.61it/s]


Train Accuracy: 73.19200000000001%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:41<00:00, 18.78it/s]


Test Accuracy: 49.732%

Layerwise Test Accuracy:
Layer 1: 43.86%
Layer 2: 49.732%


New Dataloader: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:10<00:00, 72.94it/s]


Testing on End-to-End Network


Testing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:03<00:00, 933.29it/s]


Test Accuracy: 73.08200000000001%
CNN Train Accuracy: 49.732%
MLP Train Accuracy: 73.08200000000001%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:09<00:00, 15.93it/s]


Test Accuracy: 47.47%

Layerwise Test Accuracy:
Layer 1: 40.44%
Layer 2: 47.47%


New Dataloader: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:02<00:00, 72.18it/s]


Testing on End-to-End Network


Testing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 625/625 [00:00<00:00, 1086.47it/s]


Test Accuracy: 64.27000000000001%
CNN Test Accuracy: 47.47%
MLP Test Accuracy: 64.27000000000001%
ModuleList(
  (0): Conv_Layer(
    3, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Conv_Layer(
    128, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 23460/23460 [17:56<00:00, 21.79it/s, epoch=15, loss=[0.6233701879715985, 0.5605597971650336]]



Inference from CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:30<00:00, 25.35it/s]


Train Accuracy: 52.03%

Layerwise Train Accuracy:
Layer 1: 44.214%
Layer 2: 52.03%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 104.83it/s]
Training: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31250/31250 [00:28<00:00, 1080.06it/s, epoch=10, loss=[0.309697022395134]]



Inference from End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2475.76it/s]


Train Accuracy: 73.602%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:30<00:00, 25.62it/s]


Test Accuracy: 51.99399999999999%

Layerwise Test Accuracy:
Layer 1: 44.198%
Layer 2: 51.99399999999999%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 105.40it/s]


Testing on End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2597.44it/s]


Test Accuracy: 73.604%
CNN Train Accuracy: 51.99399999999999%
MLP Train Accuracy: 73.604%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:05<00:00, 26.73it/s]


Test Accuracy: 49.57%

Layerwise Test Accuracy:
Layer 1: 40.29%
Layer 2: 49.57%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:01<00:00, 105.63it/s]


Testing on End-to-End Network


Testing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 625/625 [00:00<00:00, 2558.13it/s]


Test Accuracy: 65.03999999999999%
CNN Test Accuracy: 49.57%
MLP Test Accuracy: 65.03999999999999%
ModuleList(
  (0): Conv_Layer(
    3, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Conv_Layer(
    128, 128, kernel_size=(5, 5), stride=(1, 1)
    (batch_norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cos_similarity_2): CosineSimilarity()
    (cos_similarity_3): CosineSimilarity()
    (leaky_relu): LeakyReLU(negative_slope=0.0001)
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
)


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 23460/23460 [14:49<00:00, 26.36it/s, epoch=15, loss=[0.6395803138118269, 0.5828255216026551]]



Inference from CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:30<00:00, 25.54it/s]


Train Accuracy: 51.656%

Layerwise Train Accuracy:
Layer 1: 44.022%
Layer 2: 51.656%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 106.83it/s]
Training: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31250/31250 [00:27<00:00, 1124.76it/s, epoch=10, loss=[0.31060223807334897]]



Inference from End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2553.56it/s]


Train Accuracy: 71.756%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:30<00:00, 25.53it/s]


Test Accuracy: 51.732%

Layerwise Test Accuracy:
Layer 1: 43.97%
Layer 2: 51.732%


New Dataloader: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 782/782 [00:07<00:00, 106.53it/s]


Testing on End-to-End Network


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3125/3125 [00:01<00:00, 2549.16it/s]


Test Accuracy: 71.5%
CNN Train Accuracy: 51.732%
MLP Train Accuracy: 71.5%
Testing on CNNs


Testing: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:06<00:00, 25.69it/s]


Test Accuracy: 49.730000000000004%

Layerwise Test Accuracy:
Layer 1: 40.56%
Layer 2: 49.730000000000004%


New Dataloader:  56%|███████████████████████████████████████████████████████████████████████████████████▌                                                                 | 88/157 [00:00<00:00, 107.16it/s]

In [None]:
np.save("./data/cifar10_train_acc_cnn_12_cos_bf.npy", np.array(train_acc))
np.save("./data/cifar10_test_acc_cnn_12_cos_bf.npy", np.array(test_acc))
np.save("./data/cifar10_layerwise_test_acc_cnn_12_cos_bf.npy", np.array(layer_wise_test))

In [None]:
h_list

In [None]:
_, cnn_test_accuracy, mlp_test_accuracy = net.test(test_loader, flag = True)
print(f"CNN Test Accuracy: {cnn_test_accuracy * 100}%")
print(f"MLP Test Accuracy: {mlp_test_accuracy * 100}%")