<a href="https://colab.research.google.com/github/7201krap/PYTORCH_project/blob/main/selectivity_hyper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Seed the global PyTorch and NumPy random number generators with the same
# fixed value so weight initialisation and shuffling are repeatable.
SEED = 1234
for _seed_rng in (torch.manual_seed, np.random.seed):
    _seed_rng(SEED)

In [4]:
# Both MNIST splits are downloaded into ./data (if missing) and converted to
# tensors with the same preprocessing pipeline.
to_tensor = transforms.Compose([transforms.ToTensor()])

mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=to_tensor)
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=to_tensor)

# Mini-batches of 50 images; only the training split is shuffled.
train_dataloader = torch.utils.data.DataLoader(mnist_trainset, batch_size=50, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(mnist_testset, batch_size=50, shuffle=False)

print("Training dataset size: ", len(mnist_trainset))
print("Testing dataset size: ", len(mnist_testset))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...
Done!
Training dataset size:  60000
Testing dataset size:  10000


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# Define the model 
class Model(torch.nn.Module):
    """Fully connected classifier with one sigmoid hidden layer.

    The input is flattened per sample, passed through ``linear_1`` and a
    sigmoid, and mapped to class logits by ``linear_2``. No softmax is applied;
    the output is meant to be fed to ``torch.nn.CrossEntropyLoss``.

    Args:
        in_features: Number of values per flattened sample (784 = 28 * 28).
        hidden_features: Number of neurons in the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=784, hidden_features=256, num_classes=10):
        super().__init__()
        # Layers are created in the same order as before so that a fixed RNG
        # seed yields the same initial weights for the default sizes.
        self.linear_1 = torch.nn.Linear(in_features, hidden_features)
        self.linear_2 = torch.nn.Linear(hidden_features, num_classes)
        self.sigmoid  = torch.nn.Sigmoid()

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch ``x``."""
        # Collapse every non-batch dimension into a single feature axis.
        x = x.reshape(x.size(0), -1)
        x = self.linear_1(x)
        x = self.sigmoid(x)
        pred = self.linear_2(x)

        return pred

In [6]:
def get_activation(model):
    """Build a forward hook that records a layer's output on ``model``.

    The returned callable has the ``(module, inputs, output)`` signature used
    by ``register_forward_hook``. Every time the hooked layer runs, its output
    is stored as ``model.layer_activations``, overwriting the previous value.

    The stored tensor is detached from the autograd graph: it is only used for
    inspection, and keeping the graph attached would hold on to it between
    batches and prevent a direct ``.numpy()`` conversion.

    Args:
        model: Object on which the ``layer_activations`` attribute is set.

    Returns:
        The hook function.
    """
    def hook(module, inputs, output):
        model.layer_activations = output.detach()
    return hook

In [7]:
no_epochs = 30
def selectivity_trainer(optimizer, model, num_epochs=None, train_loader=None,
                        test_loader=None, verbose=False):
    """Train ``model`` and measure the class selectivity of its hooked layer.

    Each epoch runs one pass over the training loader followed by one pass
    over the test loader. During the test pass of the final epoch the
    activations found in ``model.layer_activations`` (which the caller must
    populate through a forward hook) are averaged per class for every neuron.
    For each neuron the selectivity index is then

        (u_max - u_minus_max) / (u_max + u_minus_max)

    where ``u_max`` is the largest class-mean activation and ``u_minus_max``
    is the mean of the remaining class means.

    Args:
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: Network returning class logits; batches are moved to the
            device of its first parameter.
        num_epochs: Number of epochs; defaults to the module-level
            ``no_epochs``.
        train_loader: Iterable of ``(images, labels)`` training batches;
            defaults to the module-level ``train_dataloader``.
        test_loader: Iterable of ``(images, labels)`` test batches; defaults
            to the module-level ``test_dataloader``.
        verbose: When true, print the losses and accuracy after each epoch.

    Returns:
        ``(test_acc, selectivity_list)``: the test accuracy after each epoch
        and one selectivity value per neuron of the hooked layer.

    Raises:
        ValueError: If no activations were recorded, if the model has fewer
            than two output classes, or if some class has no test samples
            (its mean activation would be undefined).
    """
    # Resolve the defaults lazily so the notebook-level globals are only
    # required when the caller does not pass explicit values.
    if num_epochs is None:
        num_epochs = no_epochs
    if train_loader is None:
        train_loader = train_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    # Follow the model's own placement instead of relying on a global device.
    device = next(model.parameters()).device

    criterion = torch.nn.CrossEntropyLoss()
    test_acc = []

    # Per-class running sums (classes x neurons) and sample counts, allocated
    # once the layer width and the number of classes are known.
    class_sums = None
    class_counts = None

    for epoch in range(num_epochs):
        # ---- training ----
        model.train()
        train_loss_sum, train_batches = 0.0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()
            train_batches += 1

        # ---- evaluation ----
        model.eval()
        is_last_epoch = epoch == num_epochs - 1
        correct, seen = 0, 0
        test_loss_sum, test_batches = 0.0, 0

        # No gradients are needed here; disabling autograd avoids building a
        # graph for every test batch.
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)

                pred = model(images)
                test_loss_sum += criterion(pred, labels).item()
                test_batches += 1

                # The arg-max of the logits equals the arg-max of their
                # softmax, so the softmax itself is not required.
                correct += (pred.argmax(dim=1) == labels).sum().item()
                seen += labels.size(0)

                if is_last_epoch:
                    # .cpu() makes the conversion work for CUDA models too.
                    acts = model.layer_activations.detach().cpu().numpy()
                    acts = acts.reshape(acts.shape[0], -1).astype(np.float64)
                    batch_labels = labels.cpu().numpy()

                    if class_sums is None:
                        class_sums = np.zeros((pred.size(1), acts.shape[1]))
                        class_counts = np.zeros(pred.size(1), dtype=np.int64)

                    # Unbuffered add so that repeated labels within a batch
                    # all contribute to their class row.
                    np.add.at(class_sums, batch_labels, acts)
                    class_counts += np.bincount(batch_labels,
                                                minlength=class_counts.shape[0])

        accuracy = correct / seen if seen else 0.0
        test_acc.append(accuracy)

        if verbose:
            print('\nEpoch: {}/{}, Train Loss: {:.8f}, Test Loss: {:.8f}, Test Accuracy: {:.8f}'.format(
                epoch + 1, num_epochs,
                train_loss_sum / max(train_batches, 1),
                test_loss_sum / max(test_batches, 1),
                accuracy))

    if class_sums is None:
        raise ValueError("no activations were recorded: num_epochs must be >= 1 "
                         "and test_loader must yield at least one batch")

    num_classes = class_counts.shape[0]
    if num_classes < 2:
        raise ValueError("selectivity needs at least two output classes")

    missing = np.flatnonzero(class_counts == 0)
    if missing.size:
        raise ValueError("no test samples for class(es) {}; their mean "
                         "activation is undefined".format(missing.tolist()))

    # Mean activation of every neuron for every class: (classes, neurons).
    class_means = class_sums / class_counts[:, None]

    u_max = class_means.max(axis=0)
    u_minus_max = (class_means.sum(axis=0) - u_max) / (num_classes - 1)
    selectivity = (u_max - u_minus_max) / (u_max + u_minus_max)

    return test_acc, list(selectivity)





# AdaDelta

### Rho, weight decay, and learning rate

default: torch.optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0)


Rho 

In [None]:
# Sweep Adadelta's rho; every other optimizer argument keeps its default.
rho_list = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
for i, rho in enumerate(rho_list):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_adadelta_rho = Model()
    model_adadelta_rho.to(device)
    model_adadelta_rho.sigmoid.register_forward_hook(get_activation(model_adadelta_rho))
    optimizer_adadelta = torch.optim.Adadelta(model_adadelta_rho.parameters(), rho=rho)
    adadelta_test_acc, adadelta_selectivity_list = selectivity_trainer(optimizer=optimizer_adadelta, model=model_adadelta_rho)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write, so results are
    # on disk even if a later run is interrupted.
    with open("selectivity_adadelta_rho.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(adadelta_test_acc)+'\n'+str(np.average(adadelta_selectivity_list))+'\n'+str(np.std(adadelta_selectivity_list))+'\n\n')

Entering 0-th loop
Entering 1-th loop
Entering 2-th loop


In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_adadelta_rho.txt /content/drive/MyDrive

Weight decay

In [None]:
# Sweep Adadelta's weight decay over 11 log-spaced values from 1e-5 to 1e1.
weight_decay = np.logspace(-5, 1, 11)

for i, wd in enumerate(weight_decay):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_adadelta_weight_decay = Model()
    model_adadelta_weight_decay.to(device)
    model_adadelta_weight_decay.sigmoid.register_forward_hook(get_activation(model_adadelta_weight_decay))
    optimizer_adadelta = torch.optim.Adadelta(model_adadelta_weight_decay.parameters(), weight_decay=wd)
    adadelta_test_acc, adadelta_selectivity_list = selectivity_trainer(optimizer=optimizer_adadelta, model=model_adadelta_weight_decay)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_adadelta_weight_decay.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(adadelta_test_acc)+'\n'+str(np.average(adadelta_selectivity_list))+'\n'+str(np.std(adadelta_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_adadelta_weight_decay.txt /content/drive/MyDrive

Learning rate

In [None]:
# Sweep Adadelta's learning rate over 11 log-spaced values from 1e-5 to 1e1.
learning_rate = np.logspace(-5, 1, 11)

for i, lr in enumerate(learning_rate):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_adadelta_lr = Model()
    model_adadelta_lr.to(device)
    model_adadelta_lr.sigmoid.register_forward_hook(get_activation(model_adadelta_lr))
    optimizer_adadelta = torch.optim.Adadelta(model_adadelta_lr.parameters(), lr=lr)
    adadelta_test_acc, adadelta_selectivity_list = selectivity_trainer(optimizer=optimizer_adadelta, model=model_adadelta_lr)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_adadelta_learning_rate.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(adadelta_test_acc)+'\n'+str(np.average(adadelta_selectivity_list))+'\n'+str(np.std(adadelta_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_adadelta_learning_rate.txt /content/drive/MyDrive

# AdaGrad

### weight_decay and learning rate

default: torch.optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)

weight_decay

In [None]:
# Sweep Adagrad's weight decay over 11 log-spaced values from 1e-5 to 1e1.
weight_decay = np.logspace(-5, 1, 11)

for i, wd in enumerate(weight_decay):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_adagrad_weight_decay = Model()
    model_adagrad_weight_decay.to(device)
    model_adagrad_weight_decay.sigmoid.register_forward_hook(get_activation(model_adagrad_weight_decay))
    optimizer_adagrad = torch.optim.Adagrad(model_adagrad_weight_decay.parameters(), weight_decay=wd)
    adagrad_test_acc, adagrad_selectivity_list = selectivity_trainer(optimizer=optimizer_adagrad, model=model_adagrad_weight_decay)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_adagrad_weight_decay.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(adagrad_test_acc)+'\n'+str(np.average(adagrad_selectivity_list))+'\n'+str(np.std(adagrad_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_adagrad_weight_decay.txt /content/drive/MyDrive

Learning rate

In [None]:
# Sweep Adagrad's learning rate over 11 log-spaced values from 1e-5 to 1e1.
learning_rate = np.logspace(-5, 1, 11)

for i, lr in enumerate(learning_rate):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_adagrad_lr = Model()
    model_adagrad_lr.to(device)
    model_adagrad_lr.sigmoid.register_forward_hook(get_activation(model_adagrad_lr))
    optimizer_adagrad = torch.optim.Adagrad(model_adagrad_lr.parameters(), lr=lr)
    adagrad_test_acc, adagrad_selectivity_list = selectivity_trainer(optimizer=optimizer_adagrad, model=model_adagrad_lr)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_adagrad_learning_rate.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(adagrad_test_acc)+'\n'+str(np.average(adagrad_selectivity_list))+'\n'+str(np.std(adagrad_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_adagrad_learning_rate.txt /content/drive/MyDrive

# SGD

### learning rate, weight decay, and momentum

default: torch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, weight_decay=0, nesterov=False)

learning rate

In [None]:
# Sweep SGD's learning rate over 11 log-spaced values from 1e-5 to 1e1.
learning_rate = np.logspace(-5, 1, 11)

for i, lr in enumerate(learning_rate):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_SGD_lr = Model()
    model_SGD_lr.to(device)
    model_SGD_lr.sigmoid.register_forward_hook(get_activation(model_SGD_lr))
    optimizer_SGD = torch.optim.SGD(model_SGD_lr.parameters(), lr=lr)
    SGD_test_acc, SGD_selectivity_list = selectivity_trainer(optimizer=optimizer_SGD, model=model_SGD_lr)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_SGD_learning_rate.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(SGD_test_acc)+'\n'+str(np.average(SGD_selectivity_list))+'\n'+str(np.std(SGD_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_SGD_learning_rate.txt /content/drive/MyDrive

weight decay

In [None]:
# Sweep SGD's weight decay over 11 log-spaced values from 1e-5 to 1e1, with
# the learning rate fixed at 0.1.
weight_decay = np.logspace(-5, 1, 11)

for i, wd in enumerate(weight_decay):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_SGD_weight_decay = Model()
    model_SGD_weight_decay.to(device)
    model_SGD_weight_decay.sigmoid.register_forward_hook(get_activation(model_SGD_weight_decay))
    optimizer_SGD = torch.optim.SGD(model_SGD_weight_decay.parameters(), weight_decay=wd, lr=0.1)
    SGD_test_acc, SGD_selectivity_list = selectivity_trainer(optimizer=optimizer_SGD, model=model_SGD_weight_decay)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_SGD_weight_decay.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(SGD_test_acc)+'\n'+str(np.average(SGD_selectivity_list))+'\n'+str(np.std(SGD_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_SGD_weight_decay.txt /content/drive/MyDrive

momentum

In [None]:
# TODO: run this sweep again.
# Sweep SGD's momentum with the learning rate fixed at 0.1.
momentum = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

for i, mom in enumerate(momentum):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    # NOTE: the variable name model_SGD_lr is shared with the SGD
    # learning-rate sweep; it is kept unchanged for compatibility.
    model_SGD_lr = Model()
    model_SGD_lr.to(device)
    model_SGD_lr.sigmoid.register_forward_hook(get_activation(model_SGD_lr))
    optimizer_SGD = torch.optim.SGD(model_SGD_lr.parameters(), momentum=mom, lr=0.1)
    SGD_test_acc, SGD_selectivity_list = selectivity_trainer(optimizer=optimizer_SGD, model=model_SGD_lr)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_SGD_momentum.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(SGD_test_acc)+'\n'+str(np.average(SGD_selectivity_list))+'\n'+str(np.std(SGD_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_SGD_momentum.txt /content/drive/MyDrive

# Adam

### weight decay, learning rate, `betas[0]` (lhs), and `betas[1]` (rhs)

default: torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

Weight decay

In [None]:
# Sweep Adam's weight decay over 11 log-spaced values from 1e-5 to 1e1.
weight_decay = np.logspace(-5, 1, 11)

for i, wd in enumerate(weight_decay):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_Adam_weight_decay = Model()
    model_Adam_weight_decay.to(device)
    model_Adam_weight_decay.sigmoid.register_forward_hook(get_activation(model_Adam_weight_decay))
    optimizer_Adam = torch.optim.Adam(model_Adam_weight_decay.parameters(), weight_decay=wd)
    Adam_test_acc, Adam_selectivity_list = selectivity_trainer(optimizer=optimizer_Adam, model=model_Adam_weight_decay)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_Adam_weight_decay.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(Adam_test_acc)+'\n'+str(np.average(Adam_selectivity_list))+'\n'+str(np.std(Adam_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_Adam_weight_decay.txt /content/drive/MyDrive

Learning rate

In [None]:
# TODO: run this sweep again.
# Sweep Adam's learning rate over 11 log-spaced values from 1e-5 to 1e1.
learning_rate = np.logspace(-5, 1, 11)

for i, lr in enumerate(learning_rate):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_Adam_lr = Model()
    model_Adam_lr.to(device)
    model_Adam_lr.sigmoid.register_forward_hook(get_activation(model_Adam_lr))
    optimizer_Adam = torch.optim.Adam(model_Adam_lr.parameters(), lr=lr)
    Adam_test_acc, Adam_selectivity_list = selectivity_trainer(optimizer=optimizer_Adam, model=model_Adam_lr)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_Adam_learning_rate.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(Adam_test_acc)+'\n'+str(np.average(Adam_selectivity_list))+'\n'+str(np.std(Adam_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_Adam_learning_rate.txt /content/drive/MyDrive

Betas - lhs (`betas[0]`, the running-average coefficient for the gradient)

In [None]:
# Sweep the first Adam beta while the second stays at 0.999.
betas_lhs = [0.0001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.999]

for i, beta_lhs in enumerate(betas_lhs):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_Adam_betas = Model()
    model_Adam_betas.to(device)
    model_Adam_betas.sigmoid.register_forward_hook(get_activation(model_Adam_betas))
    optimizer_Adam = torch.optim.Adam(model_Adam_betas.parameters(), betas=(beta_lhs, 0.999))
    Adam_test_acc, Adam_selectivity_list = selectivity_trainer(optimizer=optimizer_Adam, model=model_Adam_betas)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_Adam_betas_lhs.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(Adam_test_acc)+'\n'+str(np.average(Adam_selectivity_list))+'\n'+str(np.std(Adam_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_Adam_betas_lhs.txt /content/drive/MyDrive

Betas - rhs (`betas[1]`, the running-average coefficient for the squared gradient)

In [None]:
# TODO: run this sweep again.
# Sweep the second Adam beta while the first stays at 0.9.
betas_rhs = [0.0001, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.999]


for i, beta_rhs in enumerate(betas_rhs):
    print(f"Entering {i}-th loop")
    # Fresh model per setting, with a hook recording the sigmoid output.
    model_Adam_betas = Model()
    model_Adam_betas.to(device)
    model_Adam_betas.sigmoid.register_forward_hook(get_activation(model_Adam_betas))
    optimizer_Adam = torch.optim.Adam(model_Adam_betas.parameters(), betas=(0.9, beta_rhs))
    Adam_test_acc, Adam_selectivity_list = selectivity_trainer(optimizer=optimizer_Adam, model=model_Adam_betas)

    # Start a new results file on the first setting and append afterwards.
    # The context manager closes the handle after every write.
    with open("selectivity_Adam_betas_rhs.txt", "w" if i == 0 else "a") as f:
        f.write(str(i)+'\n'+str(Adam_test_acc)+'\n'+str(np.average(Adam_selectivity_list))+'\n'+str(np.std(Adam_selectivity_list))+'\n\n')

In [None]:
# Copy the results file into the Google Drive folder mounted under /content/drive.
!cp selectivity_Adam_betas_rhs.txt /content/drive/MyDrive