Initialize a shallow feedforward fully-connected network with V1-type weights and classify the __full Fashion-MNIST__ dataset using __gradient descent__.

In [1]:
import os
import pickle

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

from data_fns import load_fashion_mnist
from estimator import classical_weights, V1_inspired_weights, V1_weights_multiple_scales

In [2]:
# Make GPU 0 the current CUDA device.
torch.cuda.set_device(0)

# load_fashion_mnist returns NumPy arrays (they are fed to torch.from_numpy below).
train, train_labels, test, test_labels = load_fashion_mnist('./data/fashion_mnist/')

# Inputs are cast with .float(), labels with .long(); everything is moved to the GPU.
X_train, X_test = (torch.from_numpy(arr).float().to('cuda') for arr in (train, test))
y_train, y_test = (torch.from_numpy(arr).long().to('cuda') for arr in (train_labels, test_labels))

# X_train is 2-D; d is used further down as the input width of the first Linear layer.
n, d = X_train.shape

In [3]:
class V1_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer is initialized by ``V1_inspired_weights``.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Forwarded to ``V1_inspired_weights`` as ``scale``.
    t, l : optional
        Forwarded to ``V1_inspired_weights`` as ``t`` and ``l``. The defaults (5, 2)
        are the values that used to be hard-coded here.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, t=5, l=2, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        init_weights = torch.as_tensor(
            V1_inspired_weights(hidden_size, input_dim, t=t, l=l, scale=scale),
            dtype=torch.float32,
        )
        # Copy in place (no autograd tracking) so the parameter keeps its
        # (hidden_size, input_dim) shape instead of being silently replaced.
        with torch.no_grad():
            self.fc1.weight.copy_(init_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class He_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer uses Kaiming-normal initialization.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Unused; accepted only so the constructor signature matches the other
        network classes in this notebook.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        torch.nn.init.kaiming_normal_(self.fc1.weight)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class RF_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer is initialized by ``classical_weights``.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Forwarded to ``classical_weights`` as ``scale``.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        init_weights = torch.as_tensor(
            classical_weights(hidden_size, input_dim, scale=scale),
            dtype=torch.float32,
        )
        # Copy in place (no autograd tracking) so the parameter keeps its
        # (hidden_size, input_dim) shape instead of being silently replaced.
        with torch.no_grad():
            self.fc1.weight.copy_(init_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)

In [4]:
def predict(model, X):
    """Return, for each row of ``X``, the index of the largest score produced by ``model``.

    The forward pass runs under ``torch.no_grad()`` so evaluation does not
    build an autograd graph.
    """
    with torch.no_grad():
        return model(X).max(1)[1]

def error(model, X, y):
    """Return the misclassification rate of ``model`` on ``(X, y)``, i.e. ``1 - accuracy``."""
    n_correct = torch.sum(predict(model, X) == y)
    hit_rate = 1.0 * n_correct / len(y)
    return 1 - hit_rate

In [5]:
# Build one 100-unit network per initialization scheme (construction order: V1, RF, He).
V1_model, RF_model, He_model = (net_cls(100, 2/d) for net_cls in (V1_net, RF_net, He_net))

# Report the mean L2 norm of the first-layer weight rows for each scheme.
for _label, _net in (('V1', V1_model), ('He', He_model), ('RF', RF_model)):
    print(_label, torch.mean(torch.norm(_net.fc1.weight.data, dim=1)))

# Mean row norm of the RF network's first layer, kept for later use.
w = torch.mean(torch.norm(RF_model.fc1.weight.data, dim=1))

V1 tensor(1.4182)
He tensor(1.4097)
RF tensor(1.4103)


In [7]:
# Sweep configuration: trials per setting, epochs per trial, networks, widths, learning rates.
n_trials, n_epochs = 10, 3001
models = {'V1': V1_net, 'He': He_net}
h_list = [50, 100, 400, 1000]
lr_list = [1e-3, 1e-2, 1e-1, 1e0]


def _blank_log():
    """Return a fresh {model: {h: {lr: zeros((n_trials, n_epochs))}}} container."""
    log = {}
    for m in models.keys():
        log[m] = {}
        for h in h_list:
            log[m][h] = {}
            for lr in lr_list:
                log[m][h][lr] = np.zeros((n_trials, n_epochs))
    return log


# One independent container per logged quantity.
train_err = _blank_log()
test_err = _blank_log()
loss_list = _blank_log()

In [None]:
scale = 2/d
# t and l only label the output file in this cell; they are not passed to the network constructors.
t, l = 5, 2
loss_func = nn.CrossEntropyLoss()

# Create the output directory up front so the final dump cannot fail on a
# missing path after the whole sweep has already been computed.
results_path = 'results/initialize_fashion_mnist/full_data/clf_t=%0.2f_l=%0.2f.pickle' % (t, l)
os.makedirs(os.path.dirname(results_path), exist_ok=True)

for h in h_list:
    for lr in lr_list:
        for m, network in models.items():
            for i in range(n_trials):
                # Fresh network and optimizer for every trial.
                model = network(h, scale).to('cuda')
                optim = torch.optim.SGD(model.parameters(), lr=lr)
                for j in range(n_epochs):
                    # One full-batch gradient step on the entire training set.
                    optim.zero_grad()
                    loss = loss_func(model(X_train), y_train)
                    loss.backward()
                    optim.step()

                    # Convert the 0-dim GPU tensors to Python floats explicitly rather than
                    # relying on implicit tensor -> NumPy conversion during item assignment.
                    loss_value = loss.item()
                    train_err[m][h][lr][i, j] = float(error(model, X_train, y_train))
                    test_err[m][h][lr][i, j] = float(error(model, X_test, y_test))
                    loss_list[m][h][lr][i, j] = loss_value

                    if (j % 600 == 0):
                        print('Trial %d, Epoch: %d, %s model, h=%d, lr=%0.5f, Loss=%0.5f, test err=%0.3f' % (i,j, m, h, lr, loss_value, 
                                                                                                  test_err[m][h][lr][i, j]))

# Persist every logged curve once the whole sweep has finished.
results = {'test_err': test_err, 'train_err': train_err, 'loss': loss_list}
with open(results_path, 'wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

Trial 0, Epoch: 0, V1 model, h=50, lr=0.00100, Loss=2.63943, test err=0.866
Trial 0, Epoch: 600, V1 model, h=50, lr=0.00100, Loss=1.62902, test err=0.485
Trial 0, Epoch: 1200, V1 model, h=50, lr=0.00100, Loss=1.31281, test err=0.391
Trial 0, Epoch: 1800, V1 model, h=50, lr=0.00100, Loss=1.14445, test err=0.364
Trial 0, Epoch: 2400, V1 model, h=50, lr=0.00100, Loss=1.04262, test err=0.346
Trial 0, Epoch: 3000, V1 model, h=50, lr=0.00100, Loss=0.97460, test err=0.339
Trial 1, Epoch: 0, V1 model, h=50, lr=0.00100, Loss=3.35059, test err=0.912
Trial 1, Epoch: 600, V1 model, h=50, lr=0.00100, Loss=1.53798, test err=0.444
Trial 1, Epoch: 1200, V1 model, h=50, lr=0.00100, Loss=1.23642, test err=0.377
Trial 1, Epoch: 1800, V1 model, h=50, lr=0.00100, Loss=1.08296, test err=0.349
Trial 1, Epoch: 2400, V1 model, h=50, lr=0.00100, Loss=0.99204, test err=0.332
Trial 1, Epoch: 3000, V1 model, h=50, lr=0.00100, Loss=0.93195, test err=0.319
Trial 2, Epoch: 0, V1 model, h=50, lr=0.00100, Loss=3.06157,

## Plot results

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

t, l, n_epochs= 5, 2, 3001
models = ['V1', 'He']
h_list = [50, 100, 400, 1000]
lr_list = [1e-3, 1e-2, 1e-1, 1e0]

# Load the sweep results once; the file does not depend on h or lr.
# NOTE: pickle.load executes arbitrary code from the file, so only open result
# files produced by this notebook.
with open('results/initialize_fashion_mnist/full_data/clf_t=%0.2f_l=%0.2f.pickle' % (t, l), 'rb') as handle:
    sims = pickle.load(handle)

epochs = np.arange(n_epochs)

# (key in `sims`, panel title, y-axis label, whether to set linear y-ticks before the log scale)
panels = [
    ('loss', 'Network loss', 'Training loss', False),
    ('train_err', 'Train error', 'Training error', True),
    ('test_err', 'Test error', 'Test error', True),
]

for h in h_list:
    for lr in lr_list:
        fig = plt.figure(figsize=(12, 5))
        fig.suptitle(r'Shallow FFW FC net w/ GD. h=%d, lr=%0.4f, '% (h, lr), fontsize=16)

        for panel_idx, (key, title, ylabel, set_ticks) in enumerate(panels, start=1):
            ax = fig.add_subplot(1, 3, panel_idx)
            ax.set_title(title, fontsize=16)
            for m in models:
                # Mean and standard deviation across trials (axis 0) for every epoch.
                curves = sims[key][m][h][lr]
                avg = np.mean(curves, axis=0)
                std = np.std(curves, axis=0)
                ax.plot(epochs, avg, label=m, lw=3)
                ax.fill_between(epochs, avg - std, avg + std, alpha=0.2)

            ax.set_xlabel('Epoch', fontsize=20)
            ax.set_ylabel(ylabel, fontsize=20)
            ax.tick_params(axis = 'both', which = 'major', labelsize = 14, width=2, length=6)
            if set_ticks:
                # NOTE(review): switching to a log scale right after probably replaces
                # these ticks; kept to match the original call order. Confirm.
                ax.set_yticks(np.arange(0, 1, 0.2))
            ax.set_yscale('log')
            ax.legend(fontsize=18)

        fig.tight_layout()
        fig.subplots_adjust(top=0.8)

        print(h, lr)
        fig.savefig('results/initialize_fashion_mnist/full_data/init_t=%0.2f_l=%0.2f_h=%d_lr=%0.4f.png' % (t, l, h, lr))
        # Close explicitly so the 16 figures do not accumulate in memory.
        plt.close(fig)

50 0.001
50 0.01
50 0.1
50 1.0
100 0.001
100 0.01
100 0.1
100 1.0
400 0.001
400 0.01
400 0.1
400 1.0
1000 0.001
1000 0.01
1000 0.1
1000 1.0


## Test the speed of training networks on GPUs vs CPU

In [None]:
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from estimator import classical_weights, V1_inspired_weights
from data_fns import load_fashion_mnist

In [None]:
# load_fashion_mnist returns NumPy arrays (they are fed to torch.from_numpy below).
train, train_labels, test, test_labels = load_fashion_mnist('./data/fashion_mnist')

# Tensors stay on the CPU here; the timing cell moves copies to the GPU.
X_train, X_test = (torch.from_numpy(arr).float() for arr in (train, test))
y_train, y_test = (torch.from_numpy(arr).long() for arr in (train_labels, test_labels))

# X_train is 2-D; d is used further down as the input width of the first Linear layer.
n, d = X_train.shape

In [None]:
class V1_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer is initialized by ``V1_inspired_weights``.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Forwarded to ``V1_inspired_weights`` as ``scale``.
    t, l : optional
        Forwarded to ``V1_inspired_weights`` as ``t`` and ``l``. The defaults (5, 2)
        are the values that used to be hard-coded here.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, t=5, l=2, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        init_weights = torch.as_tensor(
            V1_inspired_weights(hidden_size, input_dim, t=t, l=l, scale=scale),
            dtype=torch.float32,
        )
        # Copy in place (no autograd tracking) so the parameter keeps its
        # (hidden_size, input_dim) shape instead of being silently replaced.
        with torch.no_grad():
            self.fc1.weight.copy_(init_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class He_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer uses Kaiming-normal initialization.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Unused; accepted only so the constructor signature matches the other
        network classes in this notebook.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        torch.nn.init.kaiming_normal_(self.fc1.weight)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class RF_net(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer is initialized by ``classical_weights``.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Forwarded to ``classical_weights`` as ``scale``.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        init_weights = torch.as_tensor(
            classical_weights(hidden_size, input_dim, scale=scale),
            dtype=torch.float32,
        )
        # Copy in place (no autograd tracking) so the parameter keeps its
        # (hidden_size, input_dim) shape instead of being silently replaced.
        with torch.no_grad():
            self.fc1.weight.copy_(init_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
class V1_net_multiscale(nn.Module):
    """One-hidden-layer ReLU classifier whose first layer is initialized by ``V1_weights_multiple_scales``.

    Parameters
    ----------
    hidden_size : int
        Number of hidden units.
    scale : float
        Forwarded to ``V1_weights_multiple_scales`` as ``scale``.
    input_dim : int, optional
        Input feature dimension. When omitted, the notebook-level ``d`` is used.
    """
    def __init__(self, hidden_size, scale, input_dim=None):
        # Imported locally because this section's import cell does not bring in
        # V1_weights_multiple_scales; without it a fresh kernel raises NameError here.
        from estimator import V1_weights_multiple_scales

        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook global set by the data cell.
            input_dim = d
        self.fc1 = nn.Linear(input_dim, hidden_size)
        init_weights = torch.as_tensor(
            V1_weights_multiple_scales(hidden_size, input_dim, scale=scale),
            dtype=torch.float32,
        )
        # Copy in place (no autograd tracking) so the parameter keeps its
        # (hidden_size, input_dim) shape instead of being silently replaced.
        with torch.no_grad():
            self.fc1.weight.copy_(init_weights)
        self.output = nn.Linear(hidden_size, 10)

    def forward(self, inputs):
        """Return the 10 output scores for ``inputs`` (fc1 -> ReLU -> output layer)."""
        x = torch.relu(self.fc1(inputs))
        return self.output(x)
    
    
def predict(model, X):
    """Return, for each row of ``X``, the index of the largest score produced by ``model``.

    The forward pass runs under ``torch.no_grad()`` so evaluation does not
    build an autograd graph.
    """
    with torch.no_grad():
        return model(X).max(1)[1]

def error(model, X, y):
    """Return the misclassification rate of ``model`` on ``(X, y)``, i.e. ``1 - accuracy``."""
    n_correct = torch.sum(predict(model, X) == y)
    hit_rate = 1.0 * n_correct / len(y)
    return 1 - hit_rate

In [None]:
%%time
## Run on GPU
X_train_cuda, y_train_cuda = X_train.to('cuda'), y_train.to('cuda')
X_test_cuda, y_test_cuda = X_test.to('cuda'), y_test.to('cuda')

h, lr, scale = 1000, 0.1, 2/d
n_epochs = 2001
model = V1_net(h, scale=scale).to('cuda')
loss_func = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=lr)

test_err_list_V1 = []
for i in range(n_epochs):
    optim.zero_grad()
    loss = loss_func(model(X_train_cuda), y_train_cuda)
    loss.backward()
    optim.step()
    
    train_err = error(model, X_train_cuda, y_train_cuda)
    test_err = error(model, X_test_cuda, y_test_cuda)
    test_err_list_V1.append(test_err)
    
    if (i % 200 == 0):
        print('Epoch: %d, Loss=%0.5f, train_err=%0.5f, test_err=%0.5f' % (i, loss.data, train_err, test_err))

In [None]:
# NOTE: this cell is the disabled He_net counterpart of the V1 timing run.
# test_err_list_He, which the comparison plot uses, is only created when this cell is re-enabled.
# NOTE(review): h is 500 here but 1000 in the V1 run; confirm whether a like-for-like comparison is intended.
# %%time
# ## Run on GPU
# X_train_cuda, y_train_cuda = X_train.to('cuda'), y_train.to('cuda')
# X_test_cuda, y_test_cuda = X_test.to('cuda'), y_test.to('cuda')

# h, lr, scale = 500, 0.1, 2/d
# n_epochs = 2001
# model = He_net(h, scale=scale).to('cuda')
# loss_func = nn.CrossEntropyLoss()
# optim = torch.optim.SGD(model.parameters(), lr=lr)

# test_err_list_He = []
# for i in range(n_epochs):
#     optim.zero_grad()
#     loss = loss_func(model(X_train_cuda), y_train_cuda)
#     loss.backward()
#     optim.step()
    
#     train_err = error(model, X_train_cuda, y_train_cuda)
#     test_err = error(model, X_test_cuda, y_test_cuda)
#     test_err_list_He.append(test_err)
    
#     if (i % 200 == 0):
#         print('Epoch: %d, Loss=%0.5f, train_err=%0.5f, test_err=%0.5f' % (i, loss.data, train_err, test_err))

In [None]:
fig = plt.figure()
# Plot every per-epoch test-error curve that exists in this session. The He curve
# is only defined when its (currently commented-out) training cell has been run,
# so a missing curve is skipped instead of raising NameError.
for _label, _name in (('V1', 'test_err_list_V1'), ('He', 'test_err_list_He')):
    _curve = globals().get(_name)
    if _curve is None:
        print('Skipping %s: %s is not defined' % (_label, _name))
        continue
    # float() accepts both plain numbers and 0-dim tensors (including ones on the GPU).
    _values = [float(v) for v in _curve]
    plt.plot(np.arange(len(_values)), _values, label=_label)
plt.xlabel('Epoch')
plt.ylabel('Test error')
plt.yscale('log')
plt.legend()

In [None]:
# Scratch check: draw 100 samples from a 2-D normal with mean (5, 3) and identity covariance.
np.random.multivariate_normal(mean=(5, 3), cov=np.eye(2), size=100)