In [1]:
from torchvision import datasets
from torchvision.transforms import ToTensor
import plotly.express as px
from torch.utils.data import DataLoader
import torch.nn as nn
from torch import optim
import torch
import numpy as np

In [2]:
train_data = datasets.MNIST(root='data',train=True,transform=ToTensor(),download=True)

In [3]:
#print(train_data.targets.size())
#px.imshow(train_data.data[0], binary_string=True)

In [4]:
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d (
                in_channels=1,
                out_channels=8,
                kernel_size=5,
                stride=1,
                padding=2),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(8,16,5,1,2),
            nn.SELU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(16*7*7, 10)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = torch.flatten(x,1)
        x = self.fc(x)
        return x

In [5]:
def train_gradient(neural_net, loader, num_epochs, criterion):
    neural_net.train()
    running_loss = 0
    for epoch in range(num_epochs):
        for i,(inputs,targets) in enumerate(loader):
            optimizer.zero_grad()
            outputs = neural_net(inputs)
            loss = criterion(outputs,targets)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            if i%1000 == 0 and i != 0:
                print(f'Epoch : {epoch}, i : {i}, loss : {running_loss/1000:.3f}')
                running_loss = 0
    print('Finished training')

In [6]:
def train_ro(neural_net, loader, num_epochs, criterion, std=0.01):
    with torch.no_grad():
        neural_net.train()
        running_loss = 0
        best_loss_found = np.inf
        for epoch in range(num_epochs):
            for i,(inputs,targets) in enumerate(loader):
                
                neural_net_cand = NeuralNet()
                neural_net_cand.load_state_dict(neural_net.state_dict())
                
                neural_net_cand.conv1[0].weight += torch.normal(mean=0,std=std,size=neural_net.conv1[0].weight.size())
                neural_net_cand.conv2[0].weight += torch.normal(mean=0,std=std,size=neural_net.conv2[0].weight.size())
                neural_net_cand.fc.weight += torch.normal(mean=0,std=std,size=neural_net.fc.weight.size())
               
                outputs = neural_net_cand(inputs)
                loss = criterion(outputs,targets)

                if loss < best_loss_found:
                    best_loss_found = loss
                    neural_net = neural_net_cand

                    print(f'Epoch : {epoch}, i : {i}, loss : {loss:.3f}')

In [7]:
def loss_on_dataset(neural_net, data, criterion):
    with torch.no_grad():
        loader = DataLoader(data, batch_size=60000, shuffle=True)
        for inputs,targets in loader:
            outputs = neural_net(inputs)
            loss = criterion(outputs,targets)
            return loss.item()

In [8]:
%%time
neural_net = NeuralNet()
optimizer = optim.SGD(neural_net.parameters(), lr=0.01, momentum=0.9)
loader = DataLoader(train_data, batch_size=50, shuffle=True)
num_epochs = 10
criterion = nn.CrossEntropyLoss()
train_gradient(neural_net, loader, num_epochs, criterion)
print('Loss on whole dataset after training on it : ', loss_on_dataset(neural_net, train_data, criterion))

Epoch : 0, i : 1000, loss : 0.223
Epoch : 1, i : 1000, loss : 0.090
Epoch : 2, i : 1000, loss : 0.066
Epoch : 3, i : 1000, loss : 0.057
Epoch : 4, i : 1000, loss : 0.046
Epoch : 5, i : 1000, loss : 0.043
Epoch : 6, i : 1000, loss : 0.038
Epoch : 7, i : 1000, loss : 0.032
Epoch : 8, i : 1000, loss : 0.029
Epoch : 9, i : 1000, loss : 0.027
Finished training
Loss on whole dataset after training on it :  0.018838059157133102
Wall time: 2min 23s


In [20]:
%%time
neural_net = NeuralNet()
loader = DataLoader(train_data, batch_size=60000, shuffle=True)
num_epochs = 18
criterion = nn.CrossEntropyLoss()
train_ro(neural_net, loader, num_epochs, criterion, std=0.01)
print('Loss on whole dataset after training on it : ', loss_on_dataset(neural_net, train_data, criterion))

Epoch : 0, i : 0, loss : 2.298
Epoch : 3, i : 0, loss : 2.295
Epoch : 13, i : 0, loss : 2.292
Epoch : 14, i : 0, loss : 2.290
Epoch : 16, i : 0, loss : 2.288
Loss on whole dataset after training on it :  2.3043665885925293
Wall time: 2min 34s


In [111]:
%%time
neural_net = NeuralNet()
loader = DataLoader(train_data, batch_size=50, shuffle=True)
num_epochs = 10
criterion = nn.CrossEntropyLoss()
train_ro(neural_net, loader, num_epochs, criterion, std=0.1)
print('Loss on whole dataset after training on it : ', loss_on_dataset(neural_net, train_data, criterion))

Epoch : 0, i : 0, loss : 3.183
Epoch : 0, i : 514, loss : 3.141
Loss on whole dataset after training on it :  2.3157432079315186


In [75]:
neural_net = NeuralNet()
loader = DataLoader(train_data, batch_size=50, shuffle=True)
num_epochs = 10
criterion = nn.CrossEntropyLoss()
train_zero_order(neural_net, loader, num_epochs, criterion, std=0.05)

Epoch : 0, i : 0, loss : 2.388
Epoch : 0, i : 7, loss : 2.388
Epoch : 0, i : 20, loss : 2.360
Epoch : 0, i : 37, loss : 2.275
Epoch : 0, i : 91, loss : 2.084
Epoch : 3, i : 123, loss : 2.068


In [None]:
%%time
neural_net = NeuralNet()
loader = DataLoader(train_data, batch_size=50, shuffle=True)
num_epochs = 10
criterion = nn.CrossEntropyLoss()
train_ro(neural_net, loader, num_epochs, criterion, std=1)
print('Loss on whole dataset after training on it : ', loss_on_dataset(neural_net, train_data, criterion))

In [5]:
class ASSRS:
    def __init__(self, func, nb_dim, step, a, i2_limit, step_decrease, i1_freq, step_increase):
        self.func = func
        self.nb_dim = nb_dim
        self.step = step
        self.a = a
        self.i2_limit = i2_limit
        self.step_decrease = step_decrease
        self.i1_freq = i1_freq
        self.step_increase = step_increase
        
        self.reset()
    
    
    def reset(self):
        self.i1 = 0
        self.i2 = 0
        self.x = np.zeros(self.nb_dim)
        self.f_x = np.inf
        
    def random_point_hypersphere(self, step):
        x = np.random.normal(0, step, size=(self.nb_dim,))
        step = np.linalg.norm(x)
        return x / step
    
    def compare_step_sizes(self, step_1, step_2):
        x_1 = self.x + self.random_point_hypersphere(step_1,)
        x_2 = self.x + self.random_point_hypersphere(step_2)
        f_1 = self.func(x_1)
        f_2 = self.func(x_2)
        print(f_1)
        print(f_2)
        return (x_1,f_1,step_1) if f_1 < f_2 else (x_2,f_2,step_2)
    
    def iterate(self, verbose=False):
        print(self.step)
        # 1 Step size of nominal step size
        # 1 Step size of large step size
        if self.i1%self.i1_freq==0 and self.i1!=0:
            x_cand,f_cand,step_cand = self.compare_step_sizes(self.step, self.step+self.step_increase)
            if f_cand < self.f_x:
                self.x,self.f_x,self.step = x_cand,f_cand,step_cand
            if verbose:
                print(f'Size step : {self.step}, loss : {self.f_x}')
        
        larger_step = self.step*(1+self.a)
        x_cand,f_cand,step_cand = self.compare_step_sizes(self.step, larger_step)
        
        if f_cand < self.f_x:
            # One step produced an improvement
            self.x,self.f_x,self.step = x_cand, f_cand,step_cand
            self.i2 = 0
            if verbose:
                print(f'Size step : {self.step}, loss : {self.f_x}')
        else:
            # No steps produced an improvement
            self.i2 = self.i2 + 1
            if self.i2 == self.i2_limit:
                # No improvement for a long time, reduce step size
                self.step *= 1-self.step_decrease
                self.i2 = 0

In [6]:
class CNN_Holder:
    def __init__(self, cnn):
        self.cnn = cnn
        self.weights_1_size = torch.prod(torch.tensor(cnn.conv1[0].weight.shape))
        self.weights_2_size = torch.prod(torch.tensor(cnn.conv2[0].weight.shape))
        self.weights_3_size = torch.prod(torch.tensor(cnn.fc.weight.shape))
        self.weights_size = self.weights_1_size + self.weights_2_size + self.weights_3_size

In [11]:
# first argument : parameters of the NN, that ASSRS will try to fine tune
# second : targets, given at the beginning of each epoch (ASSRS does not see it)
def loss_func(x, cnn_h, inputs, targets):
    # convert x to weights
    cnn_h.cnn.conv1[0].weights = x[:cnn_h.weights_1_size]
    cnn_h.cnn.conv2[0].weights = x[cnn_h.weights_1_size:cnn_h.weights_2_size]
    cnn_h.cnn.fc.weights = x[cnn_h.weights_2_size:]
    print(cnn_h.cnn(inputs).shape)
    print(targets.shape)
    loss = nn.CrossEntropyLoss(cnn_h.cnn(inputs),targets)
    print(loss)
    return loss

def train_assrs(cnn, loader, num_epochs=10):
    with torch.no_grad():
        cnn_h = CNN_Holder(cnn)
        assrs = ASSRS(func=lambda x:print(x),
                      nb_dim=cnn_h.weights_size,
                      step=0.1,
                      a=0.01,
                      i2_limit=10,
                      step_decrease=0.01,
                      i1_freq=10,
                      step_increase=0.01)
        cnn.train()
        running_loss = 0
        for epoch in range(num_epochs):
            for i1,(inputs,targets) in enumerate(loader):
                
                assrs.func = lambda x: loss_func(x,cnn_h,inputs,targets)
                assrs.iterate()

                print(f'Epoch : {epoch}, i : {i}, loss : {assrs.f_x:.3f}')

In [12]:
%%time
cnn = NeuralNet()
loader = DataLoader(train_data, batch_size=5000, shuffle=True)
num_epochs = 10
criterion = nn.CrossEntropyLoss()
train_assrs(cnn, loader, num_epochs)
print('Loss on whole dataset after training on it : ', loss_on_dataset(cnn, train_data, criterion))

0.1
torch.Size([5000, 10])
torch.Size([5000])


RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [21]:
class A:
    def __init__(self,att):
        self.att=att
    def oui():
        print(self.att)

def g(f):
    f()

a = A(10)

In [22]:
a.oui()

TypeError: oui() takes 0 positional arguments but 1 was given

In [29]:
a = torch.tensor([1,2,3,4])
a[0:3]

tensor([1, 2, 3])

In [31]:
a = 5
b = lambda x: x+a
b(10)

15