<a href="https://colab.research.google.com/github/ArturoMaiani/DLAI-project/blob/main/DLAI_project_Maiani.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports and Dataset are taken from Notebook "Autograd and Modules"

### Imports

In [1]:

from typing import Mapping, Union, Optional
import random

import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch import nn
from torch import optim
from torch.optim.lr_scheduler import LinearLR,MultiStepLR
from matplotlib import pyplot as plt

import torch.optim as optim
import plotly.graph_objects as go
import math
from torchvision import datasets, transforms
from tqdm.notebook import tqdm


from traitlets.traitlets import validate
import plotly.figure_factory as ff

import numpy as np
from scipy.spatial import Delaunay

from sklearn.decomposition import PCA
import tabulate
from tabulate import tabulate


torch.manual_seed(42)
np.random.seed(42)
random.seed(0)

torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True  # Note that this Deterministic mode can have a performance impact
torch.backends.cudnn.benchmark = False

### dataset

In [2]:
!wget https://s3.amazonaws.com/img-datasets/mnist.npz
#se cerchi questo link ti scarica MNIST non c'e una pagina specifica

def load_data_impl():
    # file retrieved by:
    #   wget https://s3.amazonaws.com/img-datasets/mnist.npz -O code/dlgo/nn/mnist.npz
    # code based on:
    #   site-packages/keras/datasets/mnist.py

    # NPZ is a file format by numpy that provides storage of array data using gzip compression. 
    # This imageio plugin supports data of any shape, and also supports multiple images per file.
    path = 'mnist.npz'
    f = np.load(path)
    print(type(f))
    x_train, y_train = f['x_train'].reshape(-1, 784), f['y_train']
    x_test, y_test = f['x_test'].reshape(-1, 784), f['y_test']
    f.close()
    return (x_train.astype(np.float32), y_train), (x_test.astype(np.float32), y_test)

--2022-08-27 09:23:45--  https://s3.amazonaws.com/img-datasets/mnist.npz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 54.231.169.88
Connecting to s3.amazonaws.com (s3.amazonaws.com)|54.231.169.88|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11490434 (11M) [application/octet-stream]
Saving to: ‘mnist.npz’


2022-08-27 09:23:46 (15.9 MB/s) - ‘mnist.npz’ saved [11490434/11490434]



In [3]:
(x_train, y_train), (x_valid, y_valid) = load_data_impl()
print('Mean',np.mean(x_train)/255,'\n','Std',np.sqrt(np.var(x_train)/255**2))

<class 'numpy.lib.npyio.NpzFile'>
Mean 0.130660576913871 
 Std 0.3081076236548345


In [4]:
# Normalization with values pre-computed

x_train = (x_train / 255 - 0.13) / 0.3  # data normalization
x_valid = (x_valid / 255 - 0.13) / 0.3

Each image is `28 x 28`, and is being stored as a flattened row of length
`784 (=28x28)`. Let's take a look at one; we need to reshape it to 2d
first.


In [5]:
import plotly.express as px
import numpy as np

print(x_train.shape)
px.imshow(x_train[0].reshape((28, 28)), color_continuous_scale='gray')

(60000, 784)


In [6]:
import torch

x_train, y_train, x_valid, y_valid = map(
  torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
y_train = y_train.long()  # we will use targets as indices and pytorch wants int64 as indices
y_valid = y_valid.long()
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

tensor([[-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        ...,
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333]]) tensor([5, 0, 4,  ..., 5, 6, 8])
torch.Size([60000, 784])
tensor(0) tensor(9)


In [7]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

bs = 64  # batch size

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)




def accuracy(out, yb):
  preds = torch.argmax(out, dim=1)
  return (preds == yb).float().mean()

xb = x_valid
yb = y_valid

## TRAINING


Mnist_logistic() represents the standard unconstrained network.

Mnist_logistic_constrained() represents the network with weights constrained to a linear subspace

Mnist_ellipsoid() represents the network with weigths constrained to ellipsoid like manifold

##### Models definition

In [8]:


w1=784
w2=200
w3=100

loss_func = F.cross_entropy

class Mnist_Logistic(nn.Module):
  def __init__(self):
    super().__init__()
    self.weights1 = nn.Parameter(torch.randn(w1, w2) / math.sqrt(w1))
    self.bias1 = nn.Parameter(torch.zeros(w2))
    self.weights2 = nn.Parameter(torch.randn(w2, w3) / math.sqrt(w2))
    self.bias2 = nn.Parameter(torch.zeros(w3))
    self.weights3 = nn.Parameter(torch.randn(w3, 10) / math.sqrt(w3))
    self.bias3 = nn.Parameter(torch.zeros(10))

    self.dropout = nn.Dropout(p=0.2)

  def forward(self, xb):
    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1)
    z=self.dropout(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3)
    return z

class Mnist_Logistic_constrained(nn.Module):
  def __init__(self,d):
    super().__init__()

    N=w1*w2+w2+w2*w3+w3+w3*10+10
    self.Ortogonal=torch.normal(0,0.1,(N,d))


    self.theta = nn.Parameter(torch.normal(0,0.01,(d,1)))
    self.dropout = nn.Dropout(p=0.2)
    

  def forward(self, xb):

    self.A=torch.mm(self.Ortogonal,self.theta)
    self.weights1=self.A[0:w1*w2].reshape(w1,w2)
    self.bias1 = self.A[w1*w2:w1*w2+w2].reshape(w2,1)

    self.weights2=self.A[w1*w2+w2:w1*w2+w2+w2*w3].reshape(w2,w3)
    self.bias2=self.A[w1*w2+w2+w2*w3:w1*w2+w2+w2*w3+w3].reshape(w3,1)
    
    self.weights3=self.A[w1*w2+w2+w2*w3+w3:w1*w2+w2+w2*w3+w3+w3*10].reshape(w3,10)
    self.bias3=self.A[w1*w2+w2+w2*w3+w3+w3*10:].reshape(10,1)
    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1.T)
    z=self.dropout(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2.T)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3.T)
    return z



class Mnist_ellipsoid(nn.Module):
  def __init__(self,d):
    super().__init__()
    self.d=d
    self.raggio=0.15
    N=w1*w2+w2+w2*w3+w3+w3*10+10
    self.Ortogonal = torch.normal(0,0.1,(N,d+1))
    self.radiuses=torch.rand(d+1)*1.5+0.5
    self.param=nn.Parameter((3.14*1.9)*torch.rand(d+1)-3.10)
    self.c=torch.triu(torch.ones(d+1,d+1),diagonal=1)[:,0:d]
    self.dropout = nn.Dropout(p=0.2)

  def forward(self, xb):

    # build the hyperellipsoid---------------------

    # build matrix full of sinus
    self.a=torch.sin(self.param).repeat(self.d+1,1)
    # take only the lower triangular except from the diagonal
    self.a=torch.tril(self.a,diagonal=-1)[:,0:self.d]
    # create the diagonal with cosines
    self.b=torch.diag(torch.cos(self.param))[:,0:self.d]
    # take the product row-wise
    self.vec=torch.prod(self.a+self.b+self.c,1)
    #multiply each component by a random radius
    self.vec=self.vec*self.radiuses 
    # multiply the hyperellipsoid to the linear subspace
    self.A=torch.mv(self.Ortogonal,self.vec)#+self.initial
    #-------------------------------------------

    

    self.weights1=self.A[0:w1*w2].reshape(w1,w2)
    self.bias1 = self.A[w1*w2:w1*w2+w2].reshape(w2,1)

    self.weights2=self.A[w1*w2+w2:w1*w2+w2+w2*w3].reshape(w2,w3)
    self.bias2=self.A[w1*w2+w2+w2*w3:w1*w2+w2+w2*w3+w3].reshape(w3,1)
    
    self.weights3=self.A[w1*w2+w2+w2*w3+w3:w1*w2+w2+w2*w3+w3+w3*10].reshape(w3,10)
    self.bias3=self.A[w1*w2+w2+w2*w3+w3+w3*10:].reshape(10,1)

    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1.T)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2.T)
    z=self.dropout(z)
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3.T)
    return z



In [1]:
w1=784
w2=200
w3=100
w1*w2+w2+w2*w3+w3+w3*10+10

178110

#### Training utilities

In [18]:
def get_data(train_ds, valid_ds, bs):
  return (
    DataLoader(train_ds, batch_size=bs, shuffle=True),
    DataLoader(valid_ds, batch_size=bs * 2),
  )
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)

def step(model, loss_func, xb, yb, opt=None,scheduler=None):
  loss = loss_func(model(xb), yb)

  if opt is not None:
    loss.backward()
    opt.step()
    opt.zero_grad()
    if scheduler!=None:
      scheduler.step()

  return loss.item(), len(xb)

def fit(epochs, model, loss_func, opt,train_dl, valid_dl, scheduler=None):

  accuracy_list=[accuracy(model(x_valid), y_valid)]
  
  for epoch in tqdm(range(epochs)):
    model.train()
    for xb, yb in train_dl:
      step(model, loss_func, xb, yb, opt,scheduler)

    model.eval()
    with torch.no_grad():
      accuracy_list.append(accuracy(model(x_valid), y_valid)) # on the whole validation set
    fig = go.Figure()
    x = np.arange(len(accuracy_list))
    fig.add_trace(go.Scatter(x=x, y=accuracy_list, mode='lines'))
    fig.update_layout(
            title='Accuracy across epochs',
            xaxis_title="epoch / test frequency",
            yaxis_title="Accuracy")
  fig.show() 



#### Training the standard model

In [22]:
def get_model_std():
  model = Mnist_Logistic()#model(x)
  return model, optim.Adam(model.parameters(), lr=5e-3)

model, opt = get_model_std()
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=4)
fit(4, model, loss_func, opt, train_dl, valid_dl,scheduler)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/4 [00:00<?, ?it/s]

#### Training for the linear subspace with random matrices

##### Trained networks linear constraint


In [None]:
def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

###### d=800

In [None]:
model, opt = get_model_linear(800)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=750

In [None]:
model, opt = get_model_linear(750)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=725 (two examples)

In [None]:
model, opt = get_model_linear(725)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
model, opt = get_model_linear(725)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=700

In [None]:
model, opt = get_model_linear(700)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=650

In [None]:

model, opt = get_model_linear(650)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.1, total_iters=2)
fit(2, model, loss_func, opt, train_dl, valid_dl)


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

#### Training for the hypersphere with random matrices

##### Trained networks for ellipsoid constraint


In [25]:
def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)

###### d=800

In [None]:
model, opt = get_model_ell(800)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.8, total_iters=7)
fit(7, model, loss_func, opt, train_dl, valid_dl,scheduler)

# In the following plots you don't see written 'Accuracy across test epoch' like in the hyperplane plots. 
# This is because the plots were obtained before i changed the code to be more readable. In the following commented 
# section you can see how ugly the code was before. The new code is equivalent but i didn't want to throw away 
# all the plots i obtained.
'''
epochs=7
def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)
model_ell,opt_ell=get_model_ell(800)
acc_hyp=[accuracy(model_ell(xb), yb)]
scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.8, total_iters=epochs)
for epoch in tqdm(range(epochs)):
  model_ell.train()
  for xb, yb in train_dl:
    pred = model_ell(xb)
    loss = loss_func(pred, yb)
    loss.backward()
    opt_ell.step()
    opt_ell.zero_grad()
    scheduler_ell.step()  
  model_ell.eval()
  with torch.no_grad():
    acc_hyp.append(accuracy(model_ell(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_hyp))
fig.add_trace(go.Scatter(x=x, y=acc_hyp, mode='lines'))
'''

  0%|          | 0/2 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



###### d=750

In [None]:
model, opt = get_model_ell(750)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.8, total_iters=7)
fit(7, model, loss_func, opt, train_dl, valid_dl,scheduler)

###### d=725

In [None]:
model, opt = get_model_ell(725)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.5, total_iters=8)
fit(8, model, loss_func, opt, train_dl, valid_dl,scheduler)

  0%|          | 0/1 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



###### d=700

In [None]:
model, opt = get_model_ell(700)
scheduler=LinearLR(opt, start_factor=1,end_factor=0.2, total_iters=9)
fit(9, model, loss_func, opt, train_dl, valid_dl,scheduler)


  0%|          | 0/1 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



#### Summary and Comparison between training of Hyperplane and Hyperellipsoid

In [64]:
lista_accuracy=[0.84,0.875,0.96,0.9,0.9]
fig = go.Figure()
x=[650,700,725,750,800]
fig.add_trace(go.Scatter(x=x, y=lista_accuracy,name='Hyperplane', mode='markers',marker=dict(
                            size=30)))
fig.add_trace(go.Scatter(x=[650,675,700,725,750,775,800], y=[0.88,0.88,0.88,0.88,0.88,0.88,0.88],name='Treshold', line = dict(color='firebrick', width=4)))

lista_accuracy_ell=[0.75,0.8,0.875,0.9]

x_ell=[700,725,750,800]
fig.add_trace(go.Scatter(x=x_ell, y=lista_accuracy_ell,name='Ellipsoid', mode='markers',marker=dict(
                            size=30,color='#9467bd')))
fig.update_layout(
        title='Accuracy across subspace dimension',
        xaxis_title="subspace dimension",
        yaxis_title="Accuracy on test set")
fig.update_layout(
    autosize=False,
    width=500,
    height=400,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    paper_bgcolor="LightSteelBlue",
)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
        tick0 = 650,
        dtick = 25
    )
)
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        tick0 = 0.8,
        dtick = 0.025
    )
)

## Training on new structure for P based on SVD. Training on (30,30) MLP

Define the smaller model, this will be used for sampling the trajectory

In [9]:
w1_=784
w2_=30
w3_=30
loss_func = F.cross_entropy
class Mnist_Logistic_small_net(nn.Module):
  def __init__(self):
    super().__init__()
    self.weights1 = nn.Parameter(torch.randn(w1_, w2_) / math.sqrt(w1_))
    self.bias1 = nn.Parameter(torch.zeros(w2_))
    self.weights2 = nn.Parameter(torch.randn(w2_, w3_) / math.sqrt(w2_))
    self.bias2 = nn.Parameter(torch.zeros(w3_))
    self.weights3 = nn.Parameter(torch.randn(w3_, 10) / math.sqrt(w3_))
    self.bias3 = nn.Parameter(torch.zeros(10))

    self.dropout1 = nn.Dropout(p=0.4)
    self.dropout2 = nn.Dropout(p=0.3)
  def forward(self, xb):
    z=self.dropout1(xb)
    z=F.leaky_relu(torch.matmul(z,self.weights1) + self.bias1)
    z=self.dropout2(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3)
    return z


N=w1_*w2_+w2_+w2_*w3_+w3_+w3_*10+10

##### Collect the trajectory of the unconstrained weights: collect 100 samples.Start to collect after 1000 batches have been done and then sample every 30 batches. This so to sample the position of weights more near to the optimum value.

In [54]:
number_of_samples=100
def get_model_std():
  model = Mnist_Logistic_small_net()#model(x)
  return model, optim.Adam(model.parameters(), lr=1e-2)
model_std,opt_std=get_model_std()



k=0
l=0
listona_parametri=[]
for epoch in tqdm(range(3)):
  model_std.train()
  for xb, yb in train_dl:#each batch is 64 samples hence to cover an epoch (60K samples)we need 1000 iterations
    if l>number_of_samples:
      print(k)
      break
    pred = model_std(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_std.step()
    opt_std.zero_grad()
    if k>400 and k%15==0:
      l+=1
      #print(l)
      with torch.no_grad():
        
        for parameter in model_std.parameters():
          if parameter.requires_grad:
            for elem in parameter.reshape(-1):
              listona_parametri.append(elem)
      
        
    k+=1


tensore_preprocess=torch.tensor(listona_parametri).reshape(N,number_of_samples+1)

print(tensore_preprocess.size())
torch.save(tensore_preprocess, 'tensore_preprocess.pt')

  0%|          | 0/3 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



1906


In [None]:
# The following command cannot be executed for RAM limitation in colab
# torch.linalg.svd(tensore_preprocess)
# we need to save the tensor and execute the SVD in local, loading the tensor with torch.load(<>.pt) 
# and then bring it back here by saving it on drive, where it can be used as orthonormal matrix

##### Othonormal matrix obtained after computing SVD on local machine

In [10]:
# On local machine the SVD takes only a couple of minutes for a 20'000 by 300 matrix
from google.colab import drive
from google.colab import files
drive.mount('/content/drive')
SVD=torch.load('/content/drive/MyDrive/Colab_Notebooks/tensore_postprocess.pt')#this is the PCA

Mounted at /content/drive


##### Vanilla hyperplane vs SVD hyperplane

Use a combined matrix, the first d columns are obtained from the SVD and the remaining ones are chosen as random gaussians, but each entry has a st. deviation on 0.1 and zero mean

In [66]:
#COEFFICIENT GAMMA, approximately 250/300
torch.sum(torch.normal(0,0.1,(N,1))**2)


tensor(251.1248)

DEFINE THE CONSTRAINED MODEL, WITH SVD MATRIX

The model Mnist_Logistic_constrained_SVD() has 3 entries: 
1. d) the intrinsic dimension chosen
1. k) if k==0 we select the Vanilla Hyperplane, with k!=0 we select SVD hyperplane
1. l) means how many columns of the SVD matrix we want to plane in P

In the following cell we will perform a comparison on 20 trainings for Vanilla and 20 for SVD hyperplane and see the statistics


In [None]:
w1_=784
w2_=30
w3_=30

loss_func = F.cross_entropy

class Mnist_Logistic_constrained_SVD(nn.Module):
  def __init__(self,d,k,l):# k==0 means Vanilla Hyperplane, k!=0 means SVD Hyperplane
    super().__init__()     # d is just how many columns of the SVD matrix we want to place inside our P

    N_=w1_*w2_+w2_+w2_*w3_+w3_+w3_*10+10
    if k==0:
      self.Ortogonal=torch.normal(0,0.1,(N,d))
    else:
      self.Ortogonal=torch.cat((SVD[:,0:l]*300,torch.normal(0,0.1,(N,d-l))),1)

    #self.Ortogonal=

    self.theta = nn.Parameter(torch.normal(0,0.001,(d,1)))
    self.dropout1 = nn.Dropout(p=0.4)
    self.dropout2 = nn.Dropout(p=0.3)

  def forward(self, xb):

    self.A=torch.mm(self.Ortogonal,self.theta)
    self.weights1=self.A[0:w1_*w2_].reshape(w1_,w2_)
    self.bias1 = self.A[w1_*w2_:w1_*w2_+w2_].reshape(w2_,1)

    self.weights2=self.A[w1_*w2_+w2_:w1_*w2_+w2_+w2_*w3_].reshape(w2_,w3_)
    self.bias2=self.A[w1_*w2_+w2_+w2_*w3_:w1_*w2_+w2_+w2_*w3_+w3_].reshape(w3_,1)
    
    self.weights3=self.A[w1_*w2_+w2_+w2_*w3_+w3_:w1_*w2_+w2_+w2_*w3_+w3_+w3_*10].reshape(w3_,10)
    self.bias3=self.A[w1_*w2_+w2_+w2_*w3_+w3_+w3_*10:].reshape(10,1)

    z=self.dropout1(xb)
    z=F.leaky_relu(torch.matmul(z,self.weights1) + self.bias1.T)
    z=self.dropout2(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2.T)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3.T)
    return z


In [38]:
accuracy_list_1=[]#the list for the SVD hyperplane
numero_esempi=20
for i in range(numero_esempi):

    model_svd=Mnist_Logistic_constrained_SVD(750,1,15)
    opt_svd=optim.Adam(model_svd.parameters(), lr=4e-4)
    scheduler_svd = LinearLR(opt_svd, start_factor=1,end_factor=0.1, total_iters=epochs)

    epochs=4
    acc=[]
    for epoch in tqdm(range(epochs)):
      model_svd.train()
      for xb, yb in train_dl:
        pred = model_svd(xb)
        loss = loss_func(pred, yb)
        loss.backward()
        opt_svd.step()
        opt_svd.zero_grad()
      model_svd.eval()
      with torch.no_grad():
        acc.append(accuracy(model_svd(xb), yb))
        
    accuracy_list_1.append(max(acc))

accuracy_list_2=[]#the list for the Vanilla hyperplane

for i in range(numero_esempi):

    model_svd=Mnist_Logistic_constrained_SVD(750,0,0)
    opt_svd=optim.Adam(model_svd.parameters(), lr=4e-4)
    scheduler_svd = LinearLR(opt_svd, start_factor=1,end_factor=0.5, total_iters=epochs)

    epochs=4
    acc=[]
    for epoch in tqdm(range(epochs)):
      model_svd.train()
      for xb, yb in train_dl:
        pred = model_svd(xb)
        loss = loss_func(pred, yb)
        loss.backward()
        opt_svd.step()
        opt_svd.zero_grad()
      model_svd.eval()
      with torch.no_grad():
        acc.append(accuracy(model_svd(xb), yb))
        
    accuracy_list_2.append(max(acc))

fig = go.Figure()
x = np.arange(len(accuracy_list_2))
fig.add_trace(go.Scatter(x=x, y=accuracy_list_1, mode='markers',marker=dict(
                            size=30),name='Hyperplane+SVD'))
fig.add_trace(go.Scatter(x=x, y=accuracy_list_2, mode='markers',marker=dict(
                            size=30,color='#9467bd',name='Vanilla Hyperplane')))
fig.update_layout(
        title='Accuracy comparison',
        xaxis_title="tests",
        yaxis_title="Accuracy")

  0%|          | 0/4 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

In [63]:
trace1 = go.Histogram(
    x=accuracy_list_1,
    opacity=0.75,
    histnorm='probability',
    name='Hyperplane+SVD'
)
trace2 = go.Histogram(
    x=accuracy_list_2,
    opacity=0.75,
    histnorm='probability',
    name='Vanilla Hyperplane'
)

data = [trace1, trace2]

layout = go.Layout(
    title='Comparison',
    barmode='overlay',
    xaxis=dict(
    title='Accuracy'
    ),
    yaxis=dict(
        title='Normalized Frequency'
    ),
    yaxis2=dict(
        title='Normalized Frequency',
        anchor='free',
        overlaying='y',
        side='right',
        position=1
    )        
) 

fig = go.Figure(data=data, layout=layout)
fig.update_layout(
    autosize=False,
    width=700,
    height=500,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    paper_bgcolor="LightSteelBlue",
)
fig.show()