### Imports

In [1]:

from typing import Mapping, Union, Optional
import random

import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import plotly.graph_objects as go
import math
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
from scipy.stats import special_ortho_group

from traitlets.traitlets import validate
import plotly.figure_factory as ff

import numpy as np
from scipy.spatial import Delaunay

from sklearn.decomposition import PCA
import tabulate
from tabulate import tabulate


torch.manual_seed(42)
np.random.seed(42)
random.seed(0)

torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic = True  # Note that this Deterministic mode can have a performance impact
torch.backends.cudnn.benchmark = False

### dataset

In [2]:
!wget https://s3.amazonaws.com/img-datasets/mnist.npz
#se cerchi questo link ti scarica MNIST non c'e una pagina specifica

def load_data_impl():
    # file retrieved by:
    #   wget https://s3.amazonaws.com/img-datasets/mnist.npz -O code/dlgo/nn/mnist.npz
    # code based on:
    #   site-packages/keras/datasets/mnist.py

    # NPZ is a file format by numpy that provides storage of array data using gzip compression. 
    # This imageio plugin supports data of any shape, and also supports multiple images per file.
    path = 'mnist.npz'
    f = np.load(path)
    print(type(f))
    x_train, y_train = f['x_train'].reshape(-1, 784), f['y_train']
    x_test, y_test = f['x_test'].reshape(-1, 784), f['y_test']
    f.close()
    return (x_train.astype(np.float32), y_train), (x_test.astype(np.float32), y_test)

--2022-08-12 14:30:41--  https://s3.amazonaws.com/img-datasets/mnist.npz
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.193.40
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.193.40|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11490434 (11M) [application/octet-stream]
Saving to: ‘mnist.npz.1’


2022-08-12 14:30:41 (51.1 MB/s) - ‘mnist.npz.1’ saved [11490434/11490434]



In [3]:
(x_train, y_train), (x_valid, y_valid) = load_data_impl()
print('Mean',np.mean(x_train)/255,'\n','Std',np.sqrt(np.var(x_train)/255**2))

<class 'numpy.lib.npyio.NpzFile'>
Mean 0.130660576913871 
 Std 0.3081076236548345


In [4]:
# Normalization with values pre-computed

x_train = (x_train / 255 - 0.13) / 0.3  # data normalization
x_valid = (x_valid / 255 - 0.13) / 0.3

Each image is `28 x 28`, and is being stored as a flattened row of length
`784 (=28x28)`. Let's take a look at one; we need to reshape it to 2d
first.


In [6]:
import plotly.express as px
import numpy as np

print(x_train.shape)
px.imshow(x_train[0].reshape((28, 28)), color_continuous_scale='gray')

(60000, 784)


In [7]:
import torch

x_train, y_train, x_valid, y_valid = map(
  torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
y_train = y_train.long()  # we will use targets as indices and pytorch wants int64 as indices
y_valid = y_valid.long()
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

tensor([[-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        ...,
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333],
        [-0.4333, -0.4333, -0.4333,  ..., -0.4333, -0.4333, -0.4333]]) tensor([5, 0, 4,  ..., 5, 6, 8])
torch.Size([60000, 784])
tensor(0) tensor(9)


In [8]:
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

bs = 64  # batch size

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)




def accuracy(out, yb):
  preds = torch.argmax(out, dim=1)
  return (preds == yb).float().mean()

  

## TRAINING


##### Models definition

In [43]:
from torch import nn
from torch import optim
from torch.optim.lr_scheduler import LinearLR,MultiStepLR
from matplotlib import pyplot as plt

w1=784
w2=200
w3=100

loss_func = F.cross_entropy

class Mnist_Logistic(nn.Module):
  def __init__(self):
    super().__init__()
    self.weights1 = nn.Parameter(torch.randn(w1, w2) / math.sqrt(w1))
    self.bias1 = nn.Parameter(torch.zeros(w2))
    self.weights2 = nn.Parameter(torch.randn(w2, w3) / math.sqrt(w2))
    self.bias2 = nn.Parameter(torch.zeros(w3))
    self.weights3 = nn.Parameter(torch.randn(w3, 10) / math.sqrt(w3))
    self.bias3 = nn.Parameter(torch.zeros(10))

    self.dropout = nn.Dropout(p=0.2)

  def forward(self, xb):
    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1)
    z=self.dropout(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3)
    return z

class Mnist_Logistic_constrained(nn.Module):
  def __init__(self,d):
    super().__init__()

    N=w1*w2+w2+w2*w3+w3+w3*10+10
    
    self.Ortogonal = torch.normal(0,0.1,(N,d))

    self.theta = nn.Parameter(torch.normal(0,0.01,(d,1)))
    self.dropout = nn.Dropout(p=0.2)
    

  def forward(self, xb):

    self.A=torch.mm(self.Ortogonal,self.theta)
    self.weights1=self.A[0:w1*w2].reshape(w1,w2)
    self.bias1 = self.A[w1*w2:w1*w2+w2].reshape(w2,1)

    self.weights2=self.A[w1*w2+w2:w1*w2+w2+w2*w3].reshape(w2,w3)
    self.bias2=self.A[w1*w2+w2+w2*w3:w1*w2+w2+w2*w3+w3].reshape(w3,1)
    
    self.weights3=self.A[w1*w2+w2+w2*w3+w3:w1*w2+w2+w2*w3+w3+w3*10].reshape(w3,10)
    self.bias3=self.A[w1*w2+w2+w2*w3+w3+w3*10:].reshape(10,1)
    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1.T)
    z=self.dropout(z)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2.T)
    
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3.T)
    return z


class Mnist_ellipsoid(nn.Module):
  def __init__(self,d):
    super().__init__()
    self.d=d
    self.raggio=0.15
    N=w1*w2+w2+w2*w3+w3+w3*10+10
    self.Ortogonal = torch.normal(0,0.1,(N,d+1))
    self.radiuses=torch.rand(d+1)*1.5+0.5
    self.param=nn.Parameter((3.14*1.9)*torch.rand(d+1)-3.10)
    self.c=torch.triu(torch.ones(d+1,d+1),diagonal=1)[:,0:d]
    self.dropout = nn.Dropout(p=0.2)

  def forward(self, xb):

    # build the hyperellipsoid---------------------

    # build matrix full of sinus
    self.a=torch.sin(self.param).repeat(self.d+1,1)
    # take only the lower triangular except from the diagonal
    self.a=torch.tril(self.a,diagonal=-1)[:,0:self.d]
    # create the diagonal with cosines
    self.b=torch.diag(torch.cos(self.param))[:,0:self.d]
    # take the product row-wise
    self.vec=torch.prod(self.a+self.b+self.c,1)
    #multiply each component by a random radius
    self.vec=self.vec*self.radiuses 
    # multiply the hyperellipsoid to the linear subspace
    self.A=torch.mv(self.Ortogonal,self.vec)#+self.initial
    #-------------------------------------------

    

    self.weights1=self.A[0:w1*w2].reshape(w1,w2)
    self.bias1 = self.A[w1*w2:w1*w2+w2].reshape(w2,1)

    self.weights2=self.A[w1*w2+w2:w1*w2+w2+w2*w3].reshape(w2,w3)
    self.bias2=self.A[w1*w2+w2+w2*w3:w1*w2+w2+w2*w3+w3].reshape(w3,1)
    
    self.weights3=self.A[w1*w2+w2+w2*w3+w3:w1*w2+w2+w2*w3+w3+w3*10].reshape(w3,10)
    self.bias3=self.A[w1*w2+w2+w2*w3+w3+w3*10:].reshape(10,1)

    z=F.leaky_relu(torch.matmul(xb,self.weights1) + self.bias1.T)
    z=F.leaky_relu(torch.matmul(z,self.weights2) + self.bias2.T)
    z=self.dropout(z)
    z=F.softmax(torch.matmul(z,self.weights3) + self.bias3.T)
    return z

xb = x_valid
yb = y_valid

#### Training the standard model

In [None]:


def get_model_std():
  model = Mnist_Logistic()#model(x)
  return model, optim.Adam(model.parameters(), lr=1e-2)
model_std,opt_std=get_model_std()

scheduler_std = LinearLR(opt_std, start_factor=1,end_factor=0.01, total_iters=3)

epochs=3
acc=[accuracy(model_std(xb), yb)]
for epoch in tqdm(range(epochs)):
  model_std.train()
  for xb, yb in train_dl:
    pred = model_std(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_std.step()
    opt_std.zero_grad()

    scheduler_std.step()  
  model_std.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model_std(xb), yb) for xb, yb in valid_dl)
    #acc.append(valid_loss / len(valid_dl))
    acc.append(accuracy(model_std(xb), yb))
    
    
  #with torch.no_grad():
  #  valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

  #print(epoch, valid_loss / len(valid_dl))
fig = go.Figure()
x = np.arange(len(acc))
fig.add_trace(go.Scatter(x=x, y=acc, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/5 [00:00<?, ?it/s]

In [19]:
lista_param=[]
for p in model_std.parameters():
    if p.requires_grad:
         lista_param.append(p.detach().numpy().reshape(-1))
  

In [25]:
torch.sqrt(torch.sum(torch.tensor(lista_param[0])**2))

tensor(16.0420)

#### Training for the linear subspace

##### Trained networks

###### d=800

In [44]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(800)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=750

In [41]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(750)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=725 (two examples)

In [50]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(725)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

In [66]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(725)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=700

In [42]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(700)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

###### d=650

In [43]:
epochs=2

def get_model_linear(d):
  model = Mnist_Logistic_constrained(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=5*1e-4)

model,opt=get_model_linear(650)

#scheduler = LinearLR(opt, start_factor=1,end_factor=0.05, total_iters=epochs)
#scheduler = MultiStepLR(opt, milestones=[1], gamma=0.5)
# with MultiStepLR(opt, milestones=[1,2,3,4], gamma=0.6) and d=700 we got 0.84 accuracy

acc_lin=[accuracy(model(xb), yb)]
for epoch in tqdm(range(epochs)):
  model.train()
  for xb, yb in train_dl:
    pred = model(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt.step()
    opt.zero_grad()

    #scheduler.step()  
  model.eval()
  with torch.no_grad():
    #valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)
    #acc_lin.append(valid_loss / len(valid_dl))
    acc_lin.append(accuracy(model(xb), yb))
    
fig = go.Figure()
x = np.arange(len(acc_lin))
fig.add_trace(go.Scatter(x=x, y=acc_lin, mode='lines'))
fig.update_layout(
        title='Accuracy across epochs',
        xaxis_title="epoch / test frequency",
        yaxis_title="Accuracy")



Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



  0%|          | 0/2 [00:00<?, ?it/s]

##### Summary of training for d=650,700,725,750,800

In [39]:
lista_accuracy=[0.84,0.875,0.96,0.9,0.9]
fig = go.Figure()
x=[650,700,725,750,800]
fig.add_trace(go.Scatter(x=x, y=lista_accuracy,name='Accuracy', mode='markers',marker=dict(
                            size=30)))
fig.add_trace(go.Scatter(x=[650,675,700,725,750,775,800], y=[0.88,0.88,0.88,0.88,0.88,0.88,0.88],name='Treshold', line = dict(color='firebrick', width=4)))
fig.update_layout(
        title='Accuracy across subspace dimension',
        xaxis_title="subspace dimension",
        yaxis_title="Accuracy on test set")
fig.update_layout(
    autosize=False,
    width=500,
    height=400,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    paper_bgcolor="LightSteelBlue",
)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
        tick0 = 650,
        dtick = 25
    )
)
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        tick0 = 0.8,
        dtick = 0.025
    )
)

#### Training for the hypersphere

##### Trained networks

###### d=800

In [46]:
k=2

def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)

model_ell,opt_ell=get_model_ell(800)

#scheduler_ell =  MultiStepLR(opt_ell, milestones=[0,1,2,3,4], gamma=0.7)
scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.7, total_iters=k)
epochs=k
acc_hyp=[accuracy(model_ell(xb), yb)]

scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.8, total_iters=k)
for epoch in tqdm(range(k)):
  model_ell.train()
  for xb, yb in train_dl:
    pred = model_ell(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_ell.step()
    opt_ell.zero_grad()

    scheduler_ell.step()  
  model_ell.eval()
  with torch.no_grad():
    acc_hyp.append(accuracy(model_ell(xb), yb))
    

fig = go.Figure()
x = np.arange(len(acc_hyp))
fig.add_trace(go.Scatter(x=x, y=acc_hyp, mode='lines'))

  0%|          | 0/2 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



###### d=750

In [49]:

k=4

def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)

model_ell,opt_ell=get_model_ell(750)

scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.8, total_iters=k)

epochs=k
acc_hyp=[accuracy(model_ell(xb), yb)]


for epoch in tqdm(range(k)):
  model_ell.train()
  for xb, yb in train_dl:
    pred = model_ell(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_ell.step()
    opt_ell.zero_grad()

    #scheduler_ell.step()  
  model_ell.eval()
  with torch.no_grad():
    acc_hyp.append(accuracy(model_ell(xb), yb))
    

fig = go.Figure()
x = np.arange(len(acc_hyp[0:8]))
fig.add_trace(go.Scatter(x=x, y=acc_hyp[0:8], mode='lines'))


###### d=725

In [30]:
# trained with r=1.5 instead of 0.15
k=1
'''
def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)

model_ell,opt_ell=get_model_ell(725)

scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.7, total_iters=k)

epochs=k
acc_hyp=[accuracy(model_ell(xb), yb)]

'''
scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.5, total_iters=k)

for epoch in tqdm(range(k)):
  model_ell.train()
  for xb, yb in train_dl:
    pred = model_ell(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_ell.step()
    opt_ell.zero_grad()

    scheduler_ell.step()  
  model_ell.eval()
  with torch.no_grad():
    acc_hyp.append(accuracy(model_ell(xb), yb))
    

fig = go.Figure()
x = np.arange(len(acc_hyp))
fig.add_trace(go.Scatter(x=x, y=acc_hyp, mode='lines'))


  0%|          | 0/1 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



###### d=700

In [34]:
k=5

def get_model_ell(d):
  model = Mnist_ellipsoid(d)#model(x)
  return model, optim.Adam(model.parameters(), lr=6e-3)

model_ell,opt_ell=get_model_ell(700)

epochs=k
acc_hyp=[accuracy(model_ell(xb), yb)]

scheduler_ell =  LinearLR(opt_ell, start_factor=1,end_factor=0.2, total_iters=k)

for epoch in tqdm(range(k)):
  model_ell.train()
  for xb, yb in train_dl:
    pred = model_ell(xb)
    loss = loss_func(pred, yb)

    loss.backward()
    opt_ell.step()
    opt_ell.zero_grad()

    scheduler_ell.step()  
  model_ell.eval()
  with torch.no_grad():
    acc_hyp.append(accuracy(model_ell(xb), yb))
    

fig = go.Figure()
x = np.arange(len(acc_hyp))
fig.add_trace(go.Scatter(x=x, y=acc_hyp, mode='lines'))


  0%|          | 0/1 [00:00<?, ?it/s]


Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.



##### Summary of training for d=700, 725, 750, 800

In [47]:
lista_accuracy_ell=[0.75,0.8,0.875,0.9]
fig = go.Figure()
x_ell=[700,725,750,800]
fig.add_trace(go.Scatter(x=x_ell, y=lista_accuracy_ell,name='Accuracy', mode='markers',marker=dict(
                            size=30)))
fig.add_trace(go.Scatter(x=[700,725,750,800], y=[0.88,0.88,0.88,0.88],name='Treshold', line = dict(color='firebrick', width=4)))

fig.update_layout(
        title='Accuracy across subspace dimension',
        xaxis_title="subspace dimension",
        yaxis_title="Accuracy on test set")
fig.update_layout(
    autosize=False,
    width=500,
    height=400,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
    paper_bgcolor="LightSteelBlue",
)
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Black')
fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
        tick0 = 700,
        dtick = 25
    )
)
fig.update_layout(
    yaxis = dict(
        tickmode = 'linear',
        tick0 = 0.75,
        dtick = 0.025
    )
)