### Synthetic dataset generation

In [2]:
import torch
from torch.utils.data import TensorDataset, DataLoader

torch.manual_seed(1337)  

# Parameters
num_samples = 1000  #Corresponds to m in the paper
dim_x = 10  
dim_y = 10  

# Dataset creation

# Matrix creation
def random_positive_diagonal_matrix(n, m):
    # Generate evenly spaced positive distinct values
    values = torch.linspace(1, 10, steps=n)
    # Shuffle the values to make them random
    values = values[torch.randperm(n)]
    # Create a diagonal matrix of size (n x n)
    D = torch.diag(values)
    # If n != m, pad or truncate the diagonal matrix to size (n x m)
    if n < m:
        D = torch.cat([D, torch.zeros(n, m - n)], dim=1)
    elif n > m:
        D = D[:m, :m]
    return D
    
def random_orthogonal_matrix(n,m):
    U, _ = torch.linalg.qr(torch.randn(n, m), mode='reduced')
    return U
    
def diagonal_matrix_with_d_distinct_values(m, n, d):
    min_dim = min(m, n)
    if d > min_dim or d < 1:
        raise ValueError("d must satisfy 1 <= d <= min(m, n)")

    # Generate d distinct positive values
    values = torch.linspace(1, 10, steps=d)

    # Repeat the distinct values to fill the diagonal
    diagonal_values = torch.cat([distinct_values, distinct_values[:min_dim - d]])

    # Shuffle the diagonal values to randomize their order
    diagonal_values = diagonal_values[torch.randperm(min_dim)]

    # Create a rectangular diagonal matrix
    D = torch.zeros(m, n)
    for i in range(min_dim):
        D[i, i] = diagonal_values[i]
    return D

def diagonal_matrix_with_d_zeros(m, n, d):

    min_dim = min(m, n)
    if d > min_dim or d < 0:
        raise ValueError("d must satisfy 0 <= d <= min(m, n)")

    # Generate non-zero values for the diagonal
    non_zero_values = torch.ones(min_dim - d)  # You can replace this with other positive values

    # Combine zeros and non-zero values
    diagonal_values = torch.cat([torch.zeros(d), non_zero_values])

    # Shuffle the diagonal values to randomize the positions of zeros
    diagonal_values = diagonal_values[torch.randperm(min_dim)]

    # Create a rectangular diagonal matrix
    D = torch.zeros(m, n)
    for i in range(min_dim):
        D[i, i] = diagonal_values[i]

    return D
    
#Satisfying all conditions
U=random_orthogonal_matrix(dim_y,dim_y)
V=random_orthogonal_matrix(num_samples,num_samples)
D=random_positive_diagonal_matrix(dim_y, num_samples)

U_p=random_orthogonal_matrix(dim_y,dim_y)
D_p=random_positive_diagonal_matrix(dim_y, num_samples)

X=U @ D @ V.T
Y=U_p @ D_p @ V.T

#Not satisfying condition (1)
D2_p=diagonal_matrix_with_d_distinct_values(dim_y,num_samples , dim_y/2)
Y2=U_p @ D2_p @ V.T
X2=U @ D @ V.T #still ok
#Not satisfying condition (2)
D3=diagonal_matrix_with_d_zeros(dim_x,num_samples , dim_x/2)
Y3=U_p @ D_p @ V.T #still ok
#Not satisfying condition (3)




X = torch.randn(num_samples, dim_x)
true_weights = torch.randn(dim_x, dim_y)
Y = X @ true_weights + 0.1 * torch.randn(num_samples, dim_y) #Y = XW + noise

# Create a TensorDataset and DataLoader
dataset = TensorDataset(X, Y)
batch_size = 32
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)


### Model instantiation, optimizer and loss function

In [2]:
import torch.nn as nn
import sys
import os

# Import 
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, '..'))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from Utils.classes import LinearNN

# Define hidden layer dimensions
hidden_dims = [64, 32]

# Instantiate the model
# Directly pass hidden dimensions as positional arguments
model = LinearNN(dim_x=10, dim_y=1, hidden_dims=hidden_dims)


# Define the loss function
criterion = nn.MSELoss()

# Define the optimizer
learning_rate = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Training loop

In [3]:
# Training parameters
num_epochs = 100

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    epoch_loss = 0.0

    for batch_X, batch_Y in dataloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(batch_X)

        # Compute the loss
        loss = criterion(outputs, batch_Y)

        # Backward pass
        loss.backward()

        # Update the weights
        optimizer.step()

        # Accumulate loss
        epoch_loss += loss.item() * batch_X.size(0)

    # Compute average loss for the epoch
    avg_loss = epoch_loss / num_samples

    if (epoch + 1) % 10 == 0 or epoch == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/100], Loss: 0.8296
Epoch [10/100], Loss: 0.0140
Epoch [20/100], Loss: 0.0193
Epoch [30/100], Loss: 0.0176
Epoch [40/100], Loss: 0.0141
Epoch [50/100], Loss: 0.0152
Epoch [60/100], Loss: 0.0134
Epoch [70/100], Loss: 0.0128
Epoch [80/100], Loss: 0.0225
Epoch [90/100], Loss: 0.0143
Epoch [100/100], Loss: 0.0152
