In [7]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import math
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [8]:
class MLayerNN(nn.Module):
    """Fully connected ReLU network whose hidden layers all share one width.

    The layer-size list has ``depth`` entries: the input size, ``depth - 2``
    hidden sizes (each ``hidden_dim``) and the output size. This yields
    ``depth - 1`` ``nn.Linear`` layers; ReLU is applied after every layer
    except the last one.

    Args:
        in_dim: Number of input features.
        out_dim: Number of output features.
        depth: Length of the layer-size list (input + hidden + output).
            Must be at least 2 so that at least one linear layer exists.
        N_params: Parameter budget. Kept for interface compatibility; it is
            currently NOT used to derive the hidden width (the heuristic that
            computed the width from it is disabled).
        hidden_dim: Width of every hidden layer. Defaults to 4096, the value
            that used to be hard-coded.

    Attributes:
        train_loss: List, empty at construction, for per-epoch training losses.
        test_loss: List, empty at construction, for per-epoch test losses.
        layers: ``nn.ModuleList`` holding the linear layers in forward order.
        params: Number of entries in the weight matrices only; bias vectors
            are excluded because only parameters with more than one
            dimension are counted.
        activation: The ``nn.ReLU`` module applied between layers.

    Raises:
        ValueError: If ``depth`` is smaller than 2.
    """

    def __init__(self, in_dim, out_dim, depth, N_params, hidden_dim=4096):
        super(MLayerNN, self).__init__()
        # Fail early: with depth < 2 no layer would be created and forward()
        # would only crash later with an opaque IndexError.
        if depth < 2:
            raise ValueError(f"depth must be >= 2, got {depth}")

        self.test_loss = []
        self.train_loss = []

        # Sizes of all layers: input, (depth - 2) hidden layers, output.
        layer_dims = [in_dim]
        layer_dims += [int(hidden_dim)] * (depth - 2)
        layer_dims.append(out_dim)
        print(layer_dims)

        # One linear map between each pair of consecutive sizes.
        self.layers = nn.ModuleList([
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(layer_dims[:-1], layer_dims[1:])
        ])
        # Count weight-matrix entries only (ndimension() > 1 skips biases).
        self.params = sum(p.numel() for p in self.parameters() if p.ndimension() > 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Apply all layers to ``x``, with ReLU after every layer but the last."""
        for layer in self.layers[:-1]:
            x = self.activation(layer(x))
        return self.layers[-1](x)  # Final layer without activation

In [9]:
#### FRIEDMAN 1
from sklearn.datasets import make_friedman1
def get_loader(in_dim, noise, n_samples = 20000, batch_size=1024, test_batch_size=1):
    """Build train/test ``DataLoader`` objects for the Friedman #1 regression task.

    The training set holds ``int(n_samples * 0.8)`` samples generated with the
    requested label ``noise``; the test set holds ``int(n_samples * 0.2)``
    samples generated with ``noise=0.0``, so test targets are noise-free.

    The two sets are drawn with *different* random states. Drawing both with
    the same ``random_state`` makes the generator replay the same random
    stream, so the test inputs would repeat the leading training inputs and
    the test loss would be measured on points seen during training.

    Side effects: reseeds the global NumPy and PyTorch random number
    generators with a fixed seed (42) on every call.

    Args:
        in_dim: Number of input features passed to ``make_friedman1`` as
            ``n_features``.
        noise: Noise level passed to ``make_friedman1`` for the training
            targets.
        n_samples: Total sample budget, split 80% train / 20% test.
        batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the (unshuffled) test loader. The
            default of 1 yields one sample per batch.

    Returns:
        Tuple ``(train_loader, test_loader)`` of ``DataLoader`` objects over
        ``TensorDataset``s of float32 tensors; targets have shape
        ``(n, 1)``.
    """
    # Set the seed for reproducibility
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    n_train = int(n_samples * 0.8)
    n_test = int(n_samples * 0.2)

    # Generate the (possibly noisy) Friedman training data.
    X_train, y_train = make_friedman1(
        n_samples=n_train, n_features=in_dim, random_state=seed, noise=noise
    )
    # Generate noise-free test data from an independent random stream
    # (seed + 1) so that it does not overlap with the training inputs.
    X_test, y_test = make_friedman1(
        n_samples=n_test, n_features=in_dim, random_state=seed + 1, noise=0.0
    )

    # Targets become column vectors so they match an (n, 1) model output.
    y_train = np.expand_dims(y_train, axis=1)
    y_test = np.expand_dims(y_test, axis=1)

    # Convert numpy arrays to PyTorch tensors and wrap them as datasets.
    train_dataset = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32),
    )
    test_dataset = TensorDataset(
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_test, dtype=torch.float32),
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)
    return train_loader, test_loader



In [10]:
@torch.no_grad()
def compute_test_loss(test_loader, model):
    """Return the mean squared error of ``model`` over all of ``test_loader``.

    The squared errors of every batch are summed and divided by the total
    number of error elements, so the result is the true dataset-level MSE
    even when batches have different sizes (e.g. a smaller final batch).
    Averaging per-batch means instead would over-weight small batches.

    If ``model`` is an ``nn.Module`` it is put into eval mode for the
    duration of the evaluation and its previous train/eval mode is restored
    afterwards. Gradients are disabled by the ``torch.no_grad`` decorator.

    Args:
        test_loader: Iterable yielding ``(batch, target)`` pairs.
        model: Callable mapping ``batch`` to predictions.

    Returns:
        The mean squared error as a Python float.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no elements.
    """
    # reduction="none" keeps per-element errors so they can be pooled
    # across batches before averaging.
    criterion = torch.nn.MSELoss(reduction="none")

    is_module = isinstance(model, torch.nn.Module)
    was_training = model.training if is_module else False
    if is_module:
        model.eval()
    try:
        total_sq_err = 0.0
        n_elements = 0
        for batch, target in test_loader:
            outputs = model(batch)
            sq_err = criterion(outputs, target)  # (input, target) order
            total_sq_err += sq_err.sum().item()
            n_elements += sq_err.numel()
    finally:
        # Leave the model in the mode the caller had it in.
        if is_module:
            model.train(was_training)
    return total_sq_err / n_elements

In [11]:
import time
import dill
# Experiment grid: one network is trained from scratch for every
# (input dimension, label-noise level) combination.
n_samples = 20000
in_dims = [5,100]
noises = [0.,1.]
epochs = 1000
train_losses = []  # not filled in this cell
test_losses = []   # not filled in this cell
adjust = True      # not read in this cell
for j,in_dim in enumerate(in_dims):
    for noise in noises:
        param = 1  # forwarded to MLayerNN as N_params
        train_loader, test_loader = get_loader(in_dim, noise,n_samples)
        model = MLayerNN(in_dim, 1, depth = 3 , N_params = param)
        model.train()
        # weight_decay=0 disables AdamW's decoupled weight decay.
        optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0)
        criterion = torch.nn.MSELoss()
        for epoch in range(epochs):
            running_loss = 0.0
            for batch, target in train_loader:
                optimizer.zero_grad()
                outputs = model(batch)
                # MSELoss expects (input, target); MSE is symmetric, so the
                # value is unchanged, but this is the documented order.
                loss = criterion(outputs, target)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            # Epoch training loss = mean of the per-batch mean losses.
            avg_loss = running_loss / len(train_loader)
            model.train_loss.append(avg_loss)
            test_l = compute_test_loss(test_loader, model)
            model.test_loss.append(test_l)
            print(f"Epoch [{epoch + 1}/{epochs}], Loss: {avg_loss:.6f}, test: {test_l:.6f}, lr: {optimizer.param_groups[0]['lr']:6f}")
        # Show the last 50 epochs of the training curve. The line is labelled
        # so that plt.legend() has an artist to display; without a label the
        # legend is empty and matplotlib emits a warning.
        plt.plot(model.train_loss[-50:], label=f"in_dim={in_dim}, noise={noise}")
        plt.title('train_loss')
        plt.xlabel("epoch (last 50)")
        plt.ylabel("MSE")
        plt.legend()
        plt.show()
        print("Training Complete!")
        #with open(f"models/NN_2048{noise}_{in_dim}.dill", "wb") as f:
        #    dill.dump(model, f)

[5, 4096, 1]


KeyboardInterrupt: 