In [9]:
import torch
from torch import nn
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
class UniNN(nn.Module):
    def __init__(self, hidden):
        super(UniNN, self).__init__()
        self.hidden = hidden       
        self.fc1 = nn.Linear(1, self.hidden)           
        self.activation = nn.ReLU()                     
        self.fc2 = nn.Linear(self.hidden, 1)
        self.test_loss_reached = True         

    def forward(self, x):
        out_1 = self.activation(self.fc1(x))
        return self.fc2(out_1)

    def get_dataloader(self,f,num_samples=1000, batch_size = 32):
        X = torch.vstack((torch.rand(num_samples, 1), torch.zeros(num_samples //20 ,1)))
        train_dataset = torch.utils.data.TensorDataset(X, f(X))
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        return train_dataloader

    def fit(self, dataloader, dataloader_test, epochs=100, lr=0.001, decay = 1e-3):
        self.to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.RAdam(self.parameters(), lr=lr, weight_decay=decay)
        train_losses = []
        test_losses = []
        iters = 0
        epoch = 0
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.1)
        while True:
            epoch += 1
            self.train()
            running_train_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = self(inputs)
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_train_loss += loss.item()
            avg_train_loss = running_train_loss / len(dataloader)
            train_losses.append(avg_train_loss)
            self.eval() 
            running_test_loss = 0.0
            with torch.no_grad():
                for inputs, labels in dataloader_test:
                    outputs = self(inputs.to(device))
                    loss = criterion(outputs, labels.to(device))
                    running_test_loss += loss.item()
            avg_test_loss = running_test_loss / len(dataloader_test)
            test_losses.append(avg_test_loss)
            if avg_test_loss < 4e-3:
                iters += 1
            else:
                iters = 0
            if iters == 10:
                break
            #print(f""" Epoch:{epoch}, trainloss:{avg_train_loss}, testloss:{avg_test_loss}, lr = {optimizer.param_groups[0]["lr"]}""")
            scheduler.step()
            if optimizer.param_groups[0]["lr"] < 2e-5:
                self.test_loss_reached = False
                break
        print(self.hidden, decay, self.test_loss_reached, test_losses[-5:])
        self.compute_lipschitz_constant()
        self.model_err_sup_norm()
        return test_losses
    def plot_model(self, f, title):
        x_train = torch.linspace(0,1,1000)
        with torch.no_grad():
            y_pred = model(x_train.view(-1,1))
        print("estimated_sup_norm_error", torch.max(torch.abs(y_pred.view(-1) - f(x_train))))
        plt.plot(x_train.numpy(), y_pred.numpy(), label="Model")
        plt.plot(x_train.numpy(), f(x_train.numpy()), label="Objective")
        plt.title("Model Predictions vs Data" + ' decay:' + title)
        plt.xlabel("x")
        plt.ylabel("y")
        plt.legend()
        plt.show()
    def model_err_sup_norm(self):
        x_train = torch.linspace(0,1,1000)
        with torch.no_grad():
            y_pred = model(x_train.view(-1,1))
        self.sup_err = torch.max(torch.abs(y_pred.view(-1) - f(x_train)))

    def compute_lipschitz_constant(self):
        self.L = 1
        for name, param in model.named_parameters():
            if 'weight' in name:
                self.L *= torch.norm(param)
        

In [14]:
def f(X):
    return X**0.23

In [15]:
model = UniNN(16)
dataloader_train = model.get_dataloader(f)
dataloader_test = model.get_dataloader(f, num_samples=200)
models = {}
L = []
decays = [0,0.1,1e-2,1e-3, 1e-5] 
decays = [0.1,1e-2]
for i in range(14,16):
    diction = {2**i: []}
    for decay in decays:
        model = UniNN(2**i)
        loss = model.fit(lr = 1e-3, dataloader=dataloader_train, dataloader_test=dataloader_test, decay=decay)
        diction[2**i].append(model)
    models.update(diction)

16384 0.1 False [0.027486796091709818, 0.02734549716114998, 0.02728975165103163, 0.02630654988544328, 0.02593830387507166]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)