In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import random

In [2]:
# Training inputs: an evenly spaced 1-D grid built with np.arange
# (start -2.4, step 0.1, stop 2.5 exclusive).
# Alternative, coarser grids kept for reference (currently disabled):
#   np.arange(-2.4, 3.2, 0.8)
#   np.arange(-2.4, 2.8, 0.4)
#   np.arange(-2.4, 2.6, 0.2)
x_train_orig = np.arange(start=-2.4, stop=2.5, step=0.1)

In [3]:
class FullModel(torch.nn.Module):
    """Small fully connected regressor: 1 -> 5 -> 5 -> 1 with ReLU in between.

    The layers are exposed as ``fc1``, ``fc2``, ``fc3`` and the shared
    activation as ``relu``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the order fc1, fc2, fc3.
        self.fc1 = torch.nn.Linear(in_features=1, out_features=5)
        self.fc2 = torch.nn.Linear(in_features=5, out_features=5)
        self.fc3 = torch.nn.Linear(in_features=5, out_features=1)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
        hidden_a = self.relu(self.fc1(x))
        hidden_b = self.relu(self.fc2(hidden_a))
        return self.fc3(hidden_b)

In [4]:
def func(x):
    """Target function ``f(x) = x * sin(x / 3) ** 2`` evaluated in float64.

    Parameters
    ----------
    x : array_like
        Input value(s). Any numpy array, Python scalar, or nested sequence
        that ``np.asarray`` can convert to float is accepted; integer input
        is promoted to float.

    Returns
    -------
    numpy.ndarray or numpy.float64
        ``f(x)`` with the same shape as ``x`` (a numpy scalar for scalar input).
    """
    # np.asarray (instead of x.astype) lets scalars and lists through as well
    # as ndarrays, while still forcing float64 arithmetic.
    values = np.asarray(x, dtype=float)
    return values * np.power(np.sin(1.0 / 3.0 * values), 2)

def avg_l2_diff(y1, y2):
    """Return the mean of the element-wise squared difference of y1 and y2."""
    residual = y1 - y2
    squared = np.power(residual, 2)
    return np.mean(squared)

In [5]:
def train(model, optimizer, criterion, epoch, loader=None):
    """Run one optimisation pass over the training batches.

    Parameters
    ----------
    model : torch.nn.Module
        Network to update; it is switched to training mode.
    optimizer : torch.optim.Optimizer
        Optimizer holding ``model``'s parameters.
    criterion : callable
        Loss function called as ``criterion(output, target)``.
    epoch : int
        Epoch number, used only to decide whether to print (every 20th epoch).
    loader : iterable, optional
        Iterable of batches where ``batch[0]`` is the input and ``batch[1]``
        the target. When omitted, the module-level ``train_loader`` is used,
        which is what the original four-argument call relied on.

    Returns
    -------
    float
        Mean of the per-batch losses for this pass (``nan`` if the loader
        produced no batches).
    """
    if loader is None:
        # Backward-compatible fallback to the notebook-global loader.
        loader = train_loader

    model.train()

    # Accumulate in a plain Python float; loss.item() is already a float, so a
    # tensor accumulator adds nothing.
    total_loss = 0.0
    num_batches = 0

    for batch in loader:
        inputs, targets = batch[0], batch[1]

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Guard the empty-loader case explicitly instead of dividing by zero.
    mean_loss = total_loss / num_batches if num_batches else float("nan")

    if epoch % 20 == 0:
        print("Epoch: {}, Average loss: {:15.8f}".format(epoch, mean_loss))

    return mean_loss

In [6]:
def _seed_everything(seed):
    """Reset the torch, ``random`` and legacy numpy RNGs to ``seed``."""
    # random.seed() only accepts None/int/float/str/bytes/bytearray on
    # Python >= 3.11, so numpy integer scalars must be converted first.
    seed = int(seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.use_deterministic_algorithms(True)


def _make_training_setup(x_train, weight_decay):
    """Create a fresh model, shuffled loader, SGD optimizer and MSE loss.

    The model is constructed first so that weight initialisation consumes the
    torch RNG before any data shuffling does.
    """
    model = FullModel()
    y_train = func(x_train)
    dataset = torch.utils.data.TensorDataset(
        torch.from_numpy(x_train).float(),
        torch.from_numpy(y_train).float(),
    )
    loader = torch.utils.data.DataLoader(dataset, shuffle=True)
    optimizer = torch.optim.SGD(
        model.parameters(), lr=0.001, weight_decay=weight_decay
    )
    criterion = torch.nn.MSELoss()
    return model, loader, optimizer, criterion


def _evaluate(model, x_grid_np):
    """Evaluate ``model`` against ``func`` on a column grid of inputs.

    Returns the flattened float32 grid, the flattened predictions, the
    flattened true values, and their mean squared difference.
    """
    x_grid = torch.from_numpy(x_grid_np).float()
    y_true_flat = func(x_grid_np).reshape(-1)
    # Inference only: no autograd graph is needed for the evaluation points.
    with torch.no_grad():
        y_pred_flat = model(x_grid).numpy().reshape(-1)
    x_flat = x_grid.numpy().reshape(-1)
    return x_flat, y_pred_flat, y_true_flat, avg_l2_diff(y_pred_flat, y_true_flat)


def _resample_by_error(x_flat, y_pred_flat, y_true_flat, nsample, rng):
    """Draw ``nsample`` grid points with probability proportional to squared error.

    Returns a float64 array in grid order, each point repeated as many times
    as it was drawn.
    """
    sq_err = np.power(y_pred_flat - y_true_flat, 2)
    total = np.sum(sq_err)
    if total > 0:
        weights = sq_err / total
    else:
        # Degenerate case (zero or non-finite total error): fall back to
        # uniform sampling rather than passing NaN probabilities on.
        weights = np.full(sq_err.shape, 1.0 / sq_err.shape[0])
    freq = rng.multinomial(nsample, weights)
    # np.repeat replaces the per-index Python loop + repeated concatenate.
    return np.repeat(x_flat, freq).astype(np.float64)


l2_diff_tot_before = []
l2_diff_tot_after = []

# Dense evaluation grid; it does not depend on the seed, so build it once.
x_pred_np = np.arange(-2.5, 2.5, 0.0001).reshape(-1, 1)

# range() yields Python ints, which every seeding API used below accepts.
for seed in range(20):
    # ---- Stage 1: train on the original grid only -------------------------
    _seed_everything(seed)

    x_train = x_train_orig
    nsample = x_train.shape[0] - 1

    # NOTE(review): stage 1 uses weight_decay=0.01 while stage 2 uses 0.001,
    # so the before/after comparison also changes the regulariser, not just
    # the training set. Values kept as found; confirm this is intended.
    model, train_loader, optimizer, criterion = _make_training_setup(
        x_train, weight_decay=0.01
    )
    for epoch in range(1, 1000):
        train(model, optimizer, criterion, epoch)

    x_np, y_pred_np, y_true_np, l2_diff = _evaluate(model, x_pred_np)
    l2_diff_tot_before.append(l2_diff)

    # ---- Resample extra training points where the stage-1 error is large --
    rng = np.random.default_rng(seed)
    res = _resample_by_error(x_np, y_pred_np, y_true_np, nsample, rng)
    print(res)

    # ---- Stage 2: retrain from the same seed on the augmented grid --------
    _seed_everything(seed)

    x_train = np.concatenate((x_train_orig, res), axis=0)
    model, train_loader, optimizer, criterion = _make_training_setup(
        x_train, weight_decay=0.001
    )
    for epoch in range(1, 1000):
        train(model, optimizer, criterion, epoch)

    x_np, y_pred_np, y_true_np, l2_diff = _evaluate(model, x_pred_np)
    l2_diff_tot_after.append(l2_diff)

Epoch: 20, Average loss:      0.22422032
Epoch: 40, Average loss:      0.14478670
Epoch: 60, Average loss:      0.09809522
Epoch: 80, Average loss:      0.07590454
Epoch: 100, Average loss:      0.06625000
Epoch: 120, Average loss:      0.06117123
Epoch: 140, Average loss:      0.05779228
Epoch: 160, Average loss:      0.05498500
Epoch: 180, Average loss:      0.05259483
Epoch: 200, Average loss:      0.05033115
Epoch: 220, Average loss:      0.04832476
Epoch: 240, Average loss:      0.04641450
Epoch: 260, Average loss:      0.04449780
Epoch: 280, Average loss:      0.04283988
Epoch: 300, Average loss:      0.04113560
Epoch: 320, Average loss:      0.03950005
Epoch: 340, Average loss:      0.03796881
Epoch: 360, Average loss:      0.03653057
Epoch: 380, Average loss:      0.03528649
Epoch: 400, Average loss:      0.03401506
Epoch: 420, Average loss:      0.03283570
Epoch: 440, Average loss:      0.03170145
Epoch: 460, Average loss:      0.03066811
Epoch: 480, Average loss:      0.02965

In [7]:
# Per-seed mean squared errors: first before, then after the resampling stage.
for per_seed_errors in (l2_diff_tot_before, l2_diff_tot_after):
    print(per_seed_errors)

[0.016667973958483292, 0.012788086319150776, 0.014258608088119367, 0.017853177280074805, 0.01994109915485943, 0.0063699062012881, 0.021757396764904915, 0.03348564486780929, 0.013804212304761495, 0.01578091803603391, 0.017269762894411896, 0.010195281307745949, 0.04263667780627963, 0.008859950399989753, 0.00643093491971564, 0.01779780328672803, 0.007848463498285482, 0.0075397873165085, 0.007681530815691295, 0.011289814470988492]
[0.0011957705612759353, 0.0015501585704295814, 0.0019258351452706875, 0.0014071074234319832, 0.0012893635183113148, 0.0017606986972004933, 0.0022094289064506674, 0.0012414928768683226, 0.0017250439251175536, 0.0015031451325773569, 0.0017061627210596226, 0.0024107673046774697, 0.03181189495084362, 0.002198122370364655, 0.0010993549159954567, 0.0015119307167543621, 0.0011823229872916275, 0.0018334698606220499, 0.0010409738870690395, 0.0012858761632944798]
