In [None]:
import os.path
import random
import numpy as np
import torch
import matplotlib.pyplot as plt
from dataset import SimpleRegression
from models import SigmoidNet, ReLUNet, AbsNet, SigmoidNormNet, ReLUNormNet, weights_init_normal
from torch.utils.data import DataLoader
from main import bump_func
import torch.nn as nn

def plot_3d(array, txt=None):
    """Scatter-plot the absolute values of a 2-D array in 3-D.

    Every entry ``array[i, j]`` is drawn as one point at
    ``(x=i, y=j, z=abs(array[i, j]))``.

    Args:
        array: 2-D numpy array. Axis 0 is drawn along the x axis (labelled
            "Number of Iteration"), axis 1 along the y axis (labelled
            "Neuron Index").
        txt: Optional title for the plot; no title is set when ``None``.
    """
    x = np.arange(0, array.shape[0])
    y = np.arange(0, array.shape[1])
    # indexing="ij" gives grids of shape (len(x), len(y)), i.e. the same
    # layout as `array`, so the three flattened vectors below stay aligned.
    # The default "xy" indexing returns transposed grids, which pairs every
    # value with the wrong coordinates whenever `array` is not square.
    xv, yv = np.meshgrid(x, y, indexing="ij")

    xv = np.reshape(xv, [-1])
    yv = np.reshape(yv, [-1])
    data = np.reshape(array, [-1])

    plt.figure(figsize=(9, 9))
    ax = plt.axes(projection="3d")
    ax.scatter3D(xv, yv, np.abs(data), alpha=0.6, label="abs_weights")
    ax.set_xlabel("Number of Iteration")
    ax.set_ylabel("Neuron Index")
    ax.set_zlabel("Grad (abs) Value")
    ax.grid()
    ax.legend()
    if txt is not None:
        ax.set_title(txt)
    plt.show()

In [None]:
# === Experiment setup (everything a reader might want to tune)
net_type = "SigmoidNorm"
opt_type = "Adam"
num_layers = 32
num_neurons = 24
num_data = 2000
total_epoch = 20

# === Reproducibility: seed every random number generator used here
seed = 0
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# === Output folders (created next to the notebook, reused if they exist)
save_fig_dir = os.path.join('./', "grad_vis_{:d}_{}".format(num_data, opt_type))
os.makedirs(save_fig_dir, exist_ok=True)
save_model_dir = os.path.join('./', "grad_{:d}_{}".format(num_data, opt_type))
os.makedirs(save_model_dir, exist_ok=True)




In [None]:
# === Init Dataset
# num_data evenly spaced inputs on [0, 1] (both endpoints included), paired
# with the bump_func value at each input.
x_1 = np.linspace(0, 1, num_data)
y_1 = bump_func(x_1)
train_set_1 = SimpleRegression(x_1, y_1)
# Mini-batches of 64 samples, reshuffled every epoch, loaded by 2 workers.
train_loader_1 = DataLoader(
    dataset=train_set_1,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)
# === Init NN
# Build the network selected by net_type. Every variant gets the same width
# (num_neurons) and depth (num_layers); the "Leaky*" variants additionally
# switch the activation to "leaky".
if net_type == "ReLU":
    net = ReLUNet(hidden_neuron=num_neurons,
                  num_layer=num_layers)  # The visual proof network
elif net_type == "Sigmoid":
    net = SigmoidNet(hidden_neuron=num_neurons,
                     num_layer=num_layers)
elif net_type == "Leaky":
    net = ReLUNet(hidden_neuron=num_neurons,
                  num_layer=num_layers,
                  activation="leaky")
elif net_type == "Abs":
    net = AbsNet(hidden_neuron=num_neurons,
                 num_layer=num_layers, )
elif net_type == "SigmoidNorm":
    net = SigmoidNormNet(hidden_neuron=num_neurons,
                         num_layer=num_layers)
elif net_type == "LeakyNorm":
    net = ReLUNormNet(hidden_neuron=num_neurons,
                      num_layer=num_layers,
                      activation="leaky")
else:
    # Fail loudly: merely printing would let the notebook continue with an
    # undefined `net`, or worse, with a stale one left over from an earlier run.
    raise ValueError(
        "Undefined NN type %r. Check settings. Supported: 'ReLU', 'Sigmoid', "
        "'Leaky', 'Abs', 'SigmoidNorm', 'LeakyNorm'." % (net_type,)
    )

# The choice of optimizer does not change how a gradient is computed for given
# weights, but stepping with a real optimizer makes the weights (and therefore
# the logged gradients) evolve as they would during realistic training.
net.apply(weights_init_normal)
# Move the model to its device before the optimizer is constructed, as the
# torch.optim documentation recommends. Initialization above is kept ahead of
# the move, in the original order.
net.to(device)
if opt_type == "Adam":
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
elif opt_type == "SGD":
    optimizer = torch.optim.SGD(net.parameters(), lr=1)
else:
    # Fail loudly: merely printing would let the notebook continue with an
    # undefined `optimizer`, or with a stale one bound to a previous network.
    raise ValueError(
        "Need to specify the optimizer: got %r, expected 'Adam' or 'SGD'."
        % (opt_type,)
    )
net.train()

# Mean-squared-error objective for the regression task.
loss_func = torch.nn.MSELoss()

# Exponentially decaying learning rate: every lr_scheduler.step() call
# multiplies the current learning rate by decayRate.
decayRate = 0.96
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=decayRate)

# Per-configuration output folder, named <net_type>_L<num_layers>_N<num_neurons>.
save_dir = os.path.join(
    save_model_dir,
    "{}_L{:d}_N{:d}".format(net_type, num_layers, num_neurons),
)
os.makedirs(save_dir, exist_ok=True)

In [None]:

# Gradient history, one entry per training iteration: a flattened copy of the
# weight gradient of the first / last tracked layer.
first_weights_grad_log = []
last_weights_grad_log = []

print("Check Network Type: ")
print(type(net))

# The tracked layers do not change during training, so look them up once
# instead of rebuilding the child list on every batch.
layer_list = list(net.fc.children())
first_layer = layer_list[0]
# NOTE(review): index -2 presumably selects the final weighted layer, with a
# weight-less module as the very last child of net.fc; confirm against models.py.
last_layer = layer_list[-2]

for epoch in range(total_epoch):
    epoch_loss = 0
    for data in train_loader_1:
        optimizer.zero_grad()

        input_tensor = data["input"].to(device)
        label_tensor = data["label"].to(device)

        output_tensor = net(input_tensor)
        loss = loss_func(output_tensor, label_tensor)
        loss.backward()

        # Record the layer weight gradients before the optimizer step.
        # .copy() is required: on CPU, .numpy() returns a view of the live
        # gradient buffer, so without it every logged entry could alias the
        # same memory and be overwritten when the gradient is zeroed or
        # accumulated in place on later iterations.
        first_grad = first_layer.weight.grad.detach().cpu().numpy().reshape(-1).copy()
        last_grad = last_layer.weight.grad.detach().cpu().numpy().reshape(-1).copy()
        first_weights_grad_log.append(first_grad)
        last_weights_grad_log.append(last_grad)

        # Apply the parameter update with the optimizer
        optimizer.step()
        epoch_loss += loss.item()
    # epoch_loss is the sum of the per-batch loss values over this epoch.
    print("[Epoch {}] Total Training Loss:{}".format(epoch, epoch_loss))
    lr_scheduler.step()

In [None]:
# Plot how the gradients evolved during training.
# Each log is a list of flattened per-iteration gradients; converting it to
# an array gives one row per logged iteration.
first_grad = np.array(first_weights_grad_log)
last_grad = np.array(last_weights_grad_log)

plot_3d(array=first_grad, txt="First Linear Layer Gradient")
plot_3d(array=last_grad, txt="Last Linear Layer Gradient")
