In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import torch.nn.init as init
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from torch.optim.lr_scheduler import MultiStepLR

import numpy as np
import time 
from tqdm import tqdm

from generator_utils import *
from discriminator import *
from dataset import *

In [2]:
class GRU_plain(nn.Module):
    def __init__(self, input_size, embedding_size, hidden_size, num_layers, has_input=True, has_output=False, output_size=None):
        super(GRU_plain, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.has_input = has_input
        self.has_output = has_output

        if has_input:
            self.input = nn.Linear(input_size, embedding_size)
            self.rnn = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers,
                              batch_first=True)
        else:
            self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        if has_output:
            self.output = nn.Sequential(
                nn.Linear(hidden_size, embedding_size),
                nn.ReLU(),
                nn.Linear(embedding_size, output_size)
            )

        self.relu = nn.ReLU()
        # initialize
        self.hidden = None  # need initialize before forward run

        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.25)
            elif 'weight' in name:
                nn.init.xavier_uniform_(param,gain=nn.init.calculate_gain('sigmoid'))
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight = init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))

    def init_hidden(self, batch_size):
        return Variable(torch.zeros(self.num_layers, batch_size, self.hidden_size)).to(choose_device())

    def forward(self, input_raw, pack=False, input_len=None):
        if self.has_input:
            input = self.input(input_raw)
            input = self.relu(input)
        else:
            input = input_raw
        if pack:
            input = pack_padded_sequence(input, input_len, batch_first=True)
        output_raw, self.hidden = self.rnn(input, self.hidden)
        if pack:
            output_raw = pad_packed_sequence(output_raw, batch_first=True)[0]
        if self.has_output:
            output_raw = self.output(output_raw)
        # return hidden state at each time step
        return output_raw

In [33]:
def decode_adj(adj_output):
    '''
    From GraphRNN codebase
        recover to adj from adj_output
        note: here adj_output have shape (n-1)*m
    '''
    max_prev_node = adj_output.shape[1]
    adj = torch.zeros((adj_output.shape[0], adj_output.shape[0]))
    print(adj.size())
    reverse_adj = torch.flip(adj_output, dims=(1,))
    for i in range(adj_output.shape[0]):
        input_start = max(0, i - max_prev_node + 1)
        input_end = i + 1
        output_start = max_prev_node + max(0, i - max_prev_node + 1) - (i + 1)
        output_end = max_prev_node
        # adj[i, input_start:input_end] = adj_output[i,::-1][output_start:output_end]
        # print(adj[i, input_start:input_end].size())
        # print(reverse_adj[i,output_start:output_end][:, 0].size())
        adj[i, input_start:input_end] = reverse_adj[i,output_start:output_end][:, 0]
    adj_full = torch.zeros((adj_output.shape[0]+1, adj_output.shape[0]+1))
    n = adj_full.shape[0]
    adj_full[1:n, 0:n-1] = torch.tril(adj, 0)
    adj_full = adj_full + adj_full.T

    return adj_full

In [34]:
class GraphRNN(nn.Module):
    def __init__(self, args, device=choose_device()) -> None:
        super().__init__()
        self.args = args
        self.device = device
        self.rnn = GRU_plain(input_size=self.args.max_prev_node, embedding_size=self.args.embedding_size_rnn,
                        hidden_size=self.args.hidden_size_rnn, num_layers=self.args.num_layers, has_input=True,
                        has_output=True, output_size=self.args.hidden_size_rnn_output).to(self.device)
        self.output = GRU_plain(input_size=1, embedding_size=self.args.embedding_size_rnn_output,
                            hidden_size=self.args.hidden_size_rnn_output, num_layers=self.args.num_layers, has_input=True,
                            has_output=True, output_size=1).to(self.device)

        # load data state
        if args.load:
            fname = args.model_save_path + args.fname + 'lstm_' + str(args.load_epoch) + '.dat'
            self.rnn.load_state_dict(torch.load(fname))
            fname = args.model_save_path + args.fname + 'output_' + str(args.load_epoch) + '.dat'
            self.output.load_state_dict(torch.load(fname))

            args.lr = 0.00001
            epoch = args.load_epoch
            print('model loaded!, lr: {}'.format(args.lr))
        else:
            epoch = 1

    # ====Call these in training loop====
    def init_optimizer(self, lr):
        """Initialize optimizers and schedular for both RNNs"""
        self.optimizer_rnn = optim.Adam(list(self.rnn.parameters()), lr=lr)
        self.optimizer_output = optim.Adam(list(self.output.parameters()), lr=lr)
        self.scheduler_rnn = MultiStepLR(self.optimizer_rnn, milestones=self.args.milestones)
        self.scheduler_output = MultiStepLR(self.optimizer_output, milestones=self.args.milestones)
        return self.optimizer_rnn, self.optimizer_output, self.scheduler_rnn, self.scheduler_output

    def clear_gradient_models(self):
        self.rnn.zero_grad()
        self.output.zero_grad()

    def train(self, flag):
        if flag:
            self.rnn.train(True)
            self.output.train(True)
        else:
            self.rnn.train(False)
            self.output.train(False)

    def clear_gradient_opts(self):
        self.optimizer_rnn.zero_grad()
        self.optimizer_output.zero_grad()

    def all_steps(self):
        self.optimizer_rnn.step()
        self.optimizer_output.step()
        self.scheduler_rnn.step()
        self.scheduler_output.step()

    def sort_data_per_epoch(self, X, Y, length):
        x_unsorted = X.float()
        y_unsorted = Y.float()
        y_len_unsorted = length
        y_len_max = max(y_len_unsorted)
        x_unsorted = x_unsorted[:, 0:y_len_max, :]
        y_unsorted = y_unsorted[:, 0:y_len_max, :]
        y_len,sort_index = torch.sort(y_len_unsorted,0,descending=True)
        y_len = y_len.numpy().tolist()
        x = torch.index_select(x_unsorted,0,sort_index)
        y = torch.index_select(y_unsorted,0,sort_index)
        y_reshape = pack_padded_sequence(y,y_len,batch_first=True).data
        idx = [i for i in range(y_reshape.size(0)-1, -1, -1)]
        idx = torch.LongTensor(idx)
        y_reshape = y_reshape.index_select(0, idx)
        y_reshape = y_reshape.view(y_reshape.size(0),y_reshape.size(1),1)
        output_x = torch.cat((torch.ones(y_reshape.size(0),1,1),y_reshape[:,0:-1,0:1]),dim=1) # x's shape is determined by y's shape
        output_y = y_reshape
        output_y_len = []
        output_y_len_bin = np.bincount(np.array(y_len))
        for i in range(len(output_y_len_bin)-1,0,-1):
            count_temp = np.sum(output_y_len_bin[i:]) # count how many y_len is above i
            output_y_len.extend([min(i,y.size(2))]*count_temp) # put them in output_y_len; max value should not exceed y.size(2)

        # pack into variable
        x = Variable(x).to(self.device)
        y = Variable(y).to(self.device)
        output_x = Variable(output_x).to(self.device)
        output_y = Variable(output_y).to(self.device)
        batch_size = x_unsorted.size(0)
        return x, y, output_x, output_y, y_len, output_y_len, batch_size
    
    # ======================================

    def forward(self, noise, X, Y, length):
        """
        X: noise/latent vector
        args: arguments dictionary
        test_batch_size: number of graphs you want to generate
        """
        # provide a option to change number of graphs generated
        output_batch_size = self.args.test_batch_size
        input_hidden = torch.stack(self.rnn.num_layers*[noise]).to(self.device)
        self.rnn.hidden = input_hidden # expected shape: (num_layer, batch_size, hidden_size)

        x, y, output_x, output_y, y_len, output_y_len, _ = self.sort_data_per_epoch(X, Y, length)
        
        h = self.rnn(x, pack=True, input_len=y_len)
        h = pack_padded_sequence(h,y_len,batch_first=True).data # get packed hidden vector
        # reverse h
        idx = [i for i in range(h.size(0) - 1, -1, -1)]
        idx = Variable(torch.LongTensor(idx)).cuda()
        h = h.index_select(0, idx)
        hidden_null = Variable(torch.zeros(self.rnn.num_layers-1, h.size(0), h.size(1))).cuda()
        self.output.hidden = torch.cat((h.view(1,h.size(0),h.size(1)),hidden_null),dim=0) # num_layers, batch_size, hidden_size
        y_pred = self.output(output_x, pack=True, input_len=output_y_len)
        y_pred = F.sigmoid(y_pred)
        # clean
        y_pred = pack_padded_sequence(y_pred, output_y_len, batch_first=True)
        y_pred = pad_packed_sequence(y_pred, batch_first=True)[0]

        out = decode_adj(y_pred)
        print(out.size())
        # # TODO: change this part to noise vector might need resizing
        # # y_pred_long = Variable(torch.zeros(output_batch_size, self.args.max_num_node, self.args.max_prev_node)).to(self.device) # discrete prediction
        # y_pred_long = torch.zeros(output_batch_size, self.args.max_num_node, self.args.max_prev_node).to(self.device) # discrete prediction
        # # x_step = X.to(self.device) # shape:(batch_size, 1, self.args.max_prev_node)

        # x_step = torch.ones(output_batch_size, 1, self.args.max_prev_node).to(self.device)

        # # iterative graph generation
        # for i in range(self.args.max_num_node):
        #     # for each node
        #     # 1. we use rnn to create new node embedding
        #     # 2. we use output to create new edges

        #     # (1)
        #     h = self.rnn(x_step)
        #     hidden_null = torch.zeros(self.args.num_layers - 1, h.size(0), h.size(2)).to(self.device)
        #     x_step = torch.zeros(output_batch_size, 1, self.args.max_prev_node).to(self.device)
        #     output_x_step = torch.ones(output_batch_size, 1, 1).to(self.device)
        #     # (2)
        #     # self.output.hidden = torch.cat((h.permute(1,0,2), hidden_null), dim=0).to(self.device)
        #     # for j in range(min(self.args.max_prev_node,i+1)):
        #     #     output_y_pred_step = self.output(output_x_step)
        #     #     # print(output_y_pred_step.requires_grad)
        #     #     output_x_step = sample_sigmoid(output_y_pred_step, sample=True, sample_time=1, device=self.device)
        #     #     x_step[:,:,j:j+1] = output_x_step
        #     #     # self.output.hidden = Variable(self.output.hidden.data).to(self.device)
        #     # y_pred_long[:, i:i + 1, :] = x_step
        #     self.rnn.hidden = Variable(self.rnn.hidden.data).to(self.device)

        # y_pred_long_data = y_pred_long.long()

        return out

        # print(1)
        init_adj_pred = decode_adj(y_pred_long_data[0].cpu())
        adj_pred_list = torch.zeros((output_batch_size, init_adj_pred.size(0), init_adj_pred.size(1)))
        for i in range(output_batch_size):
            # adj_pred = decode_adj(y_pred_long_data[i].cpu().numpy())
            # adj_pred_list = np.append(adj_pred_list, adj_pred)
            # adj_pred_list.append(adj_pred)
            adj_pred_list[i, :, :] = decode_adj(y_pred_long_data[i].cpu())

        # return torch.Tensor(np.array(adj_pred_list))
        return adj_pred_list

In [35]:
loss = nn.MSELoss()

In [47]:
def train(args, train_inverter=False, num_layers=4, clamp_lower=-0.1, clamp_upper=0.1, lr=1e-3, betas=1e-5, lamb=0.1, loss_func='MSE', device=choose_device()):
    # get the dataset
    lr = 1e-4
    train, labels = get_dataset_with_label(args.graph_type) # entire dataset as train
    train_dataset = Graph_sequence_sampler_pytorch(train, labels, args)
    train_loader = get_dataloader_labels(train_dataset, args)
    noise_dim = args.hidden_size_rnn
    # print('noise dimension is: ', noise_dim)

    # initialize noise, optimizer and loss
    netG = GraphRNN(args=args)
    netD = NetD(stat_input_dim=128, stat_hidden_dim=64, num_stat=2)
    hg = list(netG.parameters())[2].register_hook(lambda grad: print(f"NetG parameter Update with gradient {grad}"))
    loss = nn.MSELoss()

    # check model parameters
    # for param in netD.parameters():
    #     print(param.name, param.data, param.requires_grad)
    # for param in netG.parameters():
    #     print(param.name, param.data, param.requires_grad)

    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=[betas for _ in range(2)])
    G_optimizer_rnn, G_optimizer_output, G_scheduler_rnn, G_scheduler_output = netG.init_optimizer(lr=0.1) # initialize optimizers


    noise = torch.randn(args.batch_size, noise_dim).to(device)

    gen_iterations = 0
    for e in range(args.epochs):
        # for now, treat the input as adj matrices
        for i, data in tqdm(enumerate(train_loader), desc=f"Training epoch#{e+1}", total=len(train_loader)):
            X = data['x']
            Y = data['y']
            adj_mat = data['adj_mat']
            Y_len = data['len']

            # zero grad
            optimizerD.zero_grad()
            G_optimizer_rnn.zero_grad()
            G_optimizer_output.zero_grad()

            # skip uneven batch
            if adj_mat.size(0) != args.batch_size:
                continue
            
            # ========== Train Generator ==================
            netD.train(False)
            netG.train(True)
            # netG.clear_gradient_models()
            G_optimizer_rnn.zero_grad()
            G_optimizer_output.zero_grad()
            # in case our last batch was the tail batch of the dataloader,
            # make sure we feed a full batch of noise
            # noisev = noise.normal_(0,1)
            noisev = torch.randn(args.batch_size, noise_dim)
            # print(f"noise size: {noisev.size()}")
            fake = netG(noisev, X, Y, Y_len)
            # fake_tensor = netD(fake)
            # output = Variable(loss(fake, torch.ones((32, 6200)).cuda()), requires_grad=True)
            # print(fake.size())
            # target = torch.((2405, 2405)).to('cuda')
            output = torch.mean(fake.cuda())
            output.backward()
            print(output.grad)

            # errG = fake_tensor
            # errG.backward()
            G_optimizer_rnn.step()
            G_optimizer_output.step()
            # # for p in netG.parameters()[0]:
            # # netG.all_steps()
            # gen_iterations += 1

            print(f"errG for generator: {output.item()}")


In [48]:
args = Args()

In [49]:
train(args=args)

calculating max previous node, total iteration: 12000
iter 0 times
iter 2400 times
iter 4800 times
iter 7200 times
iter 9600 times
max previous node: 10




torch.Size([2404, 2404])
torch.Size([2405, 2405])


  return self._grad
Training epoch#1:   3%|▎         | 1/35 [00:09<05:35,  9.86s/it]

NetG parameter Update with gradient tensor([[ 2.0595e-10,  8.9589e-12,  2.0067e-10,  ...,  4.7315e-10,
          3.5230e-10,  4.7792e-10],
        [ 7.8863e-10,  5.2105e-11,  5.9448e-10,  ...,  1.8314e-09,
          5.4963e-10,  1.2023e-09],
        [-1.8632e-10, -7.7048e-12, -1.5629e-10,  ..., -3.9626e-11,
         -2.0188e-10, -1.1530e-10],
        ...,
        [ 6.4004e-08,  3.2564e-09,  7.2966e-08,  ...,  2.7935e-07,
          1.1535e-07,  2.2295e-07],
        [ 1.8974e-07,  8.9252e-09,  2.4370e-07,  ...,  9.2353e-07,
          3.8537e-07,  7.3718e-07],
        [-8.9927e-09, -8.3062e-10, -1.0176e-08,  ..., -3.4090e-08,
         -1.7478e-08, -2.9322e-08]], device='cuda:0')
None
errG for generator: 0.004558607004582882
torch.Size([1382, 1382])
torch.Size([1383, 1383])


Training epoch#1:   6%|▌         | 2/35 [00:11<02:50,  5.18s/it]

NetG parameter Update with gradient tensor([[-2.4963e-09, -4.7767e-10, -2.9195e-09,  ..., -2.6322e-08,
         -4.1873e-08, -1.5409e-08],
        [-7.7858e-09, -1.1108e-09, -6.6949e-09,  ..., -7.3920e-08,
         -9.0500e-08, -4.1059e-08],
        [ 2.0892e-10,  4.5747e-11,  2.6799e-10,  ...,  2.4096e-09,
          3.4898e-09,  1.4116e-09],
        ...,
        [ 5.6885e-09,  9.4744e-10,  7.5777e-09,  ...,  6.2607e-08,
          8.9827e-08,  3.2734e-08],
        [ 2.7659e-09,  6.2927e-10,  4.2778e-09,  ...,  3.0808e-08,
          4.4166e-08,  1.7126e-08],
        [ 9.0999e-10,  1.8117e-10,  1.1023e-09,  ...,  9.5203e-09,
          1.4409e-08,  5.4537e-09]], device='cuda:0')
None
errG for generator: 0.006219551432877779
torch.Size([1868, 1868])
torch.Size([1869, 1869])


Training epoch#1:   9%|▊         | 3/35 [00:16<02:36,  4.88s/it]

NetG parameter Update with gradient tensor([[ 3.7298e-19,  1.6447e-20,  1.2956e-17,  ..., -3.0166e-14,
         -1.1413e-13,  3.2924e-16],
        [ 1.2058e-19, -3.3435e-19,  8.2476e-18,  ..., -3.2648e-15,
         -1.3714e-14,  5.0281e-16],
        [ 2.1612e-13,  1.4217e-13,  1.7947e-12,  ..., -1.2123e-11,
         -1.5749e-11, -9.3801e-12],
        ...,
        [-5.2800e-13, -4.5570e-13, -8.1489e-12,  ..., -1.6548e-10,
         -2.5369e-10, -7.5274e-11],
        [-6.0460e-11, -4.6536e-11, -6.2041e-10,  ..., -3.8537e-09,
         -6.0499e-09, -1.2458e-09],
        [-1.2023e-10,  9.0138e-11, -2.9118e-10,  ...,  9.5565e-09,
          2.3274e-08,  8.1924e-09]], device='cuda:0')
None
errG for generator: 0.0012082959292456508
torch.Size([1611, 1611])
torch.Size([1612, 1612])


Training epoch#1:  11%|█▏        | 4/35 [00:19<02:04,  4.03s/it]

NetG parameter Update with gradient tensor([[-2.7858e-23, -4.3726e-21,  8.0379e-20,  ..., -8.2847e-21,
         -2.3796e-18,  1.0938e-19],
        [ 0.0000e+00,  1.1908e-22,  5.5784e-20,  ..., -7.5729e-20,
         -6.3210e-19,  0.0000e+00],
        [-1.4629e-15, -2.4484e-15, -1.2725e-13,  ..., -4.9593e-13,
         -1.4729e-12, -1.4533e-13],
        ...,
        [ 1.8142e-13,  8.9272e-13,  3.1484e-11,  ..., -1.1343e-11,
          1.0107e-10, -2.0742e-11],
        [-1.1986e-10,  1.7830e-10,  3.3210e-09,  ..., -3.5750e-09,
         -1.0021e-09, -1.9392e-09],
        [ 7.3380e-15,  2.3276e-14,  1.1769e-13,  ..., -5.9490e-11,
         -2.2905e-10,  4.0283e-13]], device='cuda:0')
None
errG for generator: 0.0002687618543859571
torch.Size([1590, 1590])
torch.Size([1591, 1591])


Training epoch#1:  14%|█▍        | 5/35 [00:21<01:39,  3.31s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -1.2327e-20,
         -1.3627e-20, -7.8875e-22],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -2.1107e-20,
         -1.9448e-20,  3.2930e-22],
        [ 3.4740e-19,  3.8095e-19,  2.8388e-16,  ...,  6.0155e-15,
          1.2698e-14,  1.0613e-15],
        ...,
        [-9.5077e-16,  1.0633e-12,  3.5649e-12,  ...,  4.2870e-11,
          6.5003e-11,  7.6742e-12],
        [ 2.2756e-12, -5.2824e-11, -7.5207e-11,  ...,  1.4582e-10,
          1.2254e-09,  1.0600e-09],
        [-7.7126e-18,  4.1503e-17,  1.0052e-16,  ...,  6.4926e-12,
          2.4737e-11, -2.3302e-15]], device='cuda:0')
None
errG for generator: 4.734957838081755e-05
torch.Size([2159, 2159])
torch.Size([2160, 2160])


Training epoch#1:  17%|█▋        | 6/35 [00:27<02:02,  4.23s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -5.2815e-21,
         -8.6422e-21, -1.1554e-22],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  3.0086e-20,
          4.9231e-20,  6.5817e-22],
        [ 2.1752e-24,  1.3560e-22,  3.4811e-21,  ..., -2.7997e-17,
         -4.5749e-17, -6.0353e-19],
        ...,
        [-3.7402e-18, -2.4932e-15, -6.6019e-14,  ...,  1.2423e-12,
          8.6935e-13, -1.9191e-13],
        [-5.6405e-13, -5.9297e-12, -2.0916e-11,  ...,  1.5486e-11,
          2.0652e-10, -5.0533e-12],
        [-8.4061e-21,  2.3644e-19,  6.8101e-19,  ...,  1.5683e-13,
          2.5689e-13,  3.5651e-15]], device='cuda:0')
None
errG for generator: 4.488212653086521e-06
torch.Size([1550, 1550])
torch.Size([1551, 1551])


Training epoch#1:  20%|██        | 7/35 [00:29<01:38,  3.53s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  2.7005e-25,  1.0419e-23,  ...,  9.4659e-20,
          1.5928e-19,  1.0746e-23],
        ...,
        [-2.6470e-19,  8.6779e-19, -1.0362e-16,  ...,  1.1080e-12,
          1.8748e-12, -2.2478e-16],
        [-9.3395e-13,  1.2368e-13, -1.1546e-12,  ..., -1.0686e-10,
         -2.4678e-10, -1.0460e-11],
        [ 2.4172e-22,  0.0000e+00,  0.0000e+00,  ..., -1.2704e-20,
         -1.0008e-19, -2.5974e-33]], device='cuda:0')
None
errG for generator: 9.6201426913467e-07
torch.Size([2254, 2254])
torch.Size([2255, 2255])


Training epoch#1:  23%|██▎       | 8/35 [00:35<01:59,  4.41s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  4.2012e-22,
          7.3043e-22, -5.3221e-26],
        ...,
        [ 0.0000e+00,  5.2551e-23, -1.9930e-21,  ...,  5.1140e-17,
          9.0975e-17,  4.5350e-19],
        [ 1.3102e-16, -1.4110e-13,  1.8636e-13,  ...,  1.4266e-11,
          4.3216e-11,  9.8570e-13],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  2.0446e-20,
          3.5581e-20,  0.0000e+00]], device='cuda:0')
None
errG for generator: 1.0834463637365843e-07
torch.Size([2069, 2069])
torch.Size([2070, 2070])


Training epoch#1:  26%|██▌       | 9/35 [00:40<01:59,  4.60s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -2.0664e-24,
         -3.6718e-24,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.0180e-18,
          2.0097e-18,  3.9953e-20],
        [ 5.0527e-17, -3.3481e-16, -1.7340e-13,  ..., -1.1408e-12,
         -2.9286e-12, -1.4902e-14],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.1018e-26,
          1.9578e-26,  0.0000e+00]], device='cuda:0')
None
errG for generator: 1.772522395526721e-08
torch.Size([1908, 1908])
torch.Size([1909, 1909])


Training epoch#1:  29%|██▊       | 10/35 [00:43<01:46,  4.27s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -6.4871e-27,
         -1.1723e-26,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00, -8.1796e-22,  ...,  5.2515e-20,
          8.8798e-20,  0.0000e+00],
        [-1.1245e-20, -1.3227e-17, -2.4596e-15,  ...,  7.4910e-15,
         -6.4332e-15,  2.8890e-18],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')
None
errG for generator: 3.4749572108694338e-09
torch.Size([1193, 1193])
torch.Size([1194, 1194])


Training epoch#1:  29%|██▊       | 10/35 [00:45<01:53,  4.54s/it]

NetG parameter Update with gradient tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -4.2201e-29,
         -7.7320e-29,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -3.0569e-20,
         -5.6007e-20,  0.0000e+00],
        [-5.2518e-21, -5.8681e-17, -6.1292e-15,  ..., -2.8110e-14,
         -1.0662e-13, -1.8786e-17],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]], device='cuda:0')





KeyboardInterrupt: 