In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from dcgan.components.generator import Generator
from dcgan.components.discriminator import Discriminator

In [6]:
batch_size = 256
num_epochs = 150
latent_size = 512
regWeight = 10
criterion = nn.BCELoss()

In [7]:
def Fisher(output, gen):
    real_label=torch.full((batch_size, ), 1, dtype=torch.float)
    l_adv = criterion(output, real_label)
    
    grad1 = torch.autograd.grad(l_adv, gen, create_graph=True)
    fisher = 0
    print(len(grad1))
    for g in grad1 :
        grad2 = torch.autograd.grad(g.sum(), gen, retain_graph=True)
        fisher += sum((g2**2).sum() for g2 in grad2)
    return fisher / batch_size

def loss_adapt(output, gen_param, label):
    return nn.BCELoss(output, label) + regWeight * Fisher(output, gen_param)

In [8]:
adapt_generator = Generator()
adapt_discriminator = Discriminator()

adapt_generator.load_state_dict(torch.load('models/epoch145_gen.pth'))
adapt_discriminator.load_state_dict(torch.load('models/epoch145_dis.pth'))

# optimizer_g = optim.Adam(generator.parameters(), lr=0.0006, betas=(0.5, 0.999))
# optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0001, betas=(0.5, 0.999))

<All keys matched successfully>

In [11]:
for name, param in adapt_generator.named_parameters():
    if param is None:
        print(f"Parameter {name} is None")
    elif param.nelement() == 0:  # Check if the number of elements in the tensor is zero
        print(f"Parameter {name} is empty (uninitialized)")
    else:
        print(f"Parameter {name} is initialized with shape {param.shape}")

Parameter main.0.weight is initialized with shape torch.Size([2048, 512])
Parameter main.3.weight is initialized with shape torch.Size([2048, 1024, 4, 4])
Parameter main.4.weight is initialized with shape torch.Size([1024])
Parameter main.4.bias is initialized with shape torch.Size([1024])
Parameter main.6.weight is initialized with shape torch.Size([1024, 512, 4, 4])
Parameter main.7.weight is initialized with shape torch.Size([512])
Parameter main.7.bias is initialized with shape torch.Size([512])
Parameter main.9.weight is initialized with shape torch.Size([512, 256, 4, 4])
Parameter main.10.weight is initialized with shape torch.Size([256])
Parameter main.10.bias is initialized with shape torch.Size([256])
Parameter main.12.weight is initialized with shape torch.Size([256, 128, 4, 4])
Parameter main.13.weight is initialized with shape torch.Size([128])
Parameter main.13.bias is initialized with shape torch.Size([128])
Parameter main.15.weight is initialized with shape torch.Size([1

In [10]:
noise = torch.randn(batch_size, latent_size)
loss_adapt(adapt_discriminator(adapt_generator(noise)).view(-1), adapt_generator.parameters(), 1)



20


ValueError: grad requires non-empty inputs.

In [5]:
# Example model and data
model = nn.Linear(3, 1)  # Simple linear model
input_data = torch.randn(3)  # Random input
target = torch.randn(1)  # Random target

# Forward pass
output = model(input_data)
loss = (output - target).pow(2).mean()  # Mean squared error loss

# First derivative (gradient) computation
first_grads = torch.autograd.grad(loss, model.parameters(), create_graph=True)

# Compute second derivatives (Hessian diagonal) for each parameter
second_grads = []
for grad in first_grads:
    second_grad = torch.autograd.grad(grad.sum(), model.parameters(), create_graph=True)
    second_grads.append(second_grad)

# Printing second derivatives
for i, param in enumerate(model.parameters()):
    print(f"Second derivative for parameter {i}:")
    for grad in second_grads[i]:
        print(grad)


Second derivative for parameter 0:
tensor([[ 0.5034, -0.1130, -0.2328]], grad_fn=<TBackward0>)
tensor([0.5614], grad_fn=<ViewBackward0>)
Second derivative for parameter 1:
tensor([[ 1.7936, -0.4027, -0.8294]], grad_fn=<TBackward0>)
tensor([2.], grad_fn=<ViewBackward0>)
