In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


def weights_init(m):
    """Re-initialise one module in place using DCGAN-style Gaussian weights.

    Meant to be passed to ``module.apply(...)``. The layer kind is decided
    by a substring match on the module's class name:

    * name contains ``Conv``      -> weight ~ N(0.0, 0.02)
    * name contains ``BatchNorm`` -> weight ~ N(1.0, 0.02), bias = 0
    * anything else               -> left untouched
    """
    layer_kind = type(m).__name__

    if 'Conv' in layer_kind:
        # Covers both Conv* and ConvTranspose* layers.
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return

    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, val=0)


class Discriminator(nn.Module):
    """DCGAN-style 1-D convolutional discriminator for 2-channel sequences.

    Four stride-2 convolutions (kernel 4, padding 1) each halve the sequence
    length while widening the channels 2 -> 64 -> 128 -> 256 -> 512. A final
    kernel-256 convolution followed by a sigmoid produces 2 output channels.

    For an input of shape ``(batch, 2, 4096)`` the sequence length goes
    4096 -> 2048 -> 1024 -> 512 -> 256 -> 1, so the output has shape
    ``(batch, 2, 1)`` with values in ``[0, 1]``.
    """

    def __init__(self):
        super().__init__()

        layers = []
        in_channels = 2  # 2 input channels instead of 1
        for stage, out_channels in enumerate((64, 128, 256, 512)):
            layers.append(
                nn.Conv1d(in_channels, out_channels,
                          kernel_size=4, stride=2, padding=1, bias=False)
            )
            # The first stage has no batch norm; every later stage does.
            if stage > 0:
                layers.append(nn.BatchNorm1d(out_channels))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            in_channels = out_channels

        # Collapse the remaining 256 time steps into one score per output
        # channel (2 output channels instead of 1).
        layers.append(
            nn.Conv1d(in_channels, 2, kernel_size=256, stride=1, padding=0, bias=False)
        )
        layers.append(nn.Sigmoid())

        self.main = nn.Sequential(*layers)

    def forward(self, x, y=None):
        """Score ``x``; ``y`` is accepted for call compatibility but ignored."""
        return self.main(x)


class Generator(nn.Module):
    """DCGAN-style 1-D transposed-convolution generator with 2 output channels.

    Args:
        nz: Number of channels of the latent input.

    A latent tensor of shape ``(batch, nz, 1)`` is first expanded to length
    256 by a kernel-256 transposed convolution, then doubled four times
    (256 -> 512 -> 1024 -> 2048 -> 4096) while the channels narrow
    512 -> 256 -> 128 -> 64 -> 2. The final ``Tanh`` bounds the output to
    ``[-1, 1]``, giving shape ``(batch, 2, 4096)``.
    """

    def __init__(self, nz):
        super().__init__()

        # Projection stage: latent vector -> 512 channels x 256 steps.
        stages = [
            nn.ConvTranspose1d(nz, 512, kernel_size=256, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
        ]

        # Up-sampling stages: each doubles the length and halves the channels.
        for in_channels, out_channels in ((512, 256), (256, 128), (128, 64)):
            stages += [
                nn.ConvTranspose1d(in_channels, out_channels,
                                   kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(True),
            ]

        # Output stage: 2 channels instead of 1, squashed to [-1, 1].
        stages += [
            nn.ConvTranspose1d(64, 2, kernel_size=4, stride=2, padding=1, bias=False),
            nn.Tanh(),
        ]

        self.main = nn.Sequential(*stages)

    def forward(self, x):
        """Map a latent tensor ``x`` to a generated 2-channel sequence."""
        return self.main(x)
  

In [None]:
def load_iq_data(file_path, max_samples=4096*2, start_idx=0):
    """Load a window of interleaved float32 I/Q samples as a ``[1, 2, N]`` tensor.

    The file is read as a flat stream of float32 values laid out as
    ``I0, Q0, I1, Q1, ...``.

    Requires ``numpy`` to be imported as ``np`` at the top of the notebook.

    Args:
        file_path: Path to the raw I/Q file.
        max_samples: Number of I/Q pairs to read (``N`` above).
        start_idx: Index of the first I/Q pair to read, counted in pairs
            (not in bytes or individual floats).

    Returns:
        A ``(data_tensor, label_tensor)`` tuple. ``data_tensor`` is a float32
        tensor of shape ``[1, 2, max_samples]`` (row 0 = I, row 1 = Q).
        ``label_tensor`` is a placeholder ``torch.long`` tensor ``[0]``.

    Raises:
        ValueError: If ``max_samples`` or ``start_idx`` is negative, or if the
            file holds fewer than ``max_samples`` pairs after ``start_idx``.
    """
    if max_samples < 0 or start_idx < 0:
        raise ValueError(
            f"max_samples and start_idx must be non-negative, "
            f"got max_samples={max_samples}, start_idx={start_idx}."
        )

    float32_bytes = 4
    floats_per_pair = 2  # one I value + one Q value
    total_samples = max_samples * floats_per_pair  # I/Q samples are interleaved

    # Open the file in binary mode and skip the first `start_idx` I/Q pairs.
    with open(file_path, "rb") as f:
        f.seek(start_idx * float32_bytes * floats_per_pair)
        raw_data = np.fromfile(f, dtype="float32", count=total_samples)

    # np.fromfile returns a short array at EOF instead of raising, so check.
    if raw_data.shape[0] < total_samples:
        raise ValueError(f"Not enough data in {file_path}. Requested {total_samples}, got {raw_data.shape[0]}.")

    # De-interleave: even indices are I, odd indices are Q -> [2, max_samples].
    iq_data = np.stack([raw_data[0::2], raw_data[1::2]], axis=0)

    # Add batch dimension -> [1, 2, max_samples].
    iq_data = np.expand_dims(iq_data, axis=0)

    data_tensor = torch.from_numpy(iq_data).float()

    # Dummy label for now (adjust if needed).
    label_tensor = torch.tensor([0], dtype=torch.long)

    return data_tensor, label_tensor

There are three ideas that I had:
1. Pass the adversarial noise that is already effective against the classifier into the GAN. The GAN extrapolates the noise to produce longer PGD adversarial sequences. The discriminator learns to differentiate between real extended adversarial noise and the noise generated by the model, but I don't think this is that useful.
2. Condition on the prover signal: pass in the original clean block from the prover, so the GAN learns to generate noise based on the prover block and extrapolate it to longer sequences. I don't know how different this will be from idea 1; maybe we can test this out. The discriminator might make the noise less detectable, but I'm not sure.
3. This is where the innovation begins: maybe a hybrid of the two ideas above, where the GAN's inputs are both the noise and the original sample.

In [3]:
# --- Configuration ---------------------------------------------------------
SEED = 0
torch.manual_seed(SEED)  # make weight init and noise draws repeatable

lr = 2e-4
beta1 = 0.5
epoch_num = 32
batch_size = 8
nz = 100  # length of noise
ngpu = 0
input_size = 4096  # Size of input sequence
num_channels = 2
label_size = batch_size * num_channels
# Defined before first use so the cell survives Restart & Run All.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# --- Data ------------------------------------------------------------------
# NOTE(review): the original call referenced IQ_FILE_PATH, max_samples and
# DEVICE, none of which are defined in this notebook. The only path defined
# here is used instead, with one discriminator-sized window -- confirm this is
# the intended file and length.
adv_noise_path = "2m_0target3_noise.iq"
data_tensor, label_tensor = load_iq_data(adv_noise_path, max_samples=input_size)
data_tensor = data_tensor.to(device)

# --- Models, loss, optimizers ----------------------------------------------
netD = Discriminator().to(device)
netD.apply(weights_init)

netG = Generator(nz).to(device)
netG.apply(weights_init)

criterion = nn.BCELoss()

# used for visualizing training process
fixed_noise = torch.randn(16, nz, 1, device=device)

real_label = 1.
fake_label = 0.

optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# --- Training loop ---------------------------------------------------------
for epoch in range(epoch_num):
    # Placeholder "real" batch: random noise with 2 channels.
    # TODO: replace with batches built from `data_tensor`.
    real_data = torch.randn(batch_size, num_channels, input_size, device=device)
    b_size = real_data.size(0)

    # (1) Update D on the real batch.
    # netD emits one score per (sample, channel), hence b_size * num_channels.
    netD.zero_grad()
    label = torch.full((b_size * num_channels,), real_label,
                       dtype=torch.float, device=device)
    output = netD(real_data).view(-1)
    errD_real = criterion(output, label)
    errD_real.backward()
    D_x = output.mean().item()

    # (2) Update D on a fake batch. detach() keeps gradients out of netG.
    noise = torch.randn(b_size, nz, 1, device=device)
    fake = netG(noise)
    label.fill_(fake_label)
    output = netD(fake.detach()).view(-1)
    errD_fake = criterion(output, label)
    errD_fake.backward()
    D_G_z1 = output.mean().item()
    errD = errD_real + errD_fake
    optimizerD.step()

    # (3) Update G: fakes are labelled "real" so G is rewarded for fooling D.
    netG.zero_grad()
    label.fill_(real_label)
    output = netD(fake).view(-1)
    errG = criterion(output, label)
    errG.backward()
    D_G_z2 = output.mean().item()
    optimizerG.step()

    print('[%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
          % (epoch, epoch_num,
             errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))


[0/32]	Loss_D: 10.0131	Loss_G: 6.6687	D(x): 0.4259	D(G(z)): 0.4297 / 0.1378
[1/32]	Loss_D: 5.8702	Loss_G: 9.6166	D(x): 0.7507	D(G(z)): 0.8149 / 0.0026
[2/32]	Loss_D: 3.9565	Loss_G: 7.7453	D(x): 0.4284	D(G(z)): 0.5765 / 0.0025
[3/32]	Loss_D: 4.8678	Loss_G: 5.9124	D(x): 0.3521	D(G(z)): 0.5114 / 0.0553
[4/32]	Loss_D: 5.2927	Loss_G: 4.2403	D(x): 0.2668	D(G(z)): 0.4262 / 0.0554
[5/32]	Loss_D: 6.9534	Loss_G: 4.3220	D(x): 0.3500	D(G(z)): 0.5914 / 0.0730
[6/32]	Loss_D: 5.8195	Loss_G: 7.0676	D(x): 0.5538	D(G(z)): 0.7453 / 0.0531
[7/32]	Loss_D: 4.0995	Loss_G: 6.9774	D(x): 0.4625	D(G(z)): 0.6185 / 0.0032
[8/32]	Loss_D: 3.7989	Loss_G: 7.4688	D(x): 0.4457	D(G(z)): 0.6583 / 0.0075
[9/32]	Loss_D: 6.6120	Loss_G: 4.3044	D(x): 0.3669	D(G(z)): 0.5759 / 0.1090
[10/32]	Loss_D: 5.4871	Loss_G: 4.2479	D(x): 0.3057	D(G(z)): 0.5595 / 0.0349
[11/32]	Loss_D: 5.4012	Loss_G: 6.9598	D(x): 0.6351	D(G(z)): 0.7374 / 0.0120
[12/32]	Loss_D: 7.5041	Loss_G: 6.0152	D(x): 0.3945	D(G(z)): 0.6364 / 0.0485
[13/32]	Loss_D: 7.911