In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.stats import gaussian_kde

In [2]:
# Both coefficients of the later sine perturbation are derived from the
# integer r by modular arithmetic.
r = 102317090
a_r, b_r = 0.5 * (r % 7), 0.3 * (r % 5 + 1)  # a_r scales sin(); b_r scales its argument
print(f"a_r: {a_r}, b_r: {b_r}")

a_r: 0.5, b_r: 0.3


In [3]:
# Read the raw CSV and force the 'no2' column to numeric; entries that cannot
# be parsed are coerced to NaN.
df = pd.read_csv('data.csv', encoding='ISO-8859-1', low_memory=False)
df['no2'] = pd.to_numeric(df['no2'], errors='coerce')

# Drop the NaN rows and shape the remaining values as an (N, 1) column,
# which is the 2-D layout the scaler is fitted on.
no2_valid = df['no2'].dropna()
x = no2_valid.to_numpy().reshape(-1, 1)

# Standardise the column, then collapse the result back to a 1-D array.
scaler = StandardScaler()
scaler.fit(x)
x_scaled = scaler.transform(x).flatten()

In [4]:
# Element-wise transform z = x + a_r * sin(b_r * x) on the standardised values.
sine_term = a_r * np.sin(b_r * x_scaled)
z = x_scaled + sine_term
# Rebind z to a float32 tensor with one sample per row, shape (N, 1).
z = torch.tensor(z, dtype=torch.float32).reshape(-1, 1)

In [5]:
class Generator(nn.Module):
    """Fully connected network mapping a 1-feature input to a 1-feature output.

    Layer widths are 1 -> 64 -> 128 -> 64 -> 1. Each hidden linear layer is
    followed by ReLU; the final linear layer has no activation.
    """

    def __init__(self):
        super().__init__()
        widths = (1, 64, 128, 64)
        layers = []
        # Build Linear+ReLU pairs in order so parameter names and
        # initialisation order match a hand-written Sequential.
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (last dimension of size 1) through the layer stack."""
        return self.model(x)

class Discriminator(nn.Module):
    """Fully connected network mapping a 1-feature input to a value in [0, 1].

    Layer widths are 1 -> 64 -> 128 -> 64 -> 1. Each hidden linear layer is
    followed by LeakyReLU with negative slope 0.2; the final linear layer is
    followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        widths = (1, 64, 128, 64)
        hidden = []
        # Hidden layers are created first, in order, then the output head,
        # so parameter names and initialisation order stay stable.
        for fan_in, fan_out in zip(widths, widths[1:]):
            hidden += [nn.Linear(fan_in, fan_out), nn.LeakyReLU(0.2)]
        head = [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.model = nn.Sequential(*hidden, *head)

    def forward(self, x):
        """Run ``x`` (last dimension of size 1) through the layer stack."""
        return self.model(x)

In [None]:
# Seed PyTorch's global RNG before anything stochastic happens (weight
# initialisation, minibatch index sampling, latent noise) so the cell gives
# the same result on every "Restart Kernel -> Run All".
seed = 42
torch.manual_seed(seed)

generator = Generator()
discriminator = Discriminator()
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002)

epochs = 5000  # NOTE: each "epoch" is one minibatch update, not a full pass over z
batch_size = 128

# The BCE targets are identical on every iteration, so build them once
# instead of re-allocating them inside the loop.
real_labels = torch.ones((batch_size, 1))
fake_labels = torch.zeros((batch_size, 1))

for epoch in range(epochs):
    # Draw a random minibatch of real samples (indices sampled with replacement).
    idx = torch.randint(0, z.size(0), (batch_size,))
    real_samples = z[idx]

    # Generate a matching batch of fakes from standard-normal latent noise.
    noise = torch.randn((batch_size, 1))
    fake_samples = generator(noise)

    # --- Discriminator step: real -> 1, fake -> 0 ---
    optimizer_d.zero_grad()
    out_real = discriminator(real_samples)
    loss_d_real = criterion(out_real, real_labels)
    # detach() keeps the discriminator loss from back-propagating into the generator.
    out_fake = discriminator(fake_samples.detach())
    loss_d_fake = criterion(out_fake, fake_labels)
    loss_d = loss_d_real + loss_d_fake
    loss_d.backward()
    optimizer_d.step()

    # --- Generator step: make the updated discriminator label fakes as real ---
    optimizer_g.zero_grad()
    out_fake_g = discriminator(fake_samples)
    loss_g = criterion(out_fake_g, real_labels)
    # This backward pass also deposits gradients on the discriminator; they are
    # discarded by optimizer_d.zero_grad() at the start of the next iteration.
    loss_g.backward()
    optimizer_g.step()

In [None]:
# Draw 10,000 generator samples with gradient tracking disabled and flatten
# them into a 1-D NumPy array.
with torch.no_grad():
    noise = torch.randn((10000, 1))
    generated_z = generator(noise).numpy().flatten()

# Fit a Gaussian KDE to the samples and evaluate it on 1000 evenly spaced
# points spanning the observed sample range.
kde = gaussian_kde(generated_z)
z_lo, z_hi = generated_z.min(), generated_z.max()
z_range = np.linspace(z_lo, z_hi, 1000)
pdf_values = kde.evaluate(z_range)

# Overlay the KDE curve on a density-normalised histogram of the same samples.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(z_range, pdf_values, label='GAN Estimated PDF')
ax.hist(generated_z, bins=50, density=True, alpha=0.3, label='Generated Samples')
ax.set_title('PDF Estimation of Transformed Variable (z)')
ax.set_xlabel('z')
ax.set_ylabel('Density')
ax.legend()
# Save before show() so the written file contains the drawn figure.
fig.savefig('pdf_plot.png')
plt.show()