In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load and preprocess data
data = np.load("denoised_data.npy",allow_pickle=True)
data = data.T
X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
data_train = scaler.fit_transform(X_train)
data_test = scaler.transform(X_test)

train = torch.tensor(data_train.T, dtype=torch.float32)
test = torch.tensor(data_test.T, dtype=torch.float32)

train_dataset = TensorDataset(train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [8]:
print(X_train.shape)
print(data_train.shape)
print(train.shape)
print(train_loader.shape)

(4528, 400)
(4528, 400)
torch.Size([400, 4528])


AttributeError: 'DataLoader' object has no attribute 'shape'

In [15]:
# GAN architecture
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(100, 256 * 100),
            nn.ReLU(),
            nn.BatchNorm1d(256 * 100),
            nn.Unflatten(1, (256, 100)),
            nn.ConvTranspose1d(256, 128, 25, stride=4, padding=11, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.ConvTranspose1d(128, 64, 25, stride=4, padding=11, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.ConvTranspose1d(64, 32, 25, stride=4, padding=11, output_padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.ConvTranspose1d(32, 1, 25, stride=4, padding=11, output_padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        x = self.model(x)
        return x.view(x.shape[0], -1)

    


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv1d(1, 128, 25, stride=4, padding=11),
            nn.LeakyReLU(0.2),
            nn.Conv1d(128, 256, 25, stride=4, padding=11),
            nn.LeakyReLU(0.2),
            nn.Conv1d(256, 512, 25, stride=4, padding=11),
            nn.LeakyReLU(0.2),
            nn.Conv1d(512, 1024, 25, stride=4, padding=11),
            nn.LeakyReLU(0.2),
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        x = x.view(x.shape[0], 1, x.shape[1])
        return self.model(x).view(x.shape[0], 1)




# Create the generator and discriminator
generator = Generator()
discriminator = Discriminator()

In [16]:
# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator.to(device)
discriminator.to(device)

# Set up the optimizers and loss function
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
loss_fn = nn.BCELoss()

In [18]:
# Train the GAN
epochs = 5000
loss_history = []

for epoch in range(epochs):
    for batch, real_spectra in enumerate(train_loader):
        real_spectra = real_spectra[0].to(device)
        batch_size = real_spectra.size(0)

        # Train the discriminator
        optimizer_D.zero_grad()
        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        real_preds = discriminator(real_spectra)
        d_loss_real = loss_fn(real_preds, real_labels)

        noise = torch.randn(batch_size, 100).to(device)
        fake_spectra = generator(noise)
        fake_preds = discriminator(fake_spectra.detach())
        d_loss_fake = loss_fn(fake_preds, fake_labels)

        d_loss = d_loss_real + d_loss_fake

        d_loss.backward()
        optimizer_D.step()

        # Train the generator
        optimizer_G.zero_grad()
        fake_preds_for_generator = discriminator(fake_spectra)
        g_loss = loss_fn(fake_preds_for_generator, real_labels)
        g_loss.backward()
        optimizer_G.step()

    loss_history.append((d_loss.item(), g_loss.item()))

    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, D_loss: {d_loss.item()}, G_loss: {g_loss.item()}")

# Test the trained model on the test dataset
with torch.no_grad():
    for test_spectra in test_loader:
        test_spectra = test_spectra[0].to(device)
        batch_size = test_spectra.size(0)

        noise = torch.randn(batch_size, 100).to(device)
        generated_spectra = generator(noise)
        generated_spectra = generated_spectra.cpu().numpy()

        # Inverse transform the generated spectra
        reconstructed_spectra = scaler.inverse_transform(generated_spectra.T)

# Save the loss history for plotting
np.save("loss_history.npy", loss_history)


Epoch: 0, D_loss: 0.0007271510548889637, G_loss: 8.804856300354004


KeyboardInterrupt: 