## Prerequisites

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Observation of data

Let's see what our data looks like.

In [None]:
# Load the training set from CSV. header=None tells pandas the file has no
# header row, so the columns are labelled with integers starting at 0.
train_data = pd.read_csv('data/train.csv', header=None)

In [None]:
# Show the loaded DataFrame as this notebook cell's output.
train_data

In [None]:
# Feature matrix: every column of the CSV except the one labelled 0.
# NOTE(review): column 0 is presumably an id/label column -- confirm against the data.
X = np.array(train_data.drop(columns=0))
X.shape

In [None]:
# Size of the second axis of X (values per sample); the generator's output
# layer and the discriminator's input layer are sized from it.
data_dim = X.shape[1]

### Definition of GAN models

In [None]:
import torch
import torch.nn as nn

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU,
# and tell the user which one was picked.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('You use GPU !' if device.type == 'cuda' else 'You use CPU !')

In [None]:
# Generative model
def make_generator(noise_dim, output_dim=None):
    """Build the generator: an MLP mapping noise vectors to synthetic samples.

    Args:
        noise_dim: Size of each input noise vector.
        output_dim: Size of each generated sample. When ``None`` (the
            default) the notebook-level ``data_dim`` is used, so existing
            ``make_generator(noise_dim)`` calls behave as before.

    Returns:
        An ``nn.Sequential`` mapping ``(batch, noise_dim)`` inputs to
        ``(batch, output_dim)`` outputs. The final ``ReLU`` makes every
        generated value non-negative.
    """
    if output_dim is None:
        # Fall back to the width derived from the training data.
        output_dim = data_dim

    return nn.Sequential(
        nn.Linear(in_features=noise_dim, out_features=256),
        nn.BatchNorm1d(256),
        nn.LeakyReLU(),

        nn.Linear(in_features=256, out_features=128),
        nn.BatchNorm1d(128),
        nn.LeakyReLU(),

        nn.Linear(in_features=128, out_features=output_dim),
        nn.ReLU(),
    )


# Discriminative model
def make_discriminator(input_dim=None):
    """Build the discriminator: an MLP scoring samples as fake or real.

    Args:
        input_dim: Size of each input sample. When ``None`` (the default)
            the notebook-level ``data_dim`` is used, so existing
            ``make_discriminator()`` calls behave as before.

    Returns:
        An ``nn.Sequential`` mapping ``(batch, input_dim)`` inputs to
        ``(batch, 2)`` raw logits (no softmax is applied here).
    """
    if input_dim is None:
        # Fall back to the width derived from the training data.
        input_dim = data_dim

    return nn.Sequential(
        nn.Linear(in_features=input_dim, out_features=256),
        nn.BatchNorm1d(256),
        nn.ReLU(),

        nn.Linear(in_features=256, out_features=64),
        nn.BatchNorm1d(64),
        nn.ReLU(),

        nn.Linear(in_features=64, out_features=2)
    )

In [None]:
# Length of each noise vector fed to the generator.
noise_dim = 100
# Build both networks and move their parameters to the selected device.
generator = make_generator(noise_dim=noise_dim).to(device)
discriminator = make_discriminator().to(device)

In [None]:
# Show the generator's layer structure as the cell output.
generator

In [None]:
# Show the discriminator's layer structure as the cell output.
discriminator

## Training

Split the dataset into training data and evaluation data.

In [None]:
# Fraction of the rows held out for evaluation (taken from the end of the dataset).
eval_rate = 0.2

In [None]:
# Convert the feature matrix to a float32 tensor. torch.as_tensor accepts
# an existing tensor as well as an array, so re-running this cell after X
# has already been converted does not trigger torch.tensor's
# copy-construct warning.
X = torch.as_tensor(X, dtype=torch.float)

# Ordered (unshuffled) split: the first (1 - eval_rate) share of the rows
# is used for training and the remaining rows are held out for evaluation.
eval_index = int(X.shape[0] * (1 - eval_rate))
X_train = X[:eval_index]
X_eval = X[eval_index:]

Create dataloaders

In [None]:
# Mini-batch size shared by both loaders.
batch_size = 64

# Training batches are reshuffled at every epoch.
X_train_loader = torch.utils.data.DataLoader(X_train, batch_size=batch_size, shuffle=True)
# Evaluation batches keep the dataset order: shuffling brings nothing at
# evaluation time and only makes runs harder to compare.
X_eval_loader = torch.utils.data.DataLoader(X_eval, batch_size=batch_size, shuffle=False)

We use the Adam optimizer with its default coefficients ($\beta_1 = 0.9$, $\beta_2 = 0.999$).

In [None]:
# Training hyper-parameters: number of passes over the data and step size.
num_epochs, lr = 500, 0.001

# One Adam optimizer per network so each step updates only that network.
# The betas are spelled out; they are the values Adam uses by default.
gen_optimizer = torch.optim.Adam(generator.parameters(), lr=lr, betas=(0.9, 0.999))
disc_optimizer = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(0.9, 0.999))

# Cross-entropy over the discriminator's two output logits.
criterion = nn.CrossEntropyLoss()

In [None]:
# Alternating GAN training: for every mini-batch, first update the
# discriminator on real + generated samples, then update the generator so
# that its samples are classified as real.
#
# train()/eval() do NOT freeze any weights; they only switch layers such as
# BatchNorm between batch statistics and running statistics. A network is
# kept out of an update by stepping only the other network's optimizer.
for i in range(num_epochs):
    print(f'Epoch {i+1}\t', end='')
    disc_losses = []
    gen_losses = []
    for true_data in X_train_loader:
        #### DISCRIMINATOR OPTIMIZATION ####
        n_data = true_data.shape[0]

        discriminator.train()  # BatchNorm uses batch statistics
        # NOTE(review): in eval mode the generator's BatchNorm layers use
        # running statistics here, but batch statistics in the generator
        # step below -- confirm this mismatch is intended.
        generator.eval()

        disc_optimizer.zero_grad()

        true_x = true_data.to(device)  # true data from the training dataset
        # Sample the noise directly on the target device (no host-to-device copy).
        noise = torch.randn(n_data, noise_dim, device=device)
        # The generator is not updated in this step, so skip building its graph.
        with torch.no_grad():
            fake_x = generator(noise)  # fake data from noise ~ N(0, 1)

        x = torch.cat([true_x, fake_x])  # Gather true and fake data

        true_y = torch.ones(n_data, dtype=torch.long, device=device)  # target 1 for true data
        fake_y = torch.zeros(n_data, dtype=torch.long, device=device)  # target 0 for fake data
        y = torch.cat([true_y, fake_y])  # Gather true and fake targets

        output = discriminator(x)

        # Penalize the discriminator for failing to tell fake data from true data.
        disc_loss = criterion(output, y)
        disc_losses.append(disc_loss.item())

        disc_loss.backward()
        disc_optimizer.step()  # Optimize Discriminator

        ### GENERATOR OPTIMIZATION ###
        # Generate as many samples as the discriminator just saw (true + fake).
        n_data = true_data.shape[0] * 2

        discriminator.eval()  # BatchNorm uses running statistics
        generator.train()  # BatchNorm uses batch statistics

        gen_optimizer.zero_grad()

        noise = torch.randn(n_data, noise_dim, device=device)
        fake_x = generator(noise)  # fake data from noise ~ N(0, 1)

        # The generator wants its samples to be labelled as true data.
        true_y = torch.ones(n_data, dtype=torch.long, device=device)

        output = discriminator(fake_x)

        # Penalize the generator for failing to fool the discriminator.
        gen_loss = criterion(output, true_y)
        gen_losses.append(gen_loss.item())

        # This also accumulates gradients in the discriminator's parameters;
        # they are never applied (only gen_optimizer steps here) and are
        # cleared by disc_optimizer.zero_grad() on the next iteration.
        gen_loss.backward()
        gen_optimizer.step()  # Optimize Generator

    # Report the mean loss of each network over the epoch.
    disc_loss = np.mean(disc_losses)
    gen_loss = np.mean(gen_losses)

    print(f'Disc. loss: {disc_loss}\t', end='')
    print(f'Gen. loss: {gen_loss}')
    

In [None]:
# Put both networks in evaluation mode before inspecting their outputs
# (their BatchNorm layers then use running statistics instead of batch statistics).
discriminator.eval()
generator.eval()

In [None]:
# Discriminator's predicted class for one batch of generated samples.
# Training used target 1 for true data and 0 for fake data, so a 1 here
# means the generated sample was taken for true data.
discriminator(generator(torch.randn(batch_size, noise_dim).to(device))).argmax(dim=1)

In [None]:
# Discriminator's predicted class for the held-out true samples
# (1 = classified as true data, 0 = classified as fake).
discriminator(X_eval.to(device)).argmax(dim=1)