In [73]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard

import pandas as pd

# Data preprocessing

## Read cleaned data

In [74]:
# Load the cleaned Olympic history table from its CSV file.
cleaned_csv_path = '../OlympicHistory/CleanedData.csv'
df = pd.read_csv(cleaned_csv_path)

## Find continuous data

In [75]:
# Keep only the columns that are treated as continuous features.
continuous_columns = ['Age', 'Height', 'Weight', 'AmountOfSport', 'AmountOfEvent']
df_conti = df[continuous_columns]

In [76]:
# Show the selected columns; as the cell's last expression the frame is
# rendered by the notebook.
df_conti

Unnamed: 0,Age,Height,Weight,AmountOfSport,AmountOfEvent
0,24.0,180.0,80.0,1,1
1,23.0,170.0,60.0,1,1
2,24.0,175.0,71.0,1,1
3,34.0,182.0,95.0,1,1
4,21.0,185.0,82.0,1,2
...,...,...,...,...,...
188164,33.0,171.0,69.0,1,1
188165,29.0,179.0,89.0,1,1
188166,27.0,176.0,59.0,1,2
188167,30.0,185.0,96.0,1,1


## Reshape data to a 4D array

In [77]:
# Pack every 5 consecutive rows (5 feature values each) into one
# single-channel 5x5 block; rows that do not fill a whole block are dropped.
ROWS_PER_SAMPLE = 5
VALUES_PER_ROW = 5
n_samples = df_conti.shape[0] // ROWS_PER_SAMPLE
n_values_used = n_samples * ROWS_PER_SAMPLE * VALUES_PER_ROW
flat_values = df_conti.to_numpy().flatten()
input_data = flat_values[0:n_values_used].reshape(-1, 1, ROWS_PER_SAMPLE, VALUES_PER_ROW)
input_data.shape

(37633, 1, 5, 5)

# Build PyTorch Dataset

## Dataset class

In [78]:
class OlympicDataset(Dataset):
    """Map-style dataset that serves pre-shaped samples as ``float32`` tensors.

    Args:
        data: Samples indexed along the first axis. A NumPy array, a
            ``torch.Tensor`` or a ``pandas.DataFrame`` is accepted; it is
            converted to a float tensor once, at construction time.
        transform: Accepted so existing call sites keep working, but NOT
            applied. The samples are already tensors when they are returned,
            and the transform the notebook passes in (torchvision's
            ``ToTensor``) only accepts PIL images / ndarrays, so applying it
            would raise.

    Indexing returns ``self.data[idx]`` only (no label), so a ``DataLoader``
    built on this dataset yields plain tensors rather than ``(x, y)`` pairs.
    """

    def __init__(self, data, transform=None):
        # torch.from_numpy rejects DataFrames, so unwrap them first.
        if isinstance(data, pd.DataFrame):
            data = data.to_numpy()
        # as_tensor handles ndarray and Tensor inputs alike; .float() casts to
        # float32, which is what nn.Linear layers expect by default.
        self.data = torch.as_tensor(data).float()

    def __len__(self):
        """Return the number of samples (size of the first axis)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        return self.data[idx]

# Build GAN

## Hyperparameters

In [79]:
# Hyperparameters and run configuration.

# Seed PyTorch's global RNG so that weight initialisation, noise sampling and
# DataLoader shuffling start from the same state on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"
lr = 3e-4
z_dim = 64  # size of the generator's noise input; other candidates: 128, 256
image_dim = 5 * 5 * 1  # flattened size of one sample: 5 x 5 values, 1 channel
batch_size = 32
num_epochs = 5

In [80]:
# Fixed batch of noise vectors, sampled once and created directly on the
# target device so the same inputs can be fed to the generator repeatedly.
fixed_noise = torch.randn(batch_size, z_dim, device=device)

# Build the transform through `torchvision.transforms` rather than through the
# `transforms` alias: this assignment rebinds `transforms` from the imported
# module to a Compose object, so `transforms.Compose(...)` would raise
# AttributeError the second time this cell is run. The name `transforms` is
# kept because the dataset cell passes it along.
# A Normalize((0.5,), (0.5,)) step was previously listed here and is disabled.
transforms = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)

## Dataset and DataLoader

In [81]:
# Wrap the reshaped array in the dataset, then serve it in shuffled
# mini-batches of `batch_size` samples.
dataset = OlympicDataset(data=input_data, transform=transforms)
loader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)

## Discriminator

In [82]:
class Discriminator(nn.Module):
    """Small fully connected network mapping a flat sample to a value in (0, 1).

    Args:
        in_features: Length of each flattened input vector.
    """

    def __init__(self, in_features):
        super().__init__()
        hidden_units = 128
        stack = [
            nn.Linear(in_features, hidden_units),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),  # squashes the single output into (0, 1)
        ]
        self.disc = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack; the last dimension becomes 1."""
        scores = self.disc(x)
        return scores

# Build the discriminator for inputs of length `image_dim` and move its
# parameters to the selected device.
disc = Discriminator(image_dim).to(device)

## Generator

In [83]:
class Generator(nn.Module):
    """Small fully connected network mapping a noise vector to a flat sample.

    Args:
        z_dim: Length of each input noise vector.
        img_dim: Length of each generated (flattened) output vector.
    """

    def __init__(self, z_dim, img_dim):
        super().__init__()
        hidden_units = 256
        stack = [
            nn.Linear(z_dim, hidden_units),
            nn.LeakyReLU(negative_slope=0.01),
            nn.Linear(hidden_units, img_dim),
            # No output activation: a Tanh layer (which would bound outputs to
            # [-1, 1]) is disabled, so the outputs are unbounded.
        ]
        self.gen = nn.Sequential(*stack)

    def forward(self, x):
        """Run the noise batch ``x`` through the layer stack."""
        generated = self.gen(x)
        return generated
    
# Build the generator (noise of length `z_dim` in, vectors of length
# `image_dim` out) and move its parameters to the selected device.
gen = Generator(z_dim, image_dim).to(device)

## Optimizer

In [84]:
# Binary cross-entropy on the discriminator's sigmoid outputs, plus one Adam
# optimizer per network, both using the shared learning rate `lr`.
opt_disc = optim.Adam(params=disc.parameters(), lr=lr)
opt_gen = optim.Adam(params=gen.parameters(), lr=lr)
criterion = nn.BCELoss()

## Tensorboard

In [85]:
# Two separate TensorBoard log directories: one for generated samples and one
# for real samples.
writer_fake = SummaryWriter("logs/fake")
writer_real = SummaryWriter("logs/real")

# Train/Test

In [86]:
# GAN training: for every mini-batch, update the discriminator once and then
# the generator once. On the first batch of each epoch, print both losses and
# log a grid of generated and real samples to TensorBoard.
step = 0  # TensorBoard global step; advanced once per logging event
for epoch in range(num_epochs):
    for batch_idx, real in enumerate(loader):

        # Flatten every sample in the batch to a vector of length image_dim.
        real = real.view(-1, image_dim).to(device)
        # Keep the per-batch size in a local name: the final batch can be
        # smaller, and overwriting the global `batch_size` hyperparameter
        # would leave it wrong for any cell that is re-run after training.
        cur_batch_size = real.shape[0]

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        noise = torch.randn(cur_batch_size, z_dim).to(device)
        fake = gen(noise)
        disc_real = disc(real).view(-1)
        lossD_real = criterion(disc_real, torch.ones_like(disc_real))
        # detach(): the discriminator loss must not backpropagate into the
        # generator. This also removes the need for retain_graph=True, because
        # the generator's graph is left untouched for the generator step.
        disc_fake = disc(fake.detach()).view(-1)
        lossD_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        lossD = (lossD_real + lossD_fake) / 2
        disc.zero_grad()
        lossD.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        # where the second option of maximizing doesn't suffer from
        # saturating gradients
        output = disc(fake).view(-1)  # re-scored with the updated discriminator
        lossG = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        lossG.backward()
        opt_gen.step()

        if batch_idx == 0:
            # Adjacent f-strings instead of a backslash continuation, which
            # embedded the source indentation into the printed line.
            print(
                f"Epoch [{epoch}/{num_epochs}] Batch {batch_idx}/{len(loader)} "
                f"Loss D: {lossD:.4f}, loss G: {lossG:.4f}"
            )

            with torch.no_grad():
                # Separate name so the training batch `fake` is not overwritten.
                fake_preview = gen(fixed_noise).reshape(-1, 1, 5, 5)
                data = real.reshape(-1, 1, 5, 5)
                img_grid_fake = torchvision.utils.make_grid(fake_preview, normalize=True)
                img_grid_real = torchvision.utils.make_grid(data, normalize=True)

                writer_fake.add_image(
                    "Fake Data", img_grid_fake, global_step=step
                )
                writer_real.add_image(
                    "Real Data", img_grid_real, global_step=step
                )
                step += 1

Epoch [0/5] Batch 0/1177                       Loss D: 2.6307, loss G: 0.6420
Epoch [1/5] Batch 0/1177                       Loss D: 0.1011, loss G: 1.8859
Epoch [2/5] Batch 0/1177                       Loss D: 0.1420, loss G: 1.7561
Epoch [3/5] Batch 0/1177                       Loss D: 0.1392, loss G: 2.3217
Epoch [4/5] Batch 0/1177                       Loss D: 0.5458, loss G: 0.7784


In [87]:
# Lay out the generator output left in `fake` by the training loop as rows of
# five values, matching the five feature columns that were selected.
# .cpu() is required before .numpy(): without it this cell raises whenever
# `device` is "cuda". .detach() drops the autograd graph attached to `fake`.
fake_values = fake.detach().cpu().reshape(-1, 5).numpy()
# astype('int32') truncates toward zero (it does not round).
fake_df = pd.DataFrame(fake_values).astype('int32')
fake_df

Unnamed: 0,0,1,2,3,4
0,37,194,76,-3,-10
1,28,184,82,11,0
2,28,197,61,-6,0
3,10,181,87,0,-3
4,23,179,88,9,-8
