In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import sys
sys.path.append('..')
from utils.metrics import AndersonDarlingDistance, KendallDependenceMetric

# Prefer the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
# The chosen device name is echoed so the notebook output records where it ran.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:
# --- Load dataset ---
# The CSV is read without a header row and its first column (label 0) is
# dropped before the remaining columns are used as features.
data = pd.read_csv('../data_train_log_return.csv', header=None)
data = data.drop(columns=[0])

# Min-max scale every feature; the fitted scaler is kept so the transform can
# be reused (or inverted) later.
scaler = MinMaxScaler()
scaler.fit(data.values)

# float32 tensor of the scaled data, placed on the training device.
X_train = torch.from_numpy(scaler.transform(data.values)).float().to(device)
# Unscaled alternative:
# X_train = torch.Tensor(data.values).to(device)

# --- Load metrics ---
# Project-local evaluation metrics imported from utils.metrics.
ad = AndersonDarlingDistance()
kd = KendallDependenceMetric()

def compute_metrics(batch):
    """Score freshly drawn model samples against a batch of real data.

    Draws ``batch.size(0)`` samples from the global ``model`` on the batch's
    device, then evaluates the module-level ``ad`` and ``kd`` metrics on
    detached copies of the real batch and the samples.

    NOTE(review): ``model`` is not defined in the visible part of this
    notebook (only ``generator`` is), so calling this function after a fresh
    kernel restart would raise ``NameError`` -- confirm which object is meant.

    Args:
        batch: tensor of real data; its first dimension sets the sample count.

    Returns:
        Tuple ``(anderson, kendall)`` holding the two metric results.
    """
    n_rows, batch_device = batch.size(0), batch.device
    # model.sample returns a pair; only its second element is scored.
    _, generated = model.sample(n_rows, batch_device)
    # Each metric gets its own detached copies of both tensors.
    anderson, kendall = (
        metric(batch.clone().detach(), generated.clone().detach())
        for metric in (ad, kd)
    )
    return anderson, kendall

def visual_3D(data):
    """Show a 3-D scatter plot of ``data`` with a colour-coded fourth column.

    Columns 0, 1 and 2 of ``data`` give the x, y and z coordinates and
    column 3 drives the marker colour (viridis colormap, with a colorbar).

    Args:
        data: 2-D indexable array with at least four columns.
    """
    # Pull the four columns out first so bad input fails before a figure exists.
    xs, ys, zs, hue = (data[:, col] for col in range(4))

    fig = plt.figure(figsize=(6, 4))
    ax = fig.add_subplot(111, projection='3d')

    points = ax.scatter(xs, ys, zs, c=hue, cmap='viridis')
    plt.colorbar(points)

    axis_labels = (
        (ax.set_xlabel, '1st Dimension'),
        (ax.set_ylabel, '2nd Dimension'),
        (ax.set_zlabel, '3rd Dimension'),
    )
    for set_label, text in axis_labels:
        set_label(text)

    plt.show()

## GAN

In [3]:
'''
Model
'''

class Generator(nn.Module):
    """Small MLP that maps latent noise vectors to synthetic data rows.

    Two hidden layers of width 8 with ReLU activations are followed by a
    sigmoid, so every output feature lies in (0, 1).

    Args:
        input_dim: size of the latent (noise) vector.
        output_dim: number of features in a generated row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden = 8
        # Layer order is kept fixed so the ``fc.N`` state-dict keys stay stable.
        layers = [
            nn.Linear(input_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, output_dim),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return generated rows for a batch of latent vectors ``x``."""
        generated = self.fc(x)
        return generated

class Discriminator(nn.Module):
    """MLP that scores a data row with a single value in (0, 1).

    Six ReLU hidden layers (128, 128, 128, 128, 64, 32 units) feed a single
    sigmoid output unit.

    Args:
        input_dim: number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 128, 128, 128, 128, 64, 32)
        layers = []
        # One Linear + ReLU pair per consecutive width pair, in order, so the
        # ``fc.N`` state-dict keys match a hand-written Sequential.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid score for each row of ``x``."""
        score = self.fc(x)
        return score

# --- Hyperparameters ---
latent_dim = 64  # size of the noise vector fed to the generator
data_dim = 4     # number of features in one training row
lr = 0.0001      # learning rate shared by both Adam optimizers
seed = 0         # fixes weight initialisation and noise draws so reruns match

# Seed before the networks are built so their initial weights are reproducible.
torch.manual_seed(seed)

# --- Models ---
generator = Generator(latent_dim, data_dim).to(device)
discriminator = Discriminator(data_dim).to(device)

# The discriminator ends in a sigmoid, so plain binary cross-entropy applies.
loss_function = nn.BCELoss()
d_optimizer = optim.Adam(discriminator.parameters(), lr=lr)
g_optimizer = optim.Adam(generator.parameters(), lr=lr)

# --- Training ---
epochs = 3000
batch_size = 512
log_every = 500  # number of epochs between progress reports

for epoch in tqdm(range(epochs)):
    for i in range(0, len(X_train), batch_size):
        # Slicing clamps at the end of the tensor, so the last batch may be
        # shorter than batch_size; X_train is already a tensor on `device`.
        real_data = X_train[i:i + batch_size]
        n_real = len(real_data)

        # Build targets and noise directly on the training device instead of
        # creating them on the CPU and copying them over every step.
        real_labels = torch.ones(n_real, 1, device=device)
        fake_labels = torch.zeros(n_real, 1, device=device)
        fake_data = generator(torch.randn(n_real, latent_dim, device=device))

        # Train Discriminator: real rows -> 1, generated rows -> 0.
        # The fakes are detached so this step does not reach the generator.
        d_optimizer.zero_grad()
        real_loss = loss_function(discriminator(real_data), real_labels)
        fake_loss = loss_function(discriminator(fake_data.detach()), fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        d_optimizer.step()

        # Train Generator: push the discriminator's score on fakes toward 1.
        g_optimizer.zero_grad()
        g_loss = loss_function(discriminator(fake_data), real_labels)
        g_loss.backward()
        g_optimizer.step()

    # Progress: losses and metrics below come from the last mini-batch of the
    # epoch only, not from the whole training set.
    if (epoch+1) % log_every == 0:
        # Metrics are diagnostics only, so keep them out of the autograd graph.
        with torch.no_grad():
            ad_value = float(ad.forward(real_data, fake_data.detach()))
            kd_value = float(kd.forward(real_data, fake_data.detach()))
        print(f"Epoch {epoch+1}/{epochs}, Discriminator Loss: {d_loss.item():.4f}, Generator Loss: {g_loss.item():.4f} \n\
              Anderson Darling Distance: {ad_value:.4f} \n\
              Kendall Dependence: {kd_value:.4f} \n\
              ")

# Persist only the generator weights.
torch.save(generator.state_dict(), 'GAN_Generator.pt')


  from .autonotebook import tqdm as notebook_tqdm
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
 17%|████▉                        | 515/3000 [00:04<00:24, 99.47it/s]

Epoch 500/3000, Discriminator Loss: 0.3261, Generator Loss: 3.6719 
              Anderson Darling Distance: 346.7607 
              Kendall Dependence: 2.0394 
              


 34%|█████████▏                 | 1021/3000 [00:09<00:18, 109.22it/s]

Epoch 1000/3000, Discriminator Loss: 0.7826, Generator Loss: 2.0984 
              Anderson Darling Distance: 86.8297 
              Kendall Dependence: 1.9564 
              


 51%|█████████████▋             | 1516/3000 [00:14<00:13, 108.71it/s]

Epoch 1500/3000, Discriminator Loss: 1.0084, Generator Loss: 1.2233 
              Anderson Darling Distance: 7.2389 
              Kendall Dependence: 2.0434 
              


 67%|██████████████████         | 2011/3000 [00:18<00:09, 108.80it/s]

Epoch 2000/3000, Discriminator Loss: 1.1034, Generator Loss: 1.1894 
              Anderson Darling Distance: 2.6096 
              Kendall Dependence: 1.9915 
              


 84%|██████████████████████▋    | 2517/3000 [00:23<00:04, 103.71it/s]

Epoch 2500/3000, Discriminator Loss: 1.1594, Generator Loss: 0.8848 
              Anderson Darling Distance: 3.7332 
              Kendall Dependence: 2.0555 
              


100%|███████████████████████████| 3000/3000 [00:27<00:00, 109.16it/s]

Epoch 3000/3000, Discriminator Loss: 1.2239, Generator Loss: 0.9146 
              Anderson Darling Distance: 4.8529 
              Kendall Dependence: 1.9500 
              





In [4]:
# --- Evaluation ---
n_eval_runs = 100  # independent generator draws averaged below

# Single draw: generate as many rows as the training set and score them once.
# Sampling happens under no_grad so no autograd graph is built for the whole
# dataset and the metrics receive a plain (non-grad) tensor.
with torch.no_grad():
    sample = generator(torch.randn(len(X_train), latent_dim, device=device))

print(f'Anderson Darling Distance: {float(ad.forward(X_train, sample)):.4f} \
        \nKendall Dependence: {float(kd.forward(X_train, sample)):.4f}')

# Repeat the draw several times and average, since one sample is noisy.
ads = []
kds = []

for run in tqdm(range(n_eval_runs)):
    with torch.no_grad():
        sample = generator(torch.randn(len(X_train), latent_dim, device=device))
    ads.append(float(ad.forward(X_train, sample)))
    kds.append(float(kd.forward(X_train, sample)))

print(f'Mean Anderson Darling Distance: {np.mean(ads):.4f} \
        \n Mean Kendall Dependence: {np.mean(kds):.4f}')

# Optional visual comparison of real vs. generated data:
# visual_3D(X_train.cpu().detach().numpy())
# visual_3D(sample.cpu().detach().numpy())

Anderson Darling Distance: 11.7325         
Kendall Dependence: 2.0058


100%|█████████████████████████████| 100/100 [00:00<00:00, 145.82it/s]

Mean Anderson Darling Distance: 7.5966         
 Mean Kendall Dependence: 2.0003





In [5]:
'''
Inference
'''
! /Data/.sys/envs/env0/bin/python GAN_inference.py