In [33]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
from scipy import signal
import itertools

In [11]:
data_folder = "../../data/william/"

In [12]:
fig_folder = "../../fig/william/"

In [13]:
f_name = f'{data_folder}/preprocessed_data.csv'

In [14]:
data = pd.read_csv(f_name, index_col=0)
data

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,sitting,-0.416300,-0.417491,-0.420441,-0.429083,-0.420441,-0.414827,-0.414827,-0.425872,-0.434747,...,0.549077,0.555241,0.555241,0.539026,0.500123,0.539026,0.543436,0.543436,0.543436,0.541752
1,sitting,-0.975244,-0.975244,-0.975244,-0.956182,-0.958875,-0.958875,-0.958875,-0.939893,-0.934297,...,1.424881,1.448575,1.448575,1.456614,1.460817,1.460817,1.464875,1.485612,1.485612,1.419242
2,sitting,-0.075665,-0.075665,-0.031610,-0.031610,-0.044277,-0.054205,-0.054205,-0.038245,-0.038245,...,-0.450913,-0.458894,-0.458894,-0.437464,-0.429559,-0.429559,-0.465102,-0.465102,-0.437855,-0.390392
3,sitting,-1.344867,-1.358563,-1.358563,-1.358563,-1.360337,-1.360337,-1.400246,-1.406819,-1.400246,...,1.714682,1.714682,1.714712,1.742487,1.742487,1.713822,1.713822,1.737701,1.737701,1.730101
4,sitting,-0.972655,-1.007781,-1.018223,-1.018223,-1.061634,-1.061634,-1.059564,-1.026008,-1.026008,...,1.947539,1.947539,1.947539,1.883424,1.903985,1.903985,1.903985,1.918113,1.909350,1.909350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,standing,0.362166,0.462288,0.467350,0.467350,0.441121,0.441121,0.441121,0.443292,0.443292,...,-0.600820,-0.605646,-0.622196,-0.622196,-0.637492,-0.597702,-0.637492,-0.597702,-0.642326,-0.506493
346,standing,-0.025810,-0.025810,-0.020282,-0.013312,-0.016767,-0.005698,-0.016767,0.022315,-0.003027,...,0.071983,0.046967,0.046967,0.055400,0.055400,0.060659,0.084731,0.098476,0.084731,-0.000364
347,standing,0.945325,0.952541,0.962438,0.962438,0.950656,0.927253,0.927253,0.976097,0.980584,...,-1.318418,-1.337065,-1.371594,-1.372040,-1.372942,-1.372040,-1.372942,-1.362027,-1.374201,-1.351678
348,standing,1.015584,1.015584,1.015823,1.027837,1.027837,1.011747,1.008753,1.011747,1.024149,...,-1.649722,-1.649722,-1.659491,-1.679571,-1.679571,-1.668293,-1.665690,-1.665690,-1.622155,-1.598536


In [15]:
class Dataset(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'

    def __init__(self, x, y):
        'Initialization'
        self.x = torch.from_numpy(x.copy()).float()
        self.y = torch.from_numpy(y.copy()).long()

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.y)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        x = self.x[index]
        y = self.y[index]
        return x, y


def get_data(data_folder):

    f_name = f'{data_folder}/preprocessed_data.csv'
    df = pd.read_csv(f_name, index_col=0)

    idx_data = df.columns[1:]

    labels = list(df.label.unique())
    print(f"N labels = {len(labels)}")

    # n_obs = len(df)
    # n_feature = len(idx_data)

    x = df[idx_data].values

    x = signal.decimate(x, 4, axis=1)

    print(df.label.value_counts())

    df.label = pd.Categorical(df.label)
    y = df.label.cat.codes.values

    print("number of label 0", len(y) - y.sum())
    print("number of label 1", y.sum())

    print("X shape", x.shape)

    training_data = Dataset(x, y)
    return training_data

In [17]:
train = get_data(data_folder=data_folder)

N labels = 2
standing    178
sitting     172
Name: label, dtype: int64
number of label 0 172
number of label 1 178
X shape (350, 500)


In [46]:
def sample_normal(mu, logvar, latent_dim):
    std = torch.exp(0.5*logvar)
    shape = (mu.size(0), latent_dim)
    rn = torch.randn(shape)
    z = rn * std + mu
    return z

In [70]:
class Encoder(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(Encoder, self).__init__()
        
        self.latent_dim = latent_dim

        self.model = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
        )

        self.mu = nn.Linear(512, latent_dim)
        self.logvar = nn.Linear(512, latent_dim)

    def forward(self, x):
        x_ = self.model(x)
        mu = self.mu(x_)
        logvar = self.logvar(x_)
        z = sample_normal(mu, logvar, latent_dim)
        return z

In [71]:
class Decoder(nn.Module):
    def __init__(self, latent_dim, input_dim):
        super(Decoder, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, input_dim),
            nn.Tanh(),
        )

    def forward(self, z):
        x = self.model(z)
        return x

In [72]:
class Discriminator(nn.Module):
    def __init__(self, latent_dim):
        super(Discriminator, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(latent_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, z):
        validity = self.model(z)
        return validity.squeeze().long()

In [73]:
# Use binary cross-entropy loss
adversarial_loss = torch.nn.BCELoss()
reconstruction_loss = torch.nn.L1Loss()

In [74]:
input_dim = train.x.shape[1]
latent_dim = 3
print("input_dim", input_dim)
print("latent_dim", latent_dim)

# Initialize generator and discriminator
encoder = Encoder(input_dim=input_dim, latent_dim=latent_dim)
decoder = Decoder(input_dim=input_dim, latent_dim=latent_dim)
discriminator = Discriminator(latent_dim=latent_dim)

input_dim 500
latent_dim 3


In [75]:
# Configure data loader
batch_size = len(train)
dataloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)

In [76]:
# Optimizers

lr = 0.005
b1 = 0.3
b2 = 0.999

optimizer_G = torch.optim.Adam(
    itertools.chain(encoder.parameters(), decoder.parameters()), lr=lr, betas=(b1, b2)
)
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr, betas=(b1, b2))

In [81]:
def generate(n_row, batches_done, directory):
    """Saves a grid of generated digits"""
    # Sample noise
    z = torch.randn(latent_dim)
    gen_x = decoder(z)
    
    fig, axes = plt.subplots(nrows=n_row)
    print(gen_x.data.shape)
    raise Exception
    #(gen_x.data, normalize=True)
    
    plt.savefig(f"{directory}/{batches_done}.png")

In [82]:
n_epochs = 1000

In [83]:
directory = "../../fig/william/aae"

In [84]:
for epoch in range(n_epochs):
    for i, (batch_x, batch_y) in enumerate(dataloader):

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        encoded = encoder(batch_x)
        decoded = decoder(encoded)
        predicted = discriminator(encoded)
        print(predicted.squeeze().dtype)
        print(batch_y.dtype)

        # Loss measures generator's ability to fool the discriminator
        g_loss = 0.001 * adversarial_loss(predicted.squeeze(), batch_y)\
            + 0.999 * reconstruction_loss(decoded, real_imgs)

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Sample noise as discriminator ground truth
        z = torch.randn(batch_x.shape[0], latent_dim)

        # Measure discriminator's ability to classify real from generated samples
        real_loss = adversarial_loss(discriminator(z), valid)
        fake_loss = adversarial_loss(discriminator(encoded_imgs.detach()), fake)
        d_loss = 0.5 * (real_loss + fake_loss)

        d_loss.backward()
        optimizer_D.step()
        
    batches_done = epoch * len(dataloader) + i

    if epoch >= 25 and epoch % 10 == 0:
        val = input("\nContinue training? [y/n]: ")
        print()
        if val in ('y', 'yes'):
            val = True
            pass
        elif val in ('n', 'no'):
            break  
        else:
            pass

    if epoch > 10:
        if batches_done % opt.sample_interval == 0:
            generate(n_row=5, batches_done=batches_done, directory=directory)

    if epoch % 5 == 0:
        print(
            "[Epoch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, opt.n_epochs, d_loss.item(), g_loss.item())
        )        

torch.int64
torch.int64


RuntimeError: "binary_cross_entropy" not implemented for 'Long'