In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
import pickle
import os
import seaborn as sns
import copy

from sklearn.metrics import normalized_mutual_info_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torch.optim as optim

import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

from vae import VAE
from loss_function import loss_function

In [None]:
# creditcard.csv is expected in the 'data' directory next to the working directory.
# abspath() resolves the relative path against os.getcwd() and collapses the '..'.
path = os.path.abspath(os.path.join('..', 'data', 'creditcard.csv'))
print(path)
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the first rows of the raw frame as a rich table.
display(df.head())

In [None]:
# Number of rows for each distinct value of the 'Class' column.
df['Class'].value_counts()

In [None]:
# Keep only the rows labelled Class == 0 and renumber them from zero.
normal_data = df.loc[df['Class'] == 0].reset_index(drop=True)
display(normal_data.head())
print(normal_data.shape)

In [None]:
# Keep only the rows labelled Class == 1 and renumber them from zero.
novel_data = df[df['Class'].eq(1)].reset_index(drop=True)
display(novel_data.head())
print(novel_data.shape)

In [None]:
# 60% / 20% / 20% of the Class == 0 rows, each truncated to an integer row count.
train_size, valid_size, test_size = (
    int(normal_data.shape[0] * fraction) for fraction in (0.6, 0.2, 0.2)
)

In [None]:
# One size per line: train, validation, test.
print(train_size, valid_size, test_size, sep='\n')

In [None]:
# Contiguous positional split of the Class == 0 rows (no shuffling):
# first train_size rows, then valid_size rows, then everything that is left.
train_data = normal_data.iloc[:train_size]
valid_data = normal_data.iloc[train_size:train_size + valid_size]
test_data = normal_data.iloc[train_size + valid_size:]

In [None]:
# Sanity check on the training split: first rows and (rows, columns).
display(train_data.head())
print(train_data.shape)

In [None]:
# Append every Class == 1 row to the test split, so only the test split contains both classes.
# The original row labels of both frames are kept, so the index of the result is not unique.
# NOTE(review): test_data is rebuilt from itself, so running this cell a second time
# without re-running the split cell appends novel_data again.
test_data = pd.concat([test_data, novel_data])

In [None]:
# Show both ends of the combined test frame: the head comes from the Class == 0
# slice, the tail from the appended Class == 1 rows.
display(test_data.head(), test_data.tail())

In [None]:
# CPU device handle.
# NOTE(review): none of the visible cells move the model or the batches to this device.
device = torch.device('cpu')

In [None]:
# Passed as the only argument to VAE(); presumably the latent dimensionality -- confirm in vae.py.
zdims=2
# Batch size for all three DataLoaders; also forwarded to loss_function on every call.
batch_size=128
# Number of passes of the training loop over the training split.
epochs = 30

In [None]:
# Build the VAE from the local `vae` module with zdims as its only argument.
model = VAE(zdims)
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
class CCFDataset(Dataset) :
    """Map-style dataset over a credit-card frame, yielding ``(features, label)``.

    The feature vector is every column except 'Class' and 'Time'; the label is
    the 'Class' value of the same row.

    Both are converted to tensors once, at construction time. Returning pandas
    objects from ``__getitem__`` does not work with ``DataLoader``: its default
    collate function only batches tensors, numpy values, numbers, strings,
    mappings and sequences. Precomputing also avoids re-dropping the two
    columns from the whole frame on every item access.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Class' and 'Time' columns; all remaining columns must be
        convertible to float32. The frame is re-indexed from zero, so lookups
        are positional.
    transform : callable, optional
        Applied to the feature tensor of each item when given. Defaults to
        ``None`` (no transform), which is what the class effectively did
        before, because the argument was accepted but never used.

    Raises
    ------
    KeyError
        If 'Class' or 'Time' is missing from ``df``.
    """

    def __init__(self, df, transform=None) :
        frame = df.reset_index(drop=True)
        self.df = frame  # kept so existing code that reads `.df` still works
        self.transform = transform

        feature_values = frame.drop(['Class', 'Time'], axis=1).to_numpy(dtype=np.float32)
        label_values = frame['Class'].to_numpy(dtype=np.int64)

        # torch.tensor copies, so the tensors never alias pandas-owned memory.
        self.features = torch.tensor(feature_values)
        self.labels = torch.tensor(label_values)

    def __len__(self) :
        return self.features.shape[0]

    def __getitem__(self, idx) :
        # x, y
        x = self.features[idx]
        if self.transform is not None :
            x = self.transform(x)
        return x, self.labels[idx]

In [None]:
# Swap each DataFrame split for its CCFDataset wrapper, keeping the same names.
train_data, valid_data, test_data = (
    CCFDataset(frame) for frame in (train_data, valid_data, test_data)
)

In [None]:
# One shuffling loader per split, all using the shared batch_size.
train_loader, valid_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, valid_data, test_data)
)

In [None]:
def train(epoch) :
    """Run one training epoch, then measure the validation loss of the result.

    Uses the notebook-level ``model``, ``optimizer``, ``train_loader``,
    ``loss_function`` and ``batch_size``.

    Validation runs *after* the weight updates of this epoch. The training
    loop snapshots ``model`` once this function has returned, so the reported
    validation loss has to describe those same, already-updated weights.

    Parameters
    ----------
    epoch : int
        Epoch number; used for the progress message and forwarded to
        ``validate``.

    Returns
    -------
    (list, list)
        Two single-element lists: the summed training loss divided by the
        number of training samples, and the value returned by ``validate``.
    """
    model.train() # toggle train mode

    train_loss = 0

    # get batch loss for train set and backpropagate
    for data, _ in train_loader :
        # Batches from the DataLoader go to the model as-is. The old
        # autograd.Variable wrapper is not imported in the visible import
        # cell and is not needed with plain tensors.
        optimizer.zero_grad()

        # Propagate
        recon_batch, mu, logvar = model(data)

        # Get loss value
        loss = loss_function(recon_batch, data, mu, logvar, batch_size)

        # Backpropagate and update weights
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader.dataset)
    print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, avg_train_loss))

    # validate() switches the model to eval mode; the next call to train()
    # switches it back at the top.
    valid_loss = validate(epoch)

    # Single-element lists, matching what callers of this function already unpack.
    return [avg_train_loss], [valid_loss]

    
def validate(epoch) :
    """Return the average loss of ``model`` over the validation split.

    Uses the notebook-level ``model``, ``valid_loader``, ``loss_function`` and
    ``batch_size``. The loss is accumulated over ``valid_loader``, not
    ``test_loader``: the test split also holds the Class == 1 rows and must
    stay unseen while this value is used to pick the best model.

    Parameters
    ----------
    epoch : int
        Accepted for symmetry with ``train``; not used in the computation.

    Returns
    -------
    float
        Sum of the per-batch losses divided by the number of validation
        samples.
    """
    model.eval() # toggle inference mode
    valid_loss = 0

    # No gradients are needed for evaluation, so the whole pass runs under no_grad.
    with torch.no_grad() :
        for data, _ in valid_loader :
            # Propagate
            recon_batch, mu, logvar = model(data)

            # Get loss value
            valid_loss += loss_function(recon_batch, data, mu, logvar, batch_size).item()

    valid_loss /= len(valid_loader.dataset)
    print('====> Validation set loss: {:.4f}'.format(valid_loss))

    return valid_loss

In [None]:
# Per-epoch history. train() returns single-element lists, and they are stored
# as returned, so each history entry is itself a one-item list.
loss_train = []
loss_valid = []

# None means "no epoch evaluated yet". Using a sentinel instead of special-casing
# epoch 1 makes the first epoch take the same path as every other one, so
# best_model is snapshotted for it too. Previously epoch 1 only recorded its
# loss, and best_model could remain the untrained copy made below.
lowest_valid_loss = None
best_model = copy.deepcopy(model)

for epoch in range(1, epochs+1) :
    temp_train, temp_valid = train(epoch)

    # temp_valid is a one-item list; list comparison is element-wise, so this
    # compares the two loss values. Ties go to the later epoch.
    if lowest_valid_loss is None or temp_valid <= lowest_valid_loss :
        lowest_valid_loss = temp_valid
        best_model = copy.deepcopy(model)

    loss_train.append(temp_train)
    loss_valid.append(temp_valid)