# How to use deep_tabular_augmentation

In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import mlprepare as mlp
import deep_tabular_augmentation as dta

In [2]:
# Seed the global NumPy and PyTorch generators once, up front, so that later
# steps drawing from them give the same result after Restart & Run All.
# NOTE(review): this only covers code that uses these global generators —
# confirm that mlprepare / deep_tabular_augmentation do not use their own RNGs.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Location of the input CSV, relative to the notebook's working directory.
DATA_PATH = 'data/creditcard.csv'

df = pd.read_csv(DATA_PATH, sep=',')

# Preview the first rows as the cell's rich output.
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


Standardize the `Time` and `Amount` columns

In [3]:
# Work on a copy so the raw frame `df` is left untouched.
df_base = df.copy()
# Keep the column labels; they are reapplied to the generated frame later.
cols = df_base.columns

# The statistics are kept in their own variables because the same values are
# needed again to undo the scaling on the generated samples.
mean_time, std_time = df_base['Time'].mean(), df_base['Time'].std()
mean_amount, std_amount = df_base['Amount'].mean(), df_base['Amount'].std()

# z-score each column: subtract its mean, then divide by its standard deviation.
df_base['Time'] = df_base['Time'].sub(mean_time).div(std_time)
df_base['Amount'] = df_base['Amount'].sub(mean_amount).div(std_amount)

Split the data into a training set and a test set, then keep only the rows of the desired class (in this case `Class == 1`)

In [4]:
# Random split with 30% of the rows held out; 'Class' is the dependent variable.
X_train, X_test, y_train, y_test = mlp.split_df(
    df_base,
    dep_var='Class',
    test_size=0.3,
    split_mode='random',
)

# Positional indices of the rows whose label equals 1, then select those rows
# by position from the matching feature frame.
train_fraud_rows = np.where(y_train == 1)[0]
test_fraud_rows = np.where(y_test == 1)[0]
X_train_fraud = X_train.iloc[train_fraud_rows]
X_test_fraud = X_test.iloc[test_fraud_rows]

Build trainloader and testloader

In [5]:
from torch.utils.data import Dataset, DataLoader
class DataBuilder(Dataset):
    """Dataset that serves the rows of a DataFrame as ``torch.float`` tensors.

    Attributes:
        x: tensor built from ``dataset.values``, cast to ``torch.float``.
        len: number of rows in ``x`` (size of its first dimension).
    """

    def __init__(self, dataset):
        """Convert ``dataset.values`` to a float tensor and cache the row count."""
        values_tensor = torch.from_numpy(dataset.values)
        self.x = values_tensor.to(torch.float)
        self.len = self.x.shape[0]

    def __getitem__(self, index):
        """Return the entry of ``x`` selected by ``index``."""
        return self.x[index]

    def __len__(self):
        """Return the cached number of rows."""
        return self.len

# Wrap the filtered train / test frames as tensor datasets.
traindata_set = DataBuilder(X_train_fraud)
testdata_set = DataBuilder(X_test_fraud)

# Both loaders use the same batch size and DataLoader's default settings.
trainloader = DataLoader(traindata_set, batch_size=1024)
testloader = DataLoader(testdata_set, batch_size=1024)

Define Autoencoder Architecture

In [6]:
# Input width: the size of the second dimension of the training tensor.
D_in = traindata_set.x.size(1)
# H and H2 are handed to dta.AutoencoderModel in the next cell.
# NOTE(review): presumably the widths of the two hidden layers — confirm
# against the deep_tabular_augmentation documentation.
H, H2 = 50, 12

Instantiate the autoencoder model

In [7]:
# Build the model from the two loaders, the target device and the sizes
# defined above; latent_dim is the only argument passed by keyword.
autoenc_model = dta.AutoencoderModel(
    trainloader,
    testloader,
    device,
    D_in,
    H,
    H2,
    latent_dim=3,
)


Train model

In [8]:
# Train with 400 as the first argument and verbose output on; the recorded
# output reports training and test losses at epochs 200 and 400.
autoenc_model_fit = autoenc_model.fit(
    400,
    verbose=True,
)

====> Epoch: 200 Average training loss: 606.7421
====> Epoch: 200 Average test loss: 770.9880
====> Epoch: 400 Average training loss: 542.7246
====> Epoch: 400 Average test loss: 704.1669


Create fake data

In [9]:
# Request 20 samples from the fitted model with target_class=1; the result is
# handled as a DataFrame in the following cell.
df_fake = autoenc_model_fit.predict(
    no_samples=20,
    target_class=1,
)


In [10]:
# NOTE: this cell modifies df_fake in place. Re-running it without first
# re-running the predict cell would apply the inverse scaling a second time.

# Reapply the original column labels to the generated frame.
df_fake.columns = cols

# Round the generated class values to the nearest integer label.
df_fake['Class'] = df_fake['Class'].round().astype(int)

# Undo the z-scoring of Time and Amount: multiply by the stored standard
# deviation, then add the stored mean.
df_fake['Time'] = df_fake['Time'].mul(std_time).add(mean_time)
df_fake['Amount'] = df_fake['Amount'].mul(std_amount).add(mean_amount)

print(df_fake.head())

            Time        V1        V2        V3        V4        V5        V6  \
0   93474.617188  0.293408 -0.246729  0.293894 -0.124402  0.380395 -0.125231   
1   51593.964844 -0.786972  1.407178 -0.978289  2.220576 -1.518627 -1.594598   
2  125036.664062  1.112408 -1.413149  1.329979 -1.547031  1.158600  0.849928   
3  105933.195312  0.401123 -0.277272  0.274252 -0.225549  0.519257  0.288135   
4  117775.039062  1.050917 -1.054142  0.967698 -1.142049  0.908053  0.638734   

         V7        V8        V9  ...       V21       V22       V23       V24  \
0  0.439954  0.056536 -0.032758  ...  0.803350 -0.273733  0.067035 -0.494492   
1 -1.622761  1.588105 -1.980040  ...  1.571305  0.111364 -0.444804  1.067126   
2  1.181366 -0.742970  1.539857  ... -0.506332  0.019432 -0.429322  0.153596   
3  0.375989 -0.381791 -0.003583  ...  0.191066 -0.063654  0.121053 -0.371531   
4  0.953342 -0.592627  1.111482  ... -0.151764 -0.425381 -0.335313  0.062146   

        V25       V26       V27       