In [None]:
import lib._util.visualplot as vp

In [None]:
import pandas as pd
# Lift the column cap on rendered DataFrames so wide frames are not truncated.
pd.options.display.max_columns = None

# Constants

In [None]:
# Folder the input CSV is read from. The trailing slash matters: the file name
# is appended to this string directly when the path is built.
SOURCE_PATH_DATA = 'resources/data/'

# Folder passed to every plotting helper below as its `out_path`.
OUT_PATH_GRAPH = 'resources/output/graph/'

# Data Loading
- Reference: https://www.kaggle.com/mlg-ulb/creditcardfraud/home
- Time: Number of seconds elapsed between this transaction and the first transaction in the dataset
- V1-V28: Anonymized features that may be the result of a PCA dimensionality reduction, applied to protect user identities and sensitive features
- Amount: Transaction amount
- Class: 1 for fraudulent transactions, 0 otherwise

In [None]:
# Read the CSV in 50,000-row chunks, then stitch the chunks into one DataFrame.
# `Class` is loaded as text instead of a number.
# NOTE(review): the plotting cells below pass `Class` as `color`; presumably
# that is why it is kept as a string label — confirm.
csv_path  = f'{SOURCE_PATH_DATA}creditcard.csv'
df_chunks = pd.read_csv(
    csv_path,
    sep=',',
    dtype={'Class': str},
    chunksize=50_000,
    nrows=None,  # None = no row limit
)
data_df = pd.concat(df_chunks)

# (rows, columns) of the loaded frame, shown as the cell output.
data_df.shape

In [None]:
# Peek at the first five rows of the loaded frame.
data_df.head(n=5)

In [None]:
# Run the project's visualplot `faststat` helper on the full frame.
# NOTE(review): presumably a quick statistical summary of each column — what it
# actually reports is defined in lib/_util/visualplot; confirm there.
vp.faststat(data_df)

###### Histogram

In [None]:
# Histogram view of data_df through the project's visualplot helper; the
# output location is OUT_PATH_GRAPH.
# NOTE(review): max_col=4 presumably caps the subplot grid at four columns and
# layout_kwargs is presumably forwarded to the figure layout — confirm in vp.histogram.
histogram_layout = {'height': 2048}

vp.histogram(
    data_df,
    bin_algo='count',
    max_col=4,
    layout_kwargs=histogram_layout,
    out_path=OUT_PATH_GRAPH,
)

###### Box

In [None]:
# Box-plot view of data_df through the project's visualplot helper, with
# `Class` passed as the color column; the output location is OUT_PATH_GRAPH.
# NOTE(review): layout_kwargs is presumably forwarded to the figure layout
# (tall figure, horizontal legend) — confirm in vp.box.
box_layout = {
    'height': 2048,
    'legend_orientation': 'h',
}

vp.box(
    data_df,
    color='Class',
    max_col=4,
    layout_kwargs=box_layout,
    out_path=OUT_PATH_GRAPH,
)

###### Scatter

In [None]:
# Pair every column with its right-hand neighbour:
# (col0, col1), (col1, col2), ..., (col[n-2], col[n-1]).
# Zipping the column list against itself shifted by one gives exactly those
# pairs and yields an empty list when there are fewer than two columns.
columns   = data_df.columns
xy_tuples = list(zip(columns[:-1], columns[1:]))

# Scatter view of each neighbouring pair through the project's visualplot
# helper, with `Class` passed as the color column.
scatter_layout = {'height': 2048}

vp.scatter(
    data_df,
    xy_tuples=xy_tuples,
    color='Class',
    max_col=4,
    layout_kwargs=scatter_layout,
    out_path=OUT_PATH_GRAPH,
)

###### Correlation Matrix

In [None]:
# Correlation-matrix view of data_df through the project's visualplot helper;
# the output location is OUT_PATH_GRAPH.
# NOTE(review): absolute=True / matrix_type='upper' presumably select absolute
# correlation values and the upper triangle only, and heatmap_kwargs is
# presumably forwarded to the heatmap trace — confirm in vp.corrmat.
corrmat_heatmap = {'reversescale': True}

vp.corrmat(
    data_df,
    matrix_type='upper',
    absolute=True,
    heatmap_kwargs=corrmat_heatmap,
    out_path=OUT_PATH_GRAPH,
)

###### Pair

In [None]:
# Pair-plot view of data_df through the project's visualplot helper, with
# `Class` passed as the color column; the output location is OUT_PATH_GRAPH.
# NOTE(review): the two dicts are presumably forwarded to the figure layout and
# to the plot traces respectively (large square canvas; diagonal and lower
# half switched off) — confirm in vp.pair.
pair_layout = {
    'width': 3072,
    'height': 3072,
}
pair_traces = {
    'diagonal_visible': False,
    'showlowerhalf': False,
}

vp.pair(
    data_df,
    color='Class',
    layout_kwargs=pair_layout,
    traces_kwargs=pair_traces,
    out_path=OUT_PATH_GRAPH,
)

###### KDE

In [None]:
# KDE (kernel density estimate) view of data_df through the project's
# visualplot helper, with `Class` passed as the color column; the output
# location is OUT_PATH_GRAPH.
# NOTE(review): layout_kwargs is presumably forwarded to the figure layout
# (tall figure, horizontal legend) — confirm in vp.kde.
kde_layout = {
    'height': 2048,
    'legend_orientation': 'h',
}

vp.kde(
    data_df,
    color='Class',
    max_col=4,
    layout_kwargs=kde_layout,
    out_path=OUT_PATH_GRAPH,
)