ⓒ 2022 CCNets Inc.

https://ccnets.org

# Initialization

In [None]:
!pip install scikit-learn==1.1 --user

In [None]:
import sklearn
# Display the scikit-learn version that this kernel actually imports.
sklearn.__version__

# Preprocessing

In [None]:
import sys
# Relative prefix pointing at the parent of the current working directory;
# later cells reuse it to build data, model and log paths.
path_append = "../"
sys.path.append(path_append)  # Add the parent directory to the module search path.

import torch
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler

In [None]:
# Build the CSV location relative to `path_append`, load it into a DataFrame,
# and show the frame as the cell output.
dataroot = f"{path_append}../data/creditcardfraud/creditcard.csv"
df = pd.read_csv(dataroot)
df

In [None]:
# Report the percentage of rows in each class (label 0 and label 1 of 'Class'),
# rounded to two decimals.
class_counts = df['Class'].value_counts()
n_rows = len(df)
for caption, class_label in (('No Frauds', 0), ('Frauds', 1)):
    share = round(class_counts[class_label] / n_rows * 100, 2)
    print(caption, share, '%of the dataset')

# DataLoader

# Count the missing values in each column of df.
df.isnull().sum()

In [None]:
# https://fraud-detection-handbook.github.io/fraud-detection-handbook/Chapter_7_DeepLearning/FeedForwardNeuralNetworks.html
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs feature rows with their labels.

    Args:
        x: Indexable collection of feature rows.
        y: Indexable collection of labels, indexed the same way as ``x``.
    """

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __len__(self):
        """Return the number of samples, taken from the length of ``x``."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(features, label)`` for ``index`` as float32 tensors.

        The label gets a trailing dimension appended, so a scalar label is
        returned with shape ``(1,)``.
        """
        features = torch.tensor(self.x[index], dtype=torch.float32)
        target = torch.tensor(self.y[index], dtype=torch.float32)
        return features, target.unsqueeze(-1)


In [None]:
# Split the frame into the label column (kept as a one-column DataFrame) and
# the feature columns.
y = df[['Class']]
X = df.drop(columns=['Class'])

# Replace the raw 'Amount' and 'Time' columns with robust-scaled versions.
# The same scaler object is re-fitted for each column, so after this cell `sc`
# holds the statistics of 'Time' only.
sc = RobustScaler()
for raw_col, scaled_col in (('Amount', 'scaled_amount'), ('Time', 'scaled_time')):
    X[scaled_col] = sc.fit_transform(X[raw_col].values.reshape(-1, 1))
X = X.drop(columns=['Time', 'Amount'])
X = X[:]


# Training

In [None]:
from ccnets.config import get_parser
from ccnets.ccnets import CCNets
from ccnets.resnets import ResNets
from nn.custom_deepfm import DeepFM
from nn.custom_dnn import ResMLP, MLP 
from ccnets.utils.log import create_log_details, create_log_name
from ccnets.utils.setting import set_random_seed

import os
from torch.utils.tensorboard import SummaryWriter


In [None]:
# Build the run configuration, then select the first CUDA device only when
# CUDA is available and the configuration requests at least one GPU;
# otherwise fall back to the CPU.
args = get_parser()
use_gpu = torch.cuda.is_available() and args.ngpu > 0
args.device = torch.device('cuda:0' if use_gpu else "cpu")

In [None]:
import IPython
from pathlib import Path

# Name the output directories after this notebook. The notebook path is read
# from the `__vsc_ipynb_file__` entry of the namespace returned by
# IPython.extract_module_locals() (presumably injected by the VS Code Jupyter
# extension -- confirm; a KeyError here means the variable is not defined).
file_path = IPython.extract_module_locals()[1]['__vsc_ipynb_file__']
file_name = Path(file_path).stem
model_path = path_append + f"models/{file_name}/"
temp_path = path_append + f"models/temp_{file_name}/"
log_path = path_append + f"log/{file_name}/"

# Create each directory together with any missing parents (a plain os.mkdir
# fails when `models/` or `log/` does not exist yet). Directories that already
# exist are left untouched.
for out_dir in (temp_path, model_path, log_path):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

args.model_path = model_path
args.temp_path = temp_path

In [None]:
# Run settings, applied to `args` one attribute at a time in the order listed.
# Note: `num_epoch` is recomputed for every test size in the experiment loop.
settings = {
    "num_epoch": 10,
    "lr": 2e-4,
    "batch_size": 64,
    "step_size": 10,

    "num_layer": 3,
    "hidden_size": 256,

    "obs_size": 30,
    "label_size": 1,
    "explain_size": 1,
    "seq_len": 0,

    "num_checkpoints": 100,
    "use_one_hot": False,

    "reasoner_joint_type": "add",
    "producer_joint_type": "add",
    "label_type": "UC",

    "obs_fn": "none",
    "label_fn": "none",
}
for option, value in settings.items():
    setattr(args, option, value)

In [None]:
test_sizes = [0.2, 0.6, 0.8, 0.9]

# For every held-out fraction, train a CCNets model and then a ResNets model
# on the same train/test split, each logging to its own TensorBoard writer.
for test_size in test_sizes:

    # The epoch count grows as the training fraction shrinks:
    # 0.2 -> 4, 0.6 -> 8, 0.8 -> 16, 0.9 -> 32 epochs.
    args.num_epoch = int(round(3.2/(1 - test_size)))

    # shuffle=False keeps the rows in their existing order: the leading part
    # becomes the training set and the trailing part the test set.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)

    # Convert to NumPy arrays; the labels are taken from the last column of
    # `y`, giving 1-D arrays.
    X_train = X_train.to_numpy()
    X_test = X_test.to_numpy()
    y_train = y_train.iloc[:, -1].to_numpy()
    y_test = y_test.iloc[:, -1].to_numpy()

    trainset = Dataset(X_train, y_train)
    testset = Dataset(X_test, y_test)

    # --- CCNets run ---
    args.loss_type = "L1"
    args.error_type = "Sub"
    args.loss_reduction = "all"
    args.error_reduction = "none"
    log_details = create_log_details(args)
    args.log = SummaryWriter(log_dir=create_log_name(log_path, log_details))
    try:
        set_random_seed(0)
        ccnets = CCNets(args, MLP, DeepFM, ResMLP)
        ccnets.train(trainset, testset)
    finally:
        # Flush pending events and release the writer even if training fails.
        args.log.close()

    # --- ResNets run ---
    # Only the loss settings are changed here; error_type and error_reduction
    # keep the values assigned for the CCNets run above.
    args.loss_type = "MSE"
    args.loss_reduction = "all"
    log_details = create_log_details(args)
    args.log = SummaryWriter(log_dir=create_log_name(log_path, log_details))
    try:
        set_random_seed(0)
        resnets = ResNets(args, MLP, DeepFM)
        resnets.train(trainset, testset)
    finally:
        # Flush pending events and release the writer even if training fails.
        args.log.close()