Based on:

@book{leborgne2022fraud,

title={Reproducible Machine Learning for Credit Card Fraud Detection - Practical Handbook},

author={Le Borgne, Yann-A{\"e}l and Siblini, Wissam and Lebichot, Bertrand and Bontempi, Gianluca},

url={https://github.com/Fraud-Detection-Handbook/fraud-detection-handbook},

year={2022},

publisher={Universit{\'e} Libre de Bruxelles}

}

In [1]:
import datetime
import sklearn
import xgboost
import torch
import time
import numpy as np
import pickle

Testing different models on a baseline feature transformation and a simple train-test split

In [2]:
# Download the handbook's shared helper module and execute it in this kernel's
# namespace. Helpers called by bare name in later cells (read_from_files,
# get_train_test_set, fit_model_and_get_predictions, ...) are not defined in this
# notebook, so they are presumably provided by this file.
!curl -O https://raw.githubusercontent.com/Fraud-Detection-Handbook/fraud-detection-handbook/main/Chapter_References/shared_functions.py
%run shared_functions.py
# Known gap: .to('cuda')/.to('cpu') still has to be added to the values returned
# by the FraudDataset class.
# NOTE(review): this notebook defines its own FraudDataset with .to(DEVICE) calls
# further down, presumably as the workaround for this gap -- confirm.

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 63257  100 63257    0     0   165k      0 --:--:-- --:--:-- --:--:--  165k


In [3]:
# 1. create 'fraud-detection-handbook' folder one folder above
# 2. cd to the folder
# 3. git clone https://github.com/Fraud-Detection-Handbook/simulated-data-transformed
DIR_INPUT = '../fraud-detection-handbook/simulated-data-transformed/data/'

BEGIN_DATE = "2018-07-25"
END_DATE = "2018-08-14"

print("Load  files")
%time transactions_df=read_from_files(DIR_INPUT, BEGIN_DATE, END_DATE)
print("{0} transactions loaded, containing {1} fraudulent transactions".format(len(transactions_df),transactions_df.TX_FRAUD.sum()))

Load  files
CPU times: total: 93.8 ms
Wall time: 88 ms
201295 transactions loaded, containing 1792 fraudulent transactions


In [4]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
transactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201295 entries, 0 to 201294
Data columns (total 23 columns):
 #   Column                               Non-Null Count   Dtype         
---  ------                               --------------   -----         
 0   TRANSACTION_ID                       201295 non-null  int64         
 1   TX_DATETIME                          201295 non-null  datetime64[ns]
 2   CUSTOMER_ID                          201295 non-null  int64         
 3   TERMINAL_ID                          201295 non-null  int64         
 4   TX_AMOUNT                            201295 non-null  float64       
 5   TX_TIME_SECONDS                      201295 non-null  int64         
 6   TX_TIME_DAYS                         201295 non-null  int64         
 7   TX_FRAUD                             201295 non-null  int64         
 8   TX_FRAUD_SCENARIO                    201295 non-null  int64         
 9   TX_DURING_WEEKEND                    201295 non-null  int64         
 

columns 0-8 : simulator data

columns 9+ : baseline feature transformation

(11-16 "keep track of the average spending amount and number of transactions for each customer and for three window sizes", for example CUSTOMER_ID_NB_TX_7DAY_WINDOW - "number of transactions by the customer in the last 7 days")

(17-22 "characterize the 'risk' associated with the terminal. The risk will be defined as the average number of frauds that were observed on the terminal for three window sizes")

In [5]:
# Training, delay and test periods are each 7 days long.
delta_train = 7
delta_delay = 7
delta_test = 7

# Training starts on the first loaded day.
start_date_training = datetime.datetime.strptime(BEGIN_DATE, "%Y-%m-%d")

# Last day of the training period (inclusive, hence the -1).
end_date_training = start_date_training + datetime.timedelta(days=delta_train - 1)

# The test period begins once the training and delay periods have elapsed.
test_offset_days = delta_train + delta_delay
start_date_test = start_date_training + datetime.timedelta(days=test_offset_days)
end_date_test = start_date_training + datetime.timedelta(days=test_offset_days + delta_test - 1)


In [6]:
# Split the transactions into a training and a test set. The period lengths are
# taken from delta_train / delta_delay / delta_test (the same variables used to
# compute the date boundaries) instead of repeating the literal 7s, so the split
# and the reported dates cannot silently drift apart.
(train_df, test_df) = get_train_test_set(transactions_df, start_date_training,
                                         delta_train=delta_train,
                                         delta_delay=delta_delay,
                                         delta_test=delta_test)

In [7]:
# Label column.
output_feature = "TX_FRAUD"

# Raw transaction attributes.
transaction_features = [
    'TX_AMOUNT',
    'TX_DURING_WEEKEND',
    'TX_DURING_NIGHT',
]

# Per-customer transaction count / average amount over 1, 7 and 30 day windows.
customer_features = [
    'CUSTOMER_ID_NB_TX_1DAY_WINDOW',
    'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW',
    'CUSTOMER_ID_NB_TX_7DAY_WINDOW',
    'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW',
    'CUSTOMER_ID_NB_TX_30DAY_WINDOW',
    'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW',
]

# Per-terminal transaction count / risk over 1, 7 and 30 day windows.
terminal_features = [
    'TERMINAL_ID_NB_TX_1DAY_WINDOW',
    'TERMINAL_ID_RISK_1DAY_WINDOW',
    'TERMINAL_ID_NB_TX_7DAY_WINDOW',
    'TERMINAL_ID_RISK_7DAY_WINDOW',
    'TERMINAL_ID_NB_TX_30DAY_WINDOW',
    'TERMINAL_ID_RISK_30DAY_WINDOW',
]

# Column order matters: it fixes the feature order seen by every model.
input_features = [*transaction_features, *customer_features, *terminal_features]

In [8]:
# First baseline: a depth-2 decision tree, fitted with scale=False.
classifier = sklearn.tree.DecisionTreeClassifier(random_state=0, max_depth=2)

model_and_predictions_dictionary = fit_model_and_get_predictions(
    classifier,
    train_df,
    test_df,
    input_features=input_features,
    output_feature=output_feature,
    scale=False,
)

In [9]:
# Inspect the returned dictionary: the classifier, train/test predictions and
# the training/prediction execution times.
model_and_predictions_dictionary

{'classifier': DecisionTreeClassifier(max_depth=2, random_state=0),
 'predictions_test': array([0.00353643, 0.00353643, 0.00353643, ..., 0.00353643, 0.00353643,
        0.00353643]),
 'predictions_train': array([0.00353643, 0.00353643, 0.00353643, ..., 0.00353643, 0.00353643,
        0.00353643]),
 'training_execution_time': 0.09800076484680176,
 'prediction_execution_time': 0.01399993896484375}

In [10]:
# Work on a copy: a plain assignment would only alias test_df, so adding the
# 'predictions' column would silently mutate the test set reused by later cells.
predictions_df = test_df.copy()
predictions_df['predictions'] = model_and_predictions_dictionary['predictions_test']

# Score the predictions; top_k_list=[100] yields the Card Precision@100 column.
performance_assessment(predictions_df, top_k_list=[100])

Unnamed: 0,AUC ROC,Average precision,Card Precision@100
0,0.763,0.496,0.241


In [11]:
# Persist the depth-2 decision tree. The context manager guarantees the file is
# flushed and closed even if pickling raises (the bare open() leaked the handle).
with open('models/baseline/dt_maxdepth2_model.sav', 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [12]:
# Candidate models, each with random_state=0 so repeated runs fit the same model.
classifiers_dictionary = {
    'Logistic regression': sklearn.linear_model.LogisticRegression(random_state=0),
    'Decision tree with depth of two': sklearn.tree.DecisionTreeClassifier(max_depth=2, random_state=0),
    'Decision tree - unlimited depth': sklearn.tree.DecisionTreeClassifier(random_state=0),
    'Random forest': sklearn.ensemble.RandomForestClassifier(random_state=0, n_jobs=-1),
    'XGBoost': xgboost.XGBClassifier(random_state=0, n_jobs=-1),
}

# Fit every candidate on the same split and keep the result keyed by model name.
fitted_models_and_predictions_dictionary = {}
for classifier_name, candidate in classifiers_dictionary.items():
    fitted_models_and_predictions_dictionary[classifier_name] = fit_model_and_get_predictions(
        candidate,
        train_df,
        test_df,
        input_features=input_features,
        output_feature=output_feature,
    )

In [13]:
# Build one performance row per fitted model on the test set (AUC ROC, average
# precision and Card Precision@100, per the table below). The bare name on the
# last line renders the resulting table.
df_performances=performance_assessment_model_collection(fitted_models_and_predictions_dictionary, test_df, 
                                                        type_set='test', 
                                                        top_k_list=[100])
df_performances

Unnamed: 0,AUC ROC,Average precision,Card Precision@100
Logistic regression,0.871,0.606,0.291
Decision tree with depth of two,0.763,0.496,0.241
Decision tree - unlimited depth,0.788,0.309,0.243
Random forest,0.867,0.658,0.287
XGBoost,0.862,0.639,0.273


In [14]:
# Tabulate the training and prediction execution times recorded for each fitted
# model; the bare name on the last line renders the table.
df_execution_times=execution_times_model_collection(fitted_models_and_predictions_dictionary)
df_execution_times

Unnamed: 0,Training execution time,Prediction execution time
Logistic regression,0.106002,0.009998
Decision tree with depth of two,0.097003,0.010998
Decision tree - unlimited depth,0.964999,0.013999
Random forest,1.685004,0.090001
XGBoost,2.293,0.033001


In [15]:
# Persist the remaining baseline models (the depth-2 tree is saved separately).
# Each file is written inside a context manager so its handle is closed
# deterministically; the original bare open() calls leaked four file handles.
baseline_model_files = {
    'Logistic regression': 'models/baseline/lr_model.sav',
    'Decision tree - unlimited depth': 'models/baseline/dt_maxdepth_unlim_model.sav',
    'Random forest': 'models/baseline/rf_model.sav',
    'XGBoost': 'models/baseline/xgb_model.sav',
}
for model_name, model_path in baseline_model_files.items():
    with open(model_path, 'wb') as model_file:
        pickle.dump(classifiers_dictionary[model_name], model_file)

In [16]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Selected device is",DEVICE)

Selected device is cuda


In [17]:
# Fix the random seed before any stochastic step (shuffling, weight init).
# seed_everything is not defined in this notebook; presumably it comes from
# shared_functions.py -- verify which libraries it actually seeds.
SEED = 42
seed_everything(SEED)

In [18]:
# Convert features and labels of both splits to float tensors.
x_train, x_test = (
    torch.FloatTensor(split_df[input_features].values) for split_df in (train_df, test_df)
)
y_train, y_test = (
    torch.FloatTensor(split_df[output_feature].values) for split_df in (train_df, test_df)
)

In [19]:
class FraudDataset(torch.utils.data.Dataset):
    """Dataset over a feature tensor and an optional label tensor.

    Each returned sample is moved to the module-level DEVICE.
    """

    def __init__(self, x, y):
        """Store the features ``x`` and the labels ``y`` (``y`` may be None)."""
        self.x = x
        self.y = y

    def __len__(self):
        """Return the total number of samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return sample ``index``: ``(features, label)``, or features only when there are no labels."""
        features = self.x[index].to(DEVICE)
        if self.y is None:
            return features
        return features, self.y[index].to(DEVICE)

In [20]:
# Batches of 64 with num_workers=0; only the training data is shuffled.
train_loader_params = dict(batch_size=64, shuffle=True, num_workers=0)
test_loader_params = dict(batch_size=64, num_workers=0)

# Wrap the tensors of each split in a dataset.
training_set = FraudDataset(x_train, y_train)
testing_set = FraudDataset(x_test, y_test)

# Batch iterators used by the training loop and by evaluation.
training_generator = torch.utils.data.DataLoader(training_set, **train_loader_params)
testing_generator = torch.utils.data.DataLoader(testing_set, **test_loader_params)

In [21]:
class SimpleFraudMLP(torch.nn.Module):
    """Feed-forward network with a single hidden layer.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> Sigmoid.
    """

    def __init__(self, input_size, hidden_size):
        """Create the layers.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of units in the hidden layer.
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Attribute names double as state_dict key prefixes (fc1.*, fc2.*), so
        # they must stay stable for saved checkpoints to load.
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output; its last dimension has size 1."""
        activated = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(activated))

In [22]:
# Instantiate the MLP with one input per feature and 1000 hidden units, then
# move its parameters to DEVICE.
model = SimpleFraudMLP(len(input_features), 1000).to(DEVICE)

In [23]:
# Binary cross-entropy loss, applied to the model's sigmoid output.
criterion = torch.nn.BCELoss().to(DEVICE)

In [24]:
# Switch to evaluation mode. eval() returns the module itself, so as the last
# expression of the cell it also displays the architecture.
model.eval()

SimpleFraudMLP(
  (fc1): Linear(in_features=15, out_features=1000, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=1000, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [25]:
# Stochastic gradient descent over all model parameters, learning rate 0.07.
optimizer = torch.optim.SGD(model.parameters(), lr = 0.07)

In [26]:
# changed from 150 for faster results
n_epochs = 25
model.train()

start_time = time.time()
epochs_train_losses = []  # mean training loss per epoch
epochs_test_losses = []   # loss on the test loader per epoch
for epoch in range(n_epochs):
    # Re-enable training mode every epoch: evaluate_model runs at the end of
    # each epoch and presumably leaves the model in eval mode (verify).
    model.train()
    train_loss = []
    for x_batch, y_batch in training_generator:
        optimizer.zero_grad()
        y_pred = model(x_batch)
        # Remove only the trailing output dimension. A bare squeeze() would also
        # collapse the batch dimension when the final batch holds one sample,
        # giving a 0-d tensor whose shape no longer matches y_batch, which makes
        # BCELoss raise.
        loss = criterion(y_pred.squeeze(1), y_batch)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    # Compute the epoch mean once and reuse it for both the log and the history.
    mean_train_loss = np.mean(train_loss)
    epochs_train_losses.append(mean_train_loss)
    print('Epoch {}: train loss: {}'.format(epoch, mean_train_loss))

    val_loss = evaluate_model(model, testing_generator, criterion)
    epochs_test_losses.append(val_loss)
    print('test loss: {}'.format(val_loss))
    print("")

# Wall-clock duration of the whole loop, in seconds.
training_execution_time = time.time() - start_time

Epoch 0: train loss: 0.03482447893932093
test loss: 0.02212812714421821

Epoch 1: train loss: 0.026304942334636153
test loss: 0.020956096490620177

Epoch 2: train loss: 0.02477469021703899
test loss: 0.020735017180904893

Epoch 3: train loss: 0.024022467529443717
test loss: 0.021015695675631982

Epoch 4: train loss: 0.023468064743609875
test loss: 0.020646726651373806

Epoch 5: train loss: 0.023047099678824123
test loss: 0.019709601263545297

Epoch 6: train loss: 0.022711212656856803
test loss: 0.01983951722781872

Epoch 7: train loss: 0.022619226127091996
test loss: 0.019601698620699787

Epoch 8: train loss: 0.02191990295894263
test loss: 0.020056967769849648

Epoch 9: train loss: 0.021763283022316962
test loss: 0.01953357877695379

Epoch 10: train loss: 0.021666503598763796
test loss: 0.02038441432545904

Epoch 11: train loss: 0.021089430717878988
test loss: 0.020437601461248015

Epoch 12: train loss: 0.020934559022927998
test loss: 0.019799922216856395

Epoch 13: train loss: 0.02095

In [27]:
# Duration of the training loop in seconds (difference of two time.time() readings).
training_execution_time

341.3051838874817

In [28]:
# Inference only: make sure the model is in evaluation mode and disable autograd
# so no computation graph is built and kept for the whole test set.
model.eval()
with torch.no_grad():
    predictions_test = model(x_test.to(DEVICE))

In [29]:
# Work on a copy: a plain assignment would only alias test_df, so writing the
# 'predictions' column would mutate the test set itself.
predictions_df = test_df.copy()
# detach() drops any autograd history before the tensor is converted to NumPy.
predictions_df['predictions'] = predictions_test.detach().cpu().numpy()

performance_assessment(predictions_df, top_k_list=[100])

Unnamed: 0,AUC ROC,Average precision,Card Precision@100
0,0.872,0.624,0.28


In [31]:
# Save only the learned parameters (the state_dict), not the module object.
torch.save(model.state_dict(), 'models/baseline/simple_mlp_model.pt')

In [32]:
# Check that the saved parameters can restore the model and run inference:
# the metrics below should match those obtained before saving.

model = SimpleFraudMLP(len(input_features), 1000).to(DEVICE)
# map_location remaps the stored tensors onto the current DEVICE, so a
# checkpoint written on a GPU machine can still be loaded on a CPU-only host.
model.load_state_dict(torch.load('models/baseline/simple_mlp_model.pt', map_location=DEVICE))
model.eval()
# Inference only: no autograd graph is needed for the test-set forward pass.
with torch.no_grad():
    predictions_test = model(x_test.to(DEVICE))
# Copy so that adding the predictions column does not mutate test_df.
predictions_df = test_df.copy()
predictions_df['predictions'] = predictions_test.detach().cpu().numpy()

performance_assessment(predictions_df, top_k_list=[100])

Unnamed: 0,AUC ROC,Average precision,Card Precision@100
0,0.872,0.624,0.28
