Based on:

@book{leborgne2022fraud,

title={Reproducible Machine Learning for Credit Card Fraud Detection - Practical Handbook},

author={Le Borgne, Yann-A{\"e}l and Siblini, Wissam and Lebichot, Bertrand and Bontempi, Gianluca},

url={https://github.com/Fraud-Detection-Handbook/fraud-detection-handbook},

year={2022},

publisher={Universit{\'e} Libre de Bruxelles}

}

Covered subchapters:
* 7.4 Sequential models and representation learning

In [2]:
import datetime
import numpy as np
import torch
import pandas as pd
import wandb
import time

In [3]:
%run shared_functions.py

In [4]:
%run my_shared_functions.py

In [5]:
DIR_INPUT = '../fraud-detection-handbook/simulated-data-transformed/data/'

BEGIN_DATE = "2018-06-11"
END_DATE = "2018-09-14"

print("Load  files")
%time transactions_df=read_from_files(DIR_INPUT, BEGIN_DATE, END_DATE)
print("{0} transactions loaded, containing {1} fraudulent transactions".format(len(transactions_df),transactions_df.TX_FRAUD.sum()))

output_feature="TX_FRAUD"

input_features=['TX_AMOUNT','TX_DURING_WEEKEND', 'TX_DURING_NIGHT', 'CUSTOMER_ID_NB_TX_1DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW', 'CUSTOMER_ID_NB_TX_7DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW', 'CUSTOMER_ID_NB_TX_30DAY_WINDOW',
       'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW', 'TERMINAL_ID_NB_TX_1DAY_WINDOW',
       'TERMINAL_ID_RISK_1DAY_WINDOW', 'TERMINAL_ID_NB_TX_7DAY_WINDOW',
       'TERMINAL_ID_RISK_7DAY_WINDOW', 'TERMINAL_ID_NB_TX_30DAY_WINDOW',
       'TERMINAL_ID_RISK_30DAY_WINDOW']

Load  files
CPU times: total: 391 ms
Wall time: 408 ms
919767 transactions loaded, containing 8195 fraudulent transactions


In [6]:
# Set the starting day for the training period, and the deltas
start_date_training = datetime.datetime.strptime("2018-07-25", "%Y-%m-%d")
delta_train=7
delta_delay=7
delta_test=7


delta_valid = delta_test

start_date_training_with_valid = start_date_training+datetime.timedelta(days=-(delta_delay+delta_valid))

(train_df, valid_df)=get_train_test_set(transactions_df,start_date_training_with_valid,
                                       delta_train=delta_train,delta_delay=delta_delay,delta_test=delta_test)

# By default, scales input data
(train_df, valid_df)=scaleData(train_df, valid_df,input_features)

In [7]:
input_features


['TX_AMOUNT',
 'TX_DURING_WEEKEND',
 'TX_DURING_NIGHT',
 'CUSTOMER_ID_NB_TX_1DAY_WINDOW',
 'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW',
 'CUSTOMER_ID_NB_TX_7DAY_WINDOW',
 'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW',
 'CUSTOMER_ID_NB_TX_30DAY_WINDOW',
 'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW',
 'TERMINAL_ID_NB_TX_1DAY_WINDOW',
 'TERMINAL_ID_RISK_1DAY_WINDOW',
 'TERMINAL_ID_NB_TX_7DAY_WINDOW',
 'TERMINAL_ID_RISK_7DAY_WINDOW',
 'TERMINAL_ID_NB_TX_30DAY_WINDOW',
 'TERMINAL_ID_RISK_30DAY_WINDOW']

In [8]:
# landmark variable
dates = train_df['TX_DATETIME'].values

# time variable for chronological order sequence building
customer_ids = train_df['CUSTOMER_ID'].values

In [9]:
seq_len = 5

In [10]:
indices_sort = np.argsort(dates)
sorted_dates = dates[indices_sort]
sorted_ids = customer_ids[indices_sort]

In [11]:
sorted_dates

array(['2018-07-11T00:00:54.000000000', '2018-07-11T00:01:59.000000000',
       '2018-07-11T00:03:39.000000000', ...,
       '2018-07-17T23:57:59.000000000', '2018-07-17T23:58:23.000000000',
       '2018-07-17T23:59:52.000000000'], dtype='datetime64[ns]')

In [12]:
sorted_ids

array([ 579,  181, 4386, ...,  137, 1331, 1655], dtype=int64)

In [13]:
unique_customer_ids = np.unique(sorted_ids)
unique_customer_ids[0:10]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

##### Example

The sequence of transcation IDs for customer 0

In [14]:
idx = 0
current_customer_id = unique_customer_ids[idx]
customer_mask = sorted_ids == current_customer_id
# this is the full sequence of transaction indices (after sort) for customer 0
customer_full_seq = np.where(customer_mask)[0]
# this is the full sequence of transaction indices (before sort) for customer 0
customer_full_seq_original_indices = indices_sort[customer_full_seq]
customer_full_seq_original_indices

array([ 1888, 10080, 12847, 15627, 18908, 22842, 37972, 42529, 44495,
       48980, 58692, 63977], dtype=int64)

In [15]:
customer_all_seqs = rolling_window(customer_full_seq_original_indices,seq_len)
customer_all_seqs

array([[   -1,    -1,    -1,    -1,  1888],
       [   -1,    -1,    -1,  1888, 10080],
       [   -1,    -1,  1888, 10080, 12847],
       [   -1,  1888, 10080, 12847, 15627],
       [ 1888, 10080, 12847, 15627, 18908],
       [10080, 12847, 15627, 18908, 22842],
       [12847, 15627, 18908, 22842, 37972],
       [15627, 18908, 22842, 37972, 42529],
       [18908, 22842, 37972, 42529, 44495],
       [22842, 37972, 42529, 44495, 48980],
       [37972, 42529, 44495, 48980, 58692],
       [42529, 44495, 48980, 58692, 63977]])

6th sequence

In [16]:
customer_all_seqs[5]

array([10080, 12847, 15627, 18908, 22842])

In [17]:
x_train = torch.FloatTensor(train_df[input_features].values)

In [18]:
sixth_sequence = x_train[customer_all_seqs[5],:]
sixth_sequence

tensor([[ 0.6965, -0.6306,  2.1808, -0.8466,  0.0336, -1.1665,  0.0176, -0.9341,
          0.2310, -0.9810, -0.0816, -0.3445, -0.1231, -0.2491, -0.1436],
        [ 0.0358, -0.6306, -0.4586, -0.8466,  0.4450, -1.1665,  0.1112, -0.8994,
          0.2278,  0.0028, -0.0816,  0.6425, -0.1231, -0.0082, -0.1436],
        [ 1.1437, -0.6306, -0.4586, -0.3003,  0.7595, -1.0352,  0.2462, -0.8994,
          0.2458,  1.9702, -0.0816,  1.3005, -0.1231,  1.7989, -0.1436],
        [ 0.3645, -0.6306, -0.4586,  0.2461,  0.6804, -1.0352,  0.3186, -0.8647,
          0.2514,  1.9702, -0.0816,  0.3135, -0.1231, -0.8514, -0.1436],
        [ 0.3348, -0.6306, -0.4586, -0.3003,  0.7462, -1.1665,  0.2494, -0.8994,
          0.2262, -0.9810, -0.0816, -2.3185, -0.1231, -1.5743, -0.1436]])

In [19]:
sixth_sequence.shape

torch.Size([5, 15])

##### Efficient Pandas + groupby

In [20]:
df_ids_dates = pd.DataFrame({'CUSTOMER_ID': customer_ids,
        'TX_DATETIME': dates})
df_ids_dates

Unnamed: 0,CUSTOMER_ID,TX_DATETIME
0,579,2018-07-11 00:00:54
1,181,2018-07-11 00:01:59
2,4386,2018-07-11 00:03:39
3,4599,2018-07-11 00:05:50
4,4784,2018-07-11 00:06:04
...,...,...
66923,1494,2018-07-17 23:55:41
66924,2561,2018-07-17 23:56:05
66925,137,2018-07-17 23:57:59
66926,1331,2018-07-17 23:58:23


In [21]:
df_ids_dates["TX_DATETIME"].shift(1)

0                       NaT
1       2018-07-11 00:00:54
2       2018-07-11 00:01:59
3       2018-07-11 00:03:39
4       2018-07-11 00:05:50
                ...        
66923   2018-07-17 23:54:45
66924   2018-07-17 23:55:41
66925   2018-07-17 23:56:05
66926   2018-07-17 23:57:59
66927   2018-07-17 23:58:23
Name: TX_DATETIME, Length: 66928, dtype: datetime64[ns]

In [22]:
(df_ids_dates["TX_DATETIME"] - df_ids_dates["TX_DATETIME"].shift(1))

0                   NaT
1       0 days 00:01:05
2       0 days 00:01:40
3       0 days 00:02:11
4       0 days 00:00:14
              ...      
66923   0 days 00:00:56
66924   0 days 00:00:24
66925   0 days 00:01:54
66926   0 days 00:00:24
66927   0 days 00:01:29
Name: TX_DATETIME, Length: 66928, dtype: timedelta64[ns]

In [23]:
#checking if the transaction are chronologically ordered
datetime_diff = (df_ids_dates["TX_DATETIME"] - df_ids_dates["TX_DATETIME"].shift(1)).iloc[1:].dt.total_seconds()
assert (datetime_diff >= 0).all()

In [24]:
df_ids_dates["tmp_index"]  = np.arange(len(df_ids_dates))
df_ids_dates.head()

Unnamed: 0,CUSTOMER_ID,TX_DATETIME,tmp_index
0,579,2018-07-11 00:00:54,0
1,181,2018-07-11 00:01:59,1
2,4386,2018-07-11 00:03:39,2
3,4599,2018-07-11 00:05:50,3
4,4784,2018-07-11 00:06:04,4


In [25]:
df_groupby_customer_id = df_ids_dates.groupby("CUSTOMER_ID")

In [26]:
sequence_indices = pd.DataFrame(
            {
                "tx_{}".format(n): df_groupby_customer_id["tmp_index"].shift(seq_len - n - 1)
                for n in range(seq_len)
            }
        )

sequence_indices = sequence_indices.fillna(-1).astype(int)
sequence_indices.head()

Unnamed: 0,tx_0,tx_1,tx_2,tx_3,tx_4
0,-1,-1,-1,-1,0
1,-1,-1,-1,-1,1
2,-1,-1,-1,-1,2
3,-1,-1,-1,-1,3
4,-1,-1,-1,-1,4


In [27]:
sequence_indices.tail()

Unnamed: 0,tx_0,tx_1,tx_2,tx_3,tx_4
66923,59962,65409,65951,66805,66923
66924,42669,48902,62594,64441,66924
66925,-1,18988,23403,66777,66925
66926,56083,56468,63286,63338,66926
66927,49051,52037,58500,60393,66927


##### Comparison

In [65]:
print(customer_all_seqs[2])
print(customer_all_seqs[3])
print(customer_all_seqs[4])
print(30*'-')
print(sequence_indices.loc[12847].values)
print(sequence_indices.loc[15627].values)
print(sequence_indices.loc[18908].values)

[   -1    -1  1888 10080 12847]
[   -1  1888 10080 12847 15627]
[ 1888 10080 12847 15627 18908]
------------------------------
[   -1    -1  1888 10080 12847]
[   -1  1888 10080 12847 15627]
[ 1888 10080 12847 15627 18908]


##### Torch dataset

In [28]:
if torch.cuda.is_available():
    DEVICE = "cuda" 
else:
    DEVICE = "cpu"
print("Selected device is",DEVICE)

Selected device is cuda


In [29]:
x_train = torch.FloatTensor(train_df[input_features].values)
x_valid = torch.FloatTensor(valid_df[input_features].values)
y_train = torch.FloatTensor(train_df[output_feature].values)
y_valid = torch.FloatTensor(valid_df[output_feature].values)

In [30]:
SEED = 42
seed_everything(SEED)

In [31]:
train_loader_params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 0}


# Generators

training_set = FraudSequenceDataset(x_train, y_train,train_df['CUSTOMER_ID'].values, train_df['TX_DATETIME'].values,seq_len,padding_mode = "zeros")
training_generator = torch.utils.data.DataLoader(training_set, **train_loader_params)

In [32]:
x_batch, y_batch = next(iter(training_generator))

In [33]:
# batch size, number of features, sequence length
x_batch.shape

torch.Size([64, 15, 5])

In [34]:
y_batch.shape

torch.Size([64])

##### CNN

In [35]:
seed_everything(SEED)

training_set = FraudSequenceDataset(x_train, 
                                    y_train,train_df['CUSTOMER_ID'].values, 
                                    train_df['TX_DATETIME'].values,
                                    seq_len,
                                    padding_mode = "zeros")

valid_set = FraudSequenceDataset(x_valid, 
                                 y_valid,
                                 valid_df['CUSTOMER_ID'].values, 
                                 valid_df['TX_DATETIME'].values,
                                 seq_len,
                                 padding_mode = "zeros")

training_generator,valid_generator = prepare_generators(training_set, valid_set, batch_size=64)
cnn = FraudConvNet(x_train.shape[1], seq_len).to(DEVICE)
cnn

FraudConvNet(
  (padding1): ConstantPad1d(padding=(1, 0), value=0)
  (conv1): Conv1d(15, 100, kernel_size=(2,), stride=(1,))
  (pooling): MaxPool1d(kernel_size=5, stride=5, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=100, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [74]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=64,
    num_workers=0,
    seq_len=5,
    hidden_size = 100,
    conv1_num_filters = 100,
    conv1_filter_size=2,
    max_pooling=True,
    optimizer='adam',
    lr=0.0001,
    early_stopping=True,
    early_stopping_patience=2,
    max_epochs=100,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['cnn', 'imbalance-not-considered'])
config = wandb.config

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

In [75]:
optimizer = torch.optim.Adam(cnn.parameters(), lr = 0.0001)
criterion = torch.nn.BCELoss().to(DEVICE)
cnn,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(cnn,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  verbose=True,
                  save_path='models/DL/cnn/cnn_model.pt')


Epoch 0: train loss: 0.11350999349535908
valid loss: 0.04332761334234981
New best score: 0.04332761334234981

Epoch 1: train loss: 0.04622721335855998
valid loss: 0.03007508676799391
New best score: 0.03007508676799391

Epoch 2: train loss: 0.03614304168083052
valid loss: 0.026291765901387307
New best score: 0.026291765901387307

Epoch 3: train loss: 0.03290854613287784
valid loss: 0.024909574793946874
New best score: 0.024909574793946874

Epoch 4: train loss: 0.030797378536461573
valid loss: 0.024116291352318693
New best score: 0.024116291352318693

Epoch 5: train loss: 0.0292948524776604
valid loss: 0.023185593134576018
New best score: 0.023185593134576018

Epoch 6: train loss: 0.027989952710820415
valid loss: 0.02238688605819712
New best score: 0.02238688605819712

Epoch 7: train loss: 0.026988997871820134
valid loss: 0.022145571418951362
New best score: 0.022145571418951362

Epoch 8: train loss: 0.026260754396697332
valid loss: 0.02172331586085728
New best score: 0.021723315860857

In [76]:
start_time=time.time()
valid_predictions = get_predictions_sequential(cnn, valid_generator)
prediction_execution_time=time.time()-start_time

In [77]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.85,0.566,0.586,0.264


In [78]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('cnn', type='cnn', description='trained cnn with 1 conv1d layer, max pooling and 1 dense layer')
artifact.add_dir('models/DL/cnn')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\cnn)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
AUC ROC,0.85
Average precision,0.566
Card Precision@100,0.264
F1 score,0.586
Prediction execution time,10.05
Training execution time,365.268
train loss,0.02315
val loss,0.02139


##### LSTM

In [79]:
lstm = FraudLSTM(x_train.shape[1]).to(DEVICE)
optimizer = torch.optim.Adam(lstm.parameters(), lr = 0.0001)
criterion = torch.nn.BCELoss()
lstm

FraudLSTM(
  (lstm): LSTM(15, 100, batch_first=True)
  (fc1): Linear(in_features=100, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [80]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=64,
    num_workers=0,
    seq_len=5,
    hidden_size = 100,
    hidden_size_lstm = 100,
    num_layers_lstm = 1,
    dropout = 0,
    optimizer='adam',
    lr=0.0001,
    early_stopping=True,
    early_stopping_patience=2,
    max_epochs=100,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['lstm', 'imbalance-not-considered'])
config = wandb.config

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

In [82]:
lstm,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(lstm,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  verbose=True,
                  save_path='models/DL/lstm/lstm_model.pt')


Epoch 0: train loss: 0.11769908162441112
valid loss: 0.02722328091523727
New best score: 0.02722328091523727

Epoch 1: train loss: 0.033310829799793246
valid loss: 0.024378997859763888
New best score: 0.024378997859763888

Epoch 2: train loss: 0.029553174446995796
valid loss: 0.022223314901182803
New best score: 0.022223314901182803

Epoch 3: train loss: 0.026766734665177806
valid loss: 0.021396628077411668
New best score: 0.021396628077411668

Epoch 4: train loss: 0.024907904123285533
valid loss: 0.020335928123963416
New best score: 0.020335928123963416

Epoch 5: train loss: 0.023890609810454683
valid loss: 0.020084351468373274
New best score: 0.020084351468373274

Epoch 6: train loss: 0.023037582767431057
valid loss: 0.01964684613150814
New best score: 0.01964684613150814

Epoch 7: train loss: 0.02251058735496972
valid loss: 0.01924232041127369
New best score: 0.01924232041127369

Epoch 8: train loss: 0.022025993510316807
valid loss: 0.019455202991560182
1  iterations since best sco

In [83]:
start_time=time.time()
valid_predictions = get_predictions_sequential(lstm, valid_generator)
prediction_execution_time=time.time()-start_time

In [84]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.858,0.662,0.697,0.277


In [85]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('lstm', type='lstm', description='trained LSTM with 1 lstm layer and 1 hidden dense layer')
artifact.add_dir('models/DL/lstm')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\lstm)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val loss,█▆▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
AUC ROC,0.858
Average precision,0.662
Card Precision@100,0.277
F1 score,0.697
Prediction execution time,9.212
Training execution time,566.41576
train loss,0.01826
val loss,0.01788


##### LSTM with Attention

In [86]:
x_batch, y_batch = next(iter(training_generator))

In [87]:
# sequence of all hidden states, and the last hidden and cell states - on previous LSTM
out_seq, (last_hidden,last_cell) = lstm.lstm(x_batch.transpose(1,2))

In [88]:
last_hidden.shape

torch.Size([1, 64, 100])

In [89]:
out_seq.shape

torch.Size([64, 5, 100])

In [90]:
# the hidden states of the whole batch
test_hidden_states_seq = out_seq

test_context_projector = torch.nn.Linear(x_batch.shape[1], out_seq.shape[2]).to(DEVICE)
# the context vector of the whole batch
test_context_vector = test_context_projector(x_batch[:,:,-1:].transpose(1,2))

In [91]:
seed_everything(SEED)
test_attention = Attention(100).to(DEVICE)

In [92]:
output_state, attn = test_attention(test_context_vector,test_hidden_states_seq)
output_state.shape

torch.Size([64, 1, 100])

In [93]:
attn[0,0]

tensor([0.5409, 0.4099, 0.0408, 0.0066, 0.0017], device='cuda:0',
       grad_fn=<SelectBackward0>)

In [94]:
# what happens if the last hidden state is used as a context vector instead
output, attn = test_attention(test_hidden_states_seq[:,4:,:],test_hidden_states_seq)

In [95]:
attn[0,0]

tensor([6.4132e-07, 5.8130e-06, 3.3866e-04, 1.5329e-02, 9.8433e-01],
       device='cuda:0', grad_fn=<SelectBackward0>)

In [96]:
seed_everything(SEED)
lstm_attn = FraudLSTMWithAttention(x_train.shape[1]).to(DEVICE)
optimizer = torch.optim.Adam(lstm_attn.parameters(), lr = 0.00008)
criterion = torch.nn.BCELoss().to(DEVICE)
training_generator,valid_generator = prepare_generators(training_set,valid_set,batch_size=64)
lstm_attn

FraudLSTMWithAttention(
  (lstm): LSTM(15, 100, batch_first=True)
  (ff): Linear(in_features=15, out_features=100, bias=True)
  (attention): Attention(
    (linear_out): Linear(in_features=200, out_features=100, bias=True)
  )
  (fc1): Linear(in_features=100, out_features=100, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [97]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=64,
    num_workers=0,
    seq_len=5,
    hidden_size = 100,
    hidden_size_lstm = 100,
    num_layers_lstm = 1,
    dropout = 0,
    attention_out_dim=100,
    optimizer='adam',
    lr=0.00008,
    early_stopping=True,
    early_stopping_patience=2,
    max_epochs=100,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['lstm', 'imbalance-not-considered', 'attention'])
config = wandb.config

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

In [98]:
lstm_attn,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(lstm_attn,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  verbose=True,
                  save_path='models/DL/lstm_attention/lstm_attention_model.pt')


Epoch 0: train loss: 0.10345629619253732
valid loss: 0.0217021710073952
New best score: 0.0217021710073952

Epoch 1: train loss: 0.026198114701422078
valid loss: 0.020528786274212632
New best score: 0.020528786274212632

Epoch 2: train loss: 0.02440665406609011
valid loss: 0.019647970981641463
New best score: 0.019647970981641463

Epoch 3: train loss: 0.023379573382812963
valid loss: 0.019510614707088862
New best score: 0.019510614707088862

Epoch 4: train loss: 0.022794361631710964
valid loss: 0.019047817616097508
New best score: 0.019047817616097508

Epoch 5: train loss: 0.022292002547958805
valid loss: 0.018893860004043278
New best score: 0.018893860004043278

Epoch 6: train loss: 0.021784094891717468
valid loss: 0.01878023694073425
New best score: 0.01878023694073425

Epoch 7: train loss: 0.02147723132669641
valid loss: 0.01882252073325115
1  iterations since best score.

Epoch 8: train loss: 0.02097141956182617
valid loss: 0.018680794560604938
New best score: 0.018680794560604938

In [99]:
start_time=time.time()
valid_predictions = get_predictions_sequential(lstm_attn, valid_generator)
prediction_execution_time=time.time()-start_time

In [100]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.859,0.648,0.685,0.273


In [101]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('lstm_attention', type='lstm', description='trained LSTM with 1 lstm layer and 1 hidden dense layer')
artifact.add_dir('models/DL/lstm_attention')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\lstm_attention)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val loss,█▆▄▄▃▃▃▃▂▃▂▃▂▁▂▂▁

0,1
AUC ROC,0.859
Average precision,0.648
Card Precision@100,0.273
F1 score,0.685
Prediction execution time,10.368
Training execution time,442.53794
train loss,0.01812
val loss,0.01799


##### CNN hypertuned

In [105]:
seed_everything(SEED)
training_generator,valid_generator = prepare_generators(training_set, valid_set, batch_size=64)
cnn = FraudConvNetWithDropout(x_train.shape[1], hidden_size=500, conv2_params=(100,2), p=0.2).to(DEVICE)
cnn

FraudConvNetWithDropout(
  (padding1): ConstantPad1d(padding=(1, 0), value=0)
  (conv1): Conv1d(15, 100, kernel_size=(2,), stride=(1,))
  (padding2): ConstantPad1d(padding=(1, 0), value=0)
  (conv2): Conv1d(100, 100, kernel_size=(2,), stride=(1,))
  (pooling): MaxPool1d(kernel_size=5, stride=5, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=100, out_features=500, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=500, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (dropout): Dropout(p=0.2, inplace=False)
)

In [106]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=64,
    num_workers=0,
    seq_len=5,
    hidden_size = 500,
    conv1_num_filters = 100,
    conv1_filter_size=2,
    conv2_num_filters = 100,
    conv2_filter_size=2,
    max_pooling=True,
    optimizer='adam',
    lr=0.001,
    dropout=0.2,
    early_stopping=False,
    max_epochs=10,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['cnn', 'imbalance-not-considered', 'hypertuned'])
config = wandb.config

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016916666666656966, max=1.0…

In [107]:
optimizer = torch.optim.Adam(cnn.parameters(), lr = 0.001)
criterion = torch.nn.BCELoss().to(DEVICE)
cnn,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(cnn,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  max_epochs=10,
                  apply_early_stopping=False,
                  verbose=True,
                  save_path='models/DL/cnn_hypertuned/cnn_hypertuned_model.pt')


Epoch 0: train loss: 0.039928647961138514
valid loss: 0.022087659611674585

Epoch 1: train loss: 0.02892241059583999
valid loss: 0.023961937860356736

Epoch 2: train loss: 0.02733318814218719
valid loss: 0.02203040748263348

Epoch 3: train loss: 0.026041931413592866
valid loss: 0.021224333546022614

Epoch 4: train loss: 0.025165783186411095
valid loss: 0.021952382443075787

Epoch 5: train loss: 0.024826697816024534
valid loss: 0.02247233640051401

Epoch 6: train loss: 0.02387879006511195
valid loss: 0.021559467048847366

Epoch 7: train loss: 0.023080084664518073
valid loss: 0.02227427572682449

Epoch 8: train loss: 0.022720476658469473
valid loss: 0.02111525116933642

Epoch 9: train loss: 0.021741562783240652
valid loss: 0.0225722527006753


In [108]:
start_time=time.time()
valid_predictions = get_predictions_sequential(cnn, valid_generator)
prediction_execution_time=time.time()-start_time

In [109]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.861,0.58,0.6,0.261


In [110]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('cnn_hypertuned', type='cnn', description='hypertuned CNN with 2 conv layers, max pooling and dropout')
artifact.add_dir('models/DL/cnn_hypertuned')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\cnn_hypertuned)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▄▃▃▂▂▂▂▁▁
val loss,▃█▃▁▃▄▂▄▁▅

0,1
AUC ROC,0.861
Average precision,0.58
Card Precision@100,0.261
F1 score,0.6
Prediction execution time,9.184
Training execution time,245.792
train loss,0.02174
val loss,0.02257


##### LSTM hypertuned

In [115]:
seed_everything(SEED)
training_generator,valid_generator = prepare_generators(training_set, valid_set, batch_size=128)
lstm = FraudLSTM(x_train.shape[1], hidden_size=500, dropout_lstm=0.2).to(DEVICE)
optimizer = torch.optim.Adam(lstm.parameters(), lr = 0.001)
criterion = torch.nn.BCELoss().to(DEVICE)
lstm

FraudLSTM(
  (lstm): LSTM(15, 100, batch_first=True, dropout=0.2)
  (fc1): Linear(in_features=100, out_features=500, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=500, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [116]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=128,
    num_workers=0,
    seq_len=5,
    hidden_size = 500,
    hidden_size_lstm = 100,
    num_layers_lstm = 1,
    dropout = 0.2,
    optimizer='adam',
    lr=0.001,
    early_stopping=False,
    early_stopping_patience=2,
    max_epochs=5,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['lstm', 'imbalance-not-considered', 'hypertuned'])
config = wandb.config

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01693333333338766, max=1.0)…

In [117]:
lstm,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(lstm,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  max_epochs=5,
                  apply_early_stopping=False,
                  verbose=True,
                  save_path='models/DL/lstm_hypertuned/lstm_hypertuned_model.pt')


Epoch 0: train loss: 0.04487916065404907
valid loss: 0.021035679431497954

Epoch 1: train loss: 0.023487651652975245
valid loss: 0.020190327492843517

Epoch 2: train loss: 0.022156137640295944
valid loss: 0.01939645365051372

Epoch 3: train loss: 0.020882145353649916
valid loss: 0.01768297026839766

Epoch 4: train loss: 0.02023269282264393
valid loss: 0.017787850829193118


In [118]:
start_time=time.time()
valid_predictions = get_predictions_sequential(lstm, valid_generator)
prediction_execution_time=time.time()-start_time

In [119]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.86,0.663,0.69,0.28


In [120]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('lstm_hypertuned', type='lstm', description='hypertuned LSTM with a dropout')
artifact.add_dir('models/DL/lstm_hypertuned')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\lstm_hypertuned)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▂▂▁▁
val loss,█▆▅▁▁

0,1
AUC ROC,0.86
Average precision,0.663
Card Precision@100,0.28
F1 score,0.69
Prediction execution time,9.193
Training execution time,107.70441
train loss,0.02023
val loss,0.01779


##### LSTM with Attention hypertuned

In [36]:
seed_everything(SEED)
lstm_attn = FraudLSTMWithAttention(x_train.shape[1], hidden_size = 500, dropout_lstm=0.2).to(DEVICE)
optimizer = torch.optim.Adam(lstm_attn.parameters(), lr = 0.0001)
criterion = torch.nn.BCELoss().to(DEVICE)
training_generator,valid_generator = prepare_generators(training_set,valid_set,batch_size=128)
lstm_attn

FraudLSTMWithAttention(
  (lstm): LSTM(15, 100, batch_first=True, dropout=0.2)
  (ff): Linear(in_features=15, out_features=100, bias=True)
  (attention): Attention(
    (linear_out): Linear(in_features=200, out_features=100, bias=True)
  )
  (fc1): Linear(in_features=100, out_features=500, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=500, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [37]:
config = dict(
    dataset_id = 'fraud-detection-handbook-transformed',
    validation = 'train test split',
    seed = 42,
    begin_date = '2018-07-25',
    delta_train = 7,
    delta_delay = 7,
    delta_test = 7,
    batch_size=128,
    num_workers=0,
    seq_len=5,
    hidden_size = 500,
    hidden_size_lstm = 100,
    num_layers_lstm = 1,
    dropout = 0.2,
    attention_out_dim=100,
    optimizer='adam',
    lr=0.0001,
    early_stopping=True,
    early_stopping_patience=2,
    max_epochs=10,
    scale=True,
    criterion='bce'
)
wandb.init(project="mgr-anomaly-tsxai-project", config=config, tags=['lstm', 'imbalance-not-considered', 'attention', 'hypertuned'])
config = wandb.config

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mchamera[0m ([33mmgr-anomaly-tsxai[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

In [38]:
lstm_attn,training_execution_time,train_losses,valid_losses = \
    training_loop_and_saving_best_wandb(lstm_attn,
                  training_generator,
                  valid_generator,
                  optimizer,
                  criterion,
                  verbose=True,
                  max_epochs=10,
                  apply_early_stopping=False,
                  save_path='models/DL/lstm_attention_hypertuned/lstm_attention_hypertuned_model.pt')


Epoch 0: train loss: 0.10548595289822604
valid loss: 0.021504723847697298

Epoch 1: train loss: 0.02578488107811638
valid loss: 0.020385841143065374

Epoch 2: train loss: 0.024271011042691783
valid loss: 0.020096459284660664

Epoch 3: train loss: 0.02343763606369061
valid loss: 0.019549743625677015

Epoch 4: train loss: 0.0227407039727225
valid loss: 0.019702040369461767

Epoch 5: train loss: 0.022185817820529308
valid loss: 0.01921654843870656

Epoch 6: train loss: 0.02175957207607166
valid loss: 0.019327489653343993

Epoch 7: train loss: 0.021197493755246155
valid loss: 0.019075061447763872

Epoch 8: train loss: 0.020854293203371333
valid loss: 0.018955326651824908

Epoch 9: train loss: 0.02040450737979014
valid loss: 0.018721931482315487


In [39]:
start_time=time.time()
valid_predictions = get_predictions_sequential(lstm_attn, valid_generator)
prediction_execution_time=time.time()-start_time

In [40]:
predictions_df = valid_df
predictions_df['predictions'] = valid_predictions
    
performance_df = performance_assessment_f1_included(predictions_df, top_k_list=[100])
performance_df

Unnamed: 0,AUC ROC,Average precision,F1 score,Card Precision@100
0,0.859,0.64,0.688,0.277


In [41]:
wandb.log({'Training execution time': training_execution_time})
wandb.log({'Prediction execution time': prediction_execution_time})
wandb.log({'AUC ROC': performance_df.loc[0,'AUC ROC']})
wandb.log({'Average precision': performance_df.loc[0,'Average precision']})
wandb.log({'F1 score': performance_df.loc[0,'F1 score']})
wandb.log({'Card Precision@100': performance_df.loc[0,'Card Precision@100']})

artifact = wandb.Artifact('lstm_attention_hypertuned', type='lstm', description='hypertuned LSTM')
artifact.add_dir('models/DL/lstm_attention_hypertuned')
wandb.log_artifact(artifact)
wandb.finish()

[34m[1mwandb[0m: Adding directory to artifact (.\models\DL\lstm_attention_hypertuned)... Done. 0.0s


0,1
AUC ROC,▁
Average precision,▁
Card Precision@100,▁
F1 score,▁
Prediction execution time,▁
Training execution time,▁
train loss,█▁▁▁▁▁▁▁▁▁
val loss,█▅▄▃▃▂▃▂▂▁

0,1
AUC ROC,0.859
Average precision,0.64
Card Precision@100,0.277
F1 score,0.688
Prediction execution time,9.26
Training execution time,224.35897
train loss,0.0204
val loss,0.01872
