# Fraud Detection Use case using Deep Learning

In [27]:
import pandas as pd
import torch

url ="fraud_transactions.csv"

df_actual = pd.read_csv(url, sep=",")
df_actual.head()

Unnamed: 0.1,Unnamed: 0,TRANSACTION_ID,TX_DATETIME,CUSTOMER_ID,TERMINAL_ID,TX_AMOUNT,TX_FRAUD
0,0,0,2023-02-01 00:43:37,901,8047,82,1
1,1,1,2023-02-01 01:20:13,2611,7777,15,0
2,2,2,2023-02-01 01:22:52,4212,3336,53,0
3,3,3,2023-02-01 01:26:40,1293,7432,59,0
4,4,4,2023-02-01 01:52:23,2499,1024,25,0


In [28]:
df_transactions = df_actual[['CUSTOMER_ID','TERMINAL_ID','TX_AMOUNT','TX_FRAUD']]
#df_transactions=df_transactions.head(10)
df_transactions=df_transactions.head(50000)
df_transactions

Unnamed: 0,CUSTOMER_ID,TERMINAL_ID,TX_AMOUNT,TX_FRAUD
0,901,8047,82,1
1,2611,7777,15,0
2,4212,3336,53,0
3,1293,7432,59,0
4,2499,1024,25,0
...,...,...,...,...
49995,1541,3469,66,0
49996,489,3854,89,1
49997,4125,7519,12,0
49998,3360,5215,86,1


In [29]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit

print("No of Fraud Transactions:", df_transactions['TX_FRAUD'].value_counts()[0])
print("No of Non Fraud Transactions:", df_transactions['TX_FRAUD'].value_counts()[1])

print('No Frauds', round(df_transactions['TX_FRAUD'].value_counts()[0]/len(df_transactions) * 100,2), '% of the dataset')
print('Frauds', round(df_transactions['TX_FRAUD'].value_counts()[1]/len(df_transactions) * 100,2), '% of the dataset')



X = df_transactions.drop('TX_FRAUD', axis=1)
y = df_transactions['TX_FRAUD']



No of Fraud Transactions: 37870
No of Non Fraud Transactions: 12130
No Frauds 75.74 % of the dataset
Frauds 24.26 % of the dataset


In [30]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [31]:
x_train = torch.FloatTensor(X_train.values)
x_test = torch.FloatTensor(X_test.values)
y_train = torch.FloatTensor(y_train.values)
y_test = torch.FloatTensor(y_test.values)

In [32]:
if torch.cuda.is_available():
    DEVICE = "cuda" 
else:
    DEVICE = "cpu"
print("Selected device is",DEVICE)

Selected device is cpu


In [33]:
class FraudDataset(torch.utils.data.Dataset):
    
    def __init__(self, x, y):
        'Initialization'
        self.x = x
        self.y = y

    def __len__(self):
        'Returns the total number of samples'
        return len(self.x)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample index
        if self.y is not None:
            return self.x[index].to(DEVICE), self.y[index].to(DEVICE)
        else:
            return self.x[index].to(DEVICE)

In [34]:
train_loader_params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 0}
test_loader_params = {'batch_size': 64,
          'num_workers': 0}

# Loaders

training_set = FraudDataset(x_train, y_train)

testing_set = FraudDataset(x_test, y_test)


train_loader = torch.utils.data.DataLoader(training_set, **train_loader_params)
test_loader = torch.utils.data.DataLoader(testing_set, **test_loader_params)

In [45]:
class SimpleFraudMLP(torch.nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.first_sec = torch.nn.Sequential(
                           torch.nn.Linear(3, 450),
                           torch.nn.ReLU(),
        )
        self.second_sec = torch.nn.Sequential(
                           torch.nn.Linear(450, 450),
                           torch.nn.ReLU(),
                           torch.nn.Linear(450, 1),
                           torch.nn.Sigmoid(),
        )
        
    def forward(self, x):
        return self.second_sec(self.first_sec(x))


In [46]:
fraud_nn_model = SimpleFraudMLP().to(DEVICE)

In [47]:
from torch import nn, optim
#loss_func = nn.CrossEntropyLoss()   
loss_func = torch.nn.BCELoss().to(DEVICE)
loss_func


BCELoss()

In [48]:
#optimizer = optim.Adam(fraud_nn_model.parameters(), lr = 0.01)  
optimizer = torch.optim.SGD(fraud_nn_model.parameters(), lr = 0.07)
optimizer


SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.07
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [49]:
fraud_nn_model.eval()

SimpleFraudMLP(
  (first_sec): Sequential(
    (0): Linear(in_features=3, out_features=450, bias=True)
    (1): ReLU()
  )
  (second_sec): Sequential(
    (0): Linear(in_features=450, out_features=450, bias=True)
    (1): ReLU()
    (2): Linear(in_features=450, out_features=1, bias=True)
    (3): Sigmoid()
  )
)

In [50]:
def evaluate_model(model,generator,loss_func):
    model.eval()
    batch_losses = []
    for x_batch, y_batch in generator:
        # Forward pass
        y_pred = model(x_batch)
        # Compute Loss
        loss = loss_func(y_pred.squeeze(), y_batch)
        batch_losses.append(loss.item())
    mean_loss = np.mean(batch_losses)    
    return mean_loss
    
#evaluate_model(fraud_nn_model,testing_loader,loss_func)

In [53]:
def train(fraud_nn_mode,num_epochs):
    
    fraud_nn_model.train()
               
    for epoch in range(num_epochs):

         #for x_batch, y_batch in train_loader:
                
            output = fraud_nn_model(x_train)  
            
            print(output.squeeze())
            
            print(y_train)
            
            loss = loss_func(output.squeeze(), y_train)
            
            # clear gradients for this training step   
            optimizer.zero_grad()           
            
            # backpropagation, compute gradients 
            loss.backward()    
            # apply gradients             
            optimizer.step()                
            
            print(epoch, loss.item())

        
    pass
            
train (fraud_nn_model, 10)

tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
0 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
1 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
2 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
3 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
4 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
5 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
6 24.191429138183594
tensor([0., 0., 0.,  ..., 0., 0., 0.], grad_fn=<SqueezeBackward0>)
tensor([1., 1., 1.,  ..., 0., 0., 0.])
7 24.

In [16]:
import numpy as np
import time

n_epochs = 50
#Setting the model in training mode
fraud_nn_model.train()

#Training loop
start_time=time.time()
epochs_train_losses = []
epochs_test_losses = []

for epoch in range(n_epochs):
    train_loss=[]
    train_loss1=0
    for x_batch, y_batch in train_loader:
        
        fraud_nn_model.train()
        
        # Performing the forward pass on the current batch
        y_pred = fraud_nn_model(x_batch)
        # Computing the loss given the current predictions
        loss = loss_func(y_pred.squeeze(), y_batch)
        # Computing the gradients over the backward pass
        
        # Removing previously computed gradients
        optimizer.zero_grad()
        
        loss.backward()
        # Performing an optimization step from the current gradients
        optimizer.step()
        # Storing the current step's loss for display purposes
        train_loss.append(loss.item())
        
        train_loss1 += loss.item()*x_batch.size(0)
    avg_loss1 = train_loss1/len(train_loader.dataset)
    print("Batch Loss{} :", avg_loss1)
    
    #showing last training loss after each epoch
    epochs_train_losses.append(np.mean(train_loss))
    print('Epoch {}: train loss: {}'.format(epoch, np.mean(train_loss)))
    
    
    #evaluating the model on the test set after each epoch    
    val_loss = evaluate_model(fraud_nn_model,test_loader,loss_func)    
    epochs_test_losses.append(val_loss)
    print('test loss: {}'.format(val_loss))   
    print("")
  
    
training_execution_time=time.time()-start_time
training_execution_time

Batch Loss{} : 75.28534032679967
Epoch 0: train loss: 75.28690740177356
test loss: 75.60062057819772

Batch Loss{} : 75.8085714250837
Epoch 1: train loss: 75.80920279047825
test loss: 75.60062057819772

Batch Loss{} : 75.80857143205915
Epoch 2: train loss: 75.80757051847971
test loss: 75.60062057819772

Batch Loss{} : 75.80857142333984
Epoch 3: train loss: 75.80961085847788
test loss: 75.60062057819772

Batch Loss{} : 75.80857143380301
Epoch 4: train loss: 75.81001894042522
test loss: 75.60062057819772

Batch Loss{} : 75.80857143380301
Epoch 5: train loss: 75.80716245048006
test loss: 75.60062057819772

Batch Loss{} : 75.80857142857143
Epoch 6: train loss: 75.80838665447898
test loss: 75.60062057819772

Batch Loss{} : 75.80857143380301
Epoch 7: train loss: 75.81001894042522
test loss: 75.60062057819772

Batch Loss{} : 75.80857142857143
Epoch 8: train loss: 75.80838665447898
test loss: 75.60062057819772

Batch Loss{} : 75.80857142857143
Epoch 9: train loss: 75.80838665447898
test loss: 

111.07959914207458