In [1]:
import pandas as pd
import numpy as np
import torch

# Data processing
data = pd.read_csv('./Dataset/creditcard_2023.csv')

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

data = data.drop(['id'], axis=1)
print(data.head(2))

         V1        V2        V3        V4        V5        V6        V7  \
0 -0.260648 -0.469648  2.496266 -0.083724  0.129681  0.732898  0.519014   
1  0.985100 -0.356045  0.558056 -0.429654  0.277140  0.428605  0.406466   

         V8        V9       V10  ...       V21       V22       V23       V24  \
0 -0.130006  0.727159  0.637735  ... -0.110552  0.217606 -0.134794  0.165959   
1 -0.133118  0.347452  0.529808  ... -0.194936 -0.605761  0.079469 -0.577395   

       V25       V26       V27       V28    Amount  Class  
0  0.12628 -0.434824 -0.081230 -0.151045  17982.10      0  
1  0.19009  0.296503 -0.248052 -0.064512   6531.37      0  

[2 rows x 30 columns]


In [3]:
from sklearn.preprocessing import StandardScaler
data = data.dropna(axis=0)

Scaler = StandardScaler()
arbitrary_features = ["V"+str(i+1) for i in range(27)]

# Scale the arbitray feature columns
for feature in arbitrary_features:
    data[feature] = Scaler.fit_transform(data[feature].values.reshape(-1, 1))

# Scale the amount column
data["Amount"] = Scaler.fit_transform(data["Amount"].values.reshape(-1, 1))

print(data.head(2))



         V1        V2        V3        V4        V5        V6        V7  \
0 -0.260648 -0.469648  2.496266 -0.083724  0.129681  0.732898  0.519014   
1  0.985100 -0.356045  0.558056 -0.429654  0.277140  0.428605  0.406466   

         V8        V9       V10  ...       V21       V22       V23       V24  \
0 -0.130006  0.727159  0.637735  ... -0.110552  0.217606 -0.134794  0.165959   
1 -0.133118  0.347452  0.529808  ... -0.194936 -0.605761  0.079469 -0.577395   

       V25       V26       V27       V28    Amount  Class  
0  0.12628 -0.434824 -0.081230 -0.151045  0.858447      0  
1  0.19009  0.296503 -0.248052 -0.064512 -0.796369      0  

[2 rows x 30 columns]


In [4]:
fraud = data["Class"].values
data = data.drop("Class", axis=1)

print(data.shape)

(568630, 29)


In [5]:
fraud = fraud.reshape(fraud.shape[0], 1)
data = data.values

# finally verifying the shapes of the inputs and the label
print("data.shape: ", data.shape)
print("lables shape: ", fraud.shape)

data.shape:  (568630, 29)
lables shape:  (568630, 1)


In [6]:
X = data
Y = fraud

# Train test split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Checking the shapes
print("x_train shape: ", x_train.shape)
print("x_test shape: ", x_test.shape)

x_train shape:  (454904, 29)
x_test shape:  (113726, 29)


In [7]:
from torch.utils.data import DataLoader, TensorDataset

x_train_tensor = torch.from_numpy(x_train).to(device)
y_train_tensor = torch.from_numpy(y_train).to(device)

x_test_tensor = torch.from_numpy(x_test).to(device)
y_test_tensor = torch.from_numpy(y_test).to(device)

Train_tensor = TensorDataset(x_train_tensor, y_train_tensor)
Test_tensor = TensorDataset(x_test_tensor, y_test_tensor)

Train_dataset = DataLoader(Train_tensor, batch_size=512, shuffle=True)
Test_dataset = DataLoader(Test_tensor, batch_size=512, shuffle=True)

dat = next(iter(Train_dataset))
print(len(dat[0]))

512


In [8]:
# Building the model
import torch
import torch.nn as nn

class FeedForward(nn.Module):
    
    def __init__(self, input_dim, num_neurons):
        super(FeedForward, self).__init__()
        self.input_dim = input_dim
        self.num_neurons = num_neurons
        
        self.net = nn.Sequential(
            nn.Linear(self.input_dim, self.num_neurons)
            ,nn.ReLU()
        )
        
    def forward(self, x):
        return self.net(x)
    
    def to(self, device):
        self.net.to(device)
        return self

class Net(nn.Module):
    
    def __init__(self, in_features, num_layers, num_neurons):
        super(Net, self).__init__()
        self.in_features = in_features
        self.num_layers = num_layers
        self.num_neurons = num_neurons
        
        self.fc1 = nn.Linear(self.in_features, self.num_neurons)
        self.relu = nn.ReLU()
        self.blocks = [FeedForward(self.num_neurons, self.num_neurons).to(device) for _ in range(self.num_layers)]
        self.output_layer = nn.Linear(self.num_neurons, 1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        output = self.relu(self.fc1(x))
        for block in self.blocks:
            output = block(output)
        output = self.sigmoid(self.output_layer(output))
        return output
    

In [9]:
# Defining hyperparams
epochs = 100
num_layers = 6
num_neurons = 64
alpha = 1e-4

# Define the model
model = Net(in_features=29, num_layers=num_layers, num_neurons=num_neurons)
model = model.to(device)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=alpha)
criterion = nn.BCELoss()

In [10]:
# Train the model
for epoch in range(epochs):
    train_loss = 0.0
    
    model.train()
    
    for data, label in Train_dataset:
        data = data.to(device)
        label = label.squeeze()
        label = label.to(device)
        optimizer.zero_grad()
        output = model.forward(data.float())
        
        loss = criterion(output.squeeze(), label.float())
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    train_loss = train_loss/len(Train_dataset.dataset)
    if(epoch%10==0):
        print('Epoch: {} \tTraining_loss: {:.6f}'.format(epoch, train_loss))

Epoch: 0 	Training_loss: 0.001348
Epoch: 10 	Training_loss: 0.000737
Epoch: 20 	Training_loss: 0.000423
Epoch: 30 	Training_loss: 0.000304
Epoch: 40 	Training_loss: 0.000253
Epoch: 50 	Training_loss: 0.000227
Epoch: 60 	Training_loss: 0.000205
Epoch: 70 	Training_loss: 0.000186
Epoch: 80 	Training_loss: 0.000170
Epoch: 90 	Training_loss: 0.000155


In [19]:
correct_preds = 0
total_samples = 0

with torch.no_grad():
      for data, labels in Test_dataset:
            labels = labels.squeeze()
            output = model.forward(data.float())
            output = output.squeeze(1)
            predictions = (output >= 0.5).float()
            correct_preds += (predictions == labels).sum().item()
            total_samples += labels.numel()
            
accuracy = correct_preds / total_samples
print("Model accuracy: {:.6f}".format(accuracy))


Model accuracy: 0.973823


In [20]:
# Saving the model
torch.save(model, 'fraud_detection_weights.pt')