In [2]:
#Importing Packages
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.preprocessing as pre
from sklearn.preprocessing import StandardScaler
import os
from sklearn.model_selection import train_test_split as tts
from sklearn.preprocessing import LabelEncoder


In [3]:
#Move to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# #Importing data from UNSW_NB15
# train_df = pd.read_csv('data/UNSW_NB15_training-set.csv')
# test_df = pd.read_csv('data/UNSW_NB15_testing-set.csv')
#https://www.kaggle.com/datasets/nikhil1e9/loan-default
df = pd.read_csv('data/creditcard.csv')

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [50]:
object_cols = ['type']

# Initialize LabelEncoder
le = LabelEncoder()

# Apply Label Encoding to each object column
for col in object_cols:
    df[col] = le.fit_transform(df[col])

train_df, test_df = tts(df)


In [51]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 76209 entries, 86152 to 78232
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   step            76209 non-null  int64  
 1   type            76209 non-null  int64  
 2   amount          76209 non-null  float64
 3   nameOrig        76209 non-null  object 
 4   oldbalanceOrg   76209 non-null  float64
 5   newbalanceOrig  76209 non-null  float64
 6   nameDest        76209 non-null  object 
 7   oldbalanceDest  76209 non-null  float64
 8   newbalanceDest  76209 non-null  float64
 9   isFraud         76209 non-null  int64  
 10  isFlaggedFraud  76209 non-null  int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 7.0+ MB


In [52]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25404 entries, 30793 to 89891
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   step            25404 non-null  int64  
 1   type            25404 non-null  int64  
 2   amount          25404 non-null  float64
 3   nameOrig        25404 non-null  object 
 4   oldbalanceOrg   25404 non-null  float64
 5   newbalanceOrig  25404 non-null  float64
 6   nameDest        25404 non-null  object 
 7   oldbalanceDest  25404 non-null  float64
 8   newbalanceDest  25404 non-null  float64
 9   isFraud         25404 non-null  int64  
 10  isFlaggedFraud  25404 non-null  int64  
dtypes: float64(5), int64(4), object(2)
memory usage: 2.3+ MB


In [53]:
#Collecting required features
feature_columns = ['step', 'type','amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']

target = 'isFraud'

X_train = train_df[feature_columns].values
y_train = train_df[target].values

#
# le = pre.LabelEncoder()
# train_df['attack_cat_encoded'] = le.fit_transform(train_df['attack_cat'])

X_test = test_df[feature_columns].values
y_test = test_df[target].values

In [54]:
original_train_rows = len(train_df)
train_df = train_df[~((train_df['oldbalanceOrg'] == 0) & (train_df['newbalanceOrig'] == 0))]
train_df = train_df[~((train_df['oldbalanceDest'] == 0) & (train_df['oldbalanceDest'] == 0))]
filtered_train_rows = len(train_df)
print(f"Train_df: Filtered {original_train_rows - filtered_train_rows} rows where oldbalanceOrg and newbalanceOrig were both 0.")
print(f"Remaining train_df rows: {filtered_train_rows}")

original_test_rows = len(test_df)
test_df = test_df[~((test_df['oldbalanceOrg'] == 0) & (test_df['newbalanceOrig'] == 0))]
test_df = test_df[~((test_df['oldbalanceDest'] == 0) & (test_df['newbalanceDest'] == 0))]
filtered_test_rows = len(test_df)
print(f"Test_df: Filtered {original_test_rows - filtered_test_rows} rows where oldbalanceOrg and newbalanceOrig were both 0.")
print(f"Remaining test_df rows: {filtered_test_rows}")


Train_df: Filtered 49714 rows where oldbalanceOrg and newbalanceOrig were both 0.
Remaining train_df rows: 26495
Test_df: Filtered 15533 rows where oldbalanceOrg and newbalanceOrig were both 0.
Remaining test_df rows: 9871


In [55]:
#Converting to tensor
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Converting to tensor and moving to the selected device

X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).unsqueeze(1).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).unsqueeze(1).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

In [56]:
BATCH_SIZE = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("Data loaded from CSV and prepared for PyTorch.")
print("Training data shape:", X_train_tensor.shape)
print("Test data shape:", X_test_tensor.shape)
print(f"Number of unique labels (classes): {len(np.unique(y_train))}")


Data loaded from CSV and prepared for PyTorch.
Training data shape: torch.Size([76209, 1, 7])
Test data shape: torch.Size([25404, 1, 7])
Number of unique labels (classes): 2


In [57]:
class Net1DCNN(nn.Module):
    def __init__(self, input_channels, sequence_length, num_classes):
        super(Net1DCNN, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)


        # self.fc1 = nn.Linear(160 * 1, 64)
        # self.fc2 = nn.Linear(64, num_classes)

        with torch.no_grad():
            dummy_input = torch.randn(1, input_channels, sequence_length)
            dummy_output = self.pool1(self.relu1(self.conv1(dummy_input)))
            dummy_output = self.pool2(self.relu2(self.conv2(dummy_output)))
            flattened_size = dummy_output.view(dummy_output.size(0), -1).shape[1]

        self.fc1 = nn.Linear(flattened_size, 128)
        self.relu_fc = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

        # self.dropout1 = nn.Dropout(p=0.2)
        # self.dropout2 = nn.Dropout(p=0.2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)

        x = x.view(x.size(0), -1)
        # x = self.dropout1(x)

        x = self.fc1(x)
        x = self.relu_fc(x)
        # x = self.dropout2(x)

        x = self.fc2(x)
        return x


In [60]:
input_channels = X_train_tensor.shape[1]
sequence_length = X_train_tensor.shape[2]
num_classes = len(np.unique(y_train))

model = Net1DCNN(input_channels=input_channels, sequence_length=sequence_length, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()

#Optimizers
# optimizer = optim.Adam(model.parameters(), lr=0.00001)
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.5)

num_epochs = 10
train_losses = []
train_accuracies = []
print("\nStarting model training...")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    train_losses.append(running_loss / len(train_loader))
    train_accuracies.append(train_accuracy)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")

print("Training finished.")


Starting model training...
Epoch [1/10], Loss: 0.0129, Train Accuracy: 99.86%
Epoch [2/10], Loss: 0.0074, Train Accuracy: 99.86%
Epoch [3/10], Loss: 0.0069, Train Accuracy: 99.86%
Epoch [4/10], Loss: 0.0066, Train Accuracy: 99.87%
Epoch [5/10], Loss: 0.0066, Train Accuracy: 99.87%
Epoch [6/10], Loss: 0.0064, Train Accuracy: 99.86%
Epoch [7/10], Loss: 0.0063, Train Accuracy: 99.86%
Epoch [8/10], Loss: 0.0062, Train Accuracy: 99.87%
Epoch [9/10], Loss: 0.0061, Train Accuracy: 99.87%
Epoch [10/10], Loss: 0.0061, Train Accuracy: 99.87%
Training finished.


In [61]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
test_accuracies = 100 * correct / len(test_loader)
print(f"\nAccuracy on the test set: {accuracy:.2f}%")


Accuracy on the test set: 99.87%


In [62]:
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, '1d_cnn_model.pth')


torch.save(model.state_dict(), model_path)
print(f"\nModel saved to {model_path}")


Model saved to models\1d_cnn_model.pth
