In [41]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
import torch_geometric.nn as pyg_nn
from torch_geometric.data import Data, DataLoader


In [42]:
# Loading data
df = pd.read_csv('creditcard.csv')
x = df.drop(['Class'], axis=1).values.astype(np.float32)
y = df['Class'].values.astype(np.int64)
edge_index = torch.tensor([[], []], dtype=torch.long)

In [43]:
# Create edges based on the distance between nodes
# k = 5
# distances = kneighbors_graph(x.numpy(), k, mode='distance', include_self=False)
# edge_index = torch.tensor(distances.nonzero(), dtype=torch.long).t()
# edge_attr = torch.tensor(distances.data, dtype=torch.float)
# data = Data(x=x, y=y, edge_index=edge_index, edge_attr=edge_attr)


# Create a PyTorch Geometric Data object
data = Data(x=torch.tensor(x), y=torch.tensor(y.reshape(-1, 1)), edge_index=edge_index)

In [44]:
# Split the data into train and test sets
train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
train_mask[:int(0.8 * data.num_nodes)] = 1
data.train_mask = train_mask

test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
test_mask[int(0.8 * data.num_nodes):] = 1
data.test_mask = test_mask

In [46]:
# GNN model Definition
class FraudGNN(torch.nn.Module):
    def __init__(self, num_features):
        super(FraudGNN, self).__init__()
        self.conv1 = pyg_nn.GCNConv(num_features, 32)
        self.conv2 = pyg_nn.GCNConv(32, 16)
        self.conv3 = pyg_nn.GCNConv(16, 1)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv3(x, edge_index)
        x = torch.sigmoid(x)  # Added sigmoid activation
        return x

In [47]:
# Initialize the model and optimizer
model = FraudGNN(num_features=data.num_features)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [48]:
# Train the model
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.binary_cross_entropy(out[data.train_mask], data.y[data.train_mask].float())
    loss.backward()
    optimizer.step()

In [49]:
# Evaluate the model
model.eval()
out = model(data.x, data.edge_index)
pred = (out > 0.5).float()
acc = (pred[data.test_mask] == data.y[data.test_mask].float()).sum().item() / len(data.test_mask)
print('Accuracy:', acc)

# for epoch in range(10):
#     total_loss = 0
#     for data in DataLoader(train_data, batch_size=64, shuffle=True):
#         data = data.to(device)
#         optimizer.zero_grad()
#         output = model(data)
#         loss = F.binary_cross_entropy_with_logits(output, data.y.view(-1, 1))
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item() * data.num_graphs
#     print("Epoch {} - Loss: {:.4f}".format(epoch+1, total_loss/len(train_data)))

Accuracy: 0.19973877046561356
