In [None]:
pip install FrEIA

Collecting FrEIA
  Downloading FrEIA-0.2.tar.gz (34 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.0->FrEIA)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.0->FrEIA)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.0.0->FrEIA)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.0.0->FrEIA)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.0.0->FrEIA)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.0.0->F

In [None]:
# Dependencies used by this cell
import os

import pandas as pd
from google.colab import drive

# Expose Google Drive under /content/drive
drive.mount('/content/drive')

# Folder on Drive that holds the dataset, and the CSV expected inside it
data_dir = '/content/drive/My Drive/bundlenet/'
file_path = os.path.join(data_dir, 'creditcard.csv')

# Stop early with a descriptive error when the CSV is missing
if not os.path.exists(file_path):
    raise FileNotFoundError(f"The 'creditcard.csv' file is not found in the directory {data_dir}. Please make sure the file is in the correct location in your Google Drive.")

print("Credit Card Fraud Detection dataset found. Loading data...")

# Read the whole CSV into a DataFrame; later cells use it under the name `data`
data = pd.read_csv(file_path)

print("Data loaded successfully. Shape:", data.shape)

Mounted at /content/drive
Credit Card Fraud Detection dataset found. Loading data...
Data loaded successfully. Shape: (284807, 31)


In [None]:
import torch

# Report whether a CUDA device is visible and, if so, which one
cuda_ok = torch.cuda.is_available()
print("CUDA is available:", cuda_ok)
if cuda_ok:
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())

CUDA is available: True
GPU Name: Tesla T4
Number of GPUs: 1


In [None]:
# Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Scikit-learn imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

# SMOTE imports
from imblearn.over_sampling import SMOTE

# FrEIA imports
import FrEIA.framework as Ff
import FrEIA.modules as Fm

In [None]:
import torch
import torch.nn as nn
import FrEIA.framework as Ff
import FrEIA.modules as Fm

class BundleNet(nn.Module):
    """Conditional invertible network assembled from FrEIA coupling blocks.

    The graph is: an input node of size ``input_dim``, then ``num_inv_blocks``
    repetitions of (GLOW coupling block conditioned on a 1-dimensional
    condition node named ``'fraud_prob'``, followed by a seeded random
    permutation), then an output node.

    Args:
        input_dim: Number of features per sample.
        num_nbhds: Stored on the instance as ``self.num_nbhds``; nothing in
            this class reads it.
        width: Hidden width of every coupling subnetwork.
        num_inv_blocks: Number of (coupling, permutation) pairs.
        nn_depth: Number of hidden ``Linear(width, width)`` + ``ReLU`` pairs
            inside each coupling subnetwork.
    """

    def __init__(self, input_dim, num_nbhds=25, width=512, num_inv_blocks=5, nn_depth=5):
        super().__init__()
        self.input_dim = input_dim
        self.num_nbhds = num_nbhds

        def subnet_fc(c_in, c_out):
            # FrEIA calls this with the input/output sizes of each subnetwork.
            return self._make_subnet(c_in, c_out, width, nn_depth)

        # Building the invertible network
        nodes = [Ff.InputNode(input_dim, name='input')]
        cond_node = Ff.ConditionNode(1, name='fraud_prob')

        # Adding coupling blocks, each followed by a fixed (seeded) permutation
        for k in range(num_inv_blocks):
            nodes.append(Ff.Node(nodes[-1],
                               Fm.GLOWCouplingBlock,
                               {'subnet_constructor': subnet_fc, 'clamp': 2.0},
                               conditions=cond_node,
                               name=f'coupling_{k}'))
            nodes.append(Ff.Node(nodes[-1],
                               Fm.PermuteRandom,
                               {'seed': k},
                               name=f'permute_{k}'))

        nodes.append(Ff.OutputNode(nodes[-1], name='output'))
        self.model = Ff.GraphINN(nodes + [cond_node])

    @staticmethod
    def _make_subnet(c_in, c_out, width, depth):
        """Build one fully connected coupling subnetwork.

        Layout: ``Linear(c_in, width)``, ``ReLU``, then ``depth`` pairs of
        ``Linear(width, width)`` + ``ReLU``, then ``Linear(width, c_out)``.

        Every hidden layer is a fresh module. Building the hidden stack with
        ``[layer, relu] * depth`` would repeat the *same* ``nn.Linear`` object,
        so all hidden layers would share a single weight matrix.
        """
        layers = [nn.Linear(c_in, width), nn.ReLU()]
        for _ in range(depth):
            layers.append(nn.Linear(width, width))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(width, c_out))
        return nn.Sequential(*layers)

    def forward(self, x, c):
        """Run the network in the forward direction.

        ``x`` is reshaped to ``(-1, input_dim)`` and ``c`` to ``(-1, 1)``.
        Returns whatever the wrapped ``GraphINN`` call returns; the notebook
        unpacks it as a 2-tuple and uses the first element as the encoding.
        """
        x = x.reshape(-1, self.input_dim)
        c = c.reshape(-1, 1)
        return self.model(x, c=[c])

    def reverse(self, z, c):
        """Run the network in the reverse direction (``rev=True``).

        ``z`` is reshaped to ``(-1, input_dim)`` and ``c`` to ``(-1, 1)``.
        Returns whatever the wrapped ``GraphINN`` call returns.
        """
        z = z.reshape(-1, self.input_dim)
        c = c.reshape(-1, 1)
        return self.model(z, c=[c], rev=True)

    def sample_from_fiber(self, fraud_prob, n=1):
        """Draw ``n`` standard-normal latents and map them back through ``reverse``.

        Args:
            fraud_prob: Condition value, either a tensor (e.g. shape ``(1, 1)``)
                or a plain Python number. It is repeated ``n`` times along the
                first dimension.
            n: Number of samples to draw.

        Returns:
            The result of ``self.reverse`` for the sampled latents.
        """
        # Create the latents and the condition on the same device/dtype as the
        # model's parameters so sampling also works while the model is on a GPU.
        ref = next(self.parameters())
        z = torch.randn(n, self.input_dim, device=ref.device, dtype=ref.dtype)
        c = torch.as_tensor(fraud_prob, dtype=ref.dtype, device=ref.device).repeat(n, 1)
        return self.reverse(z, c)

# Report the accelerator (if any) that PyTorch can see
cuda_ok = torch.cuda.is_available()
print("CUDA is available:", cuda_ok)
if cuda_ok:
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())

# Pick the compute device: CUDA when present, otherwise the CPU
device = torch.device('cuda') if cuda_ok else torch.device('cpu')
print(f"Using device: {device}")

# Data preprocessing: split, standardize, oversample, convert to tensors
print("Preprocessing data...")
y = data['Class']
X = data.drop(columns='Class')

# Stratified 80/20 split on the label column
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Oversample the training split only; the test split is left as-is
print("Applying SMOTE for class balance...")
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)


def _to_float_tensor(values, as_column=False):
    """Build a float tensor from `values`, optionally as a column, on `device`."""
    tensor = torch.FloatTensor(values)
    if as_column:
        tensor = tensor.unsqueeze(1)
    return tensor.to(device)


# Features stay 2-D; labels become column vectors of shape (N, 1)
X_train_tensor = _to_float_tensor(X_train_resampled)
y_train_tensor = _to_float_tensor(y_train_resampled.values, as_column=True)
X_test_tensor = _to_float_tensor(X_test_scaled)
y_test_tensor = _to_float_tensor(y_test.values, as_column=True)

print("Data preprocessing completed.")

# Model initialization: one input unit per feature column
print("Initializing model...")
input_dim = X_train_tensor.size(1)
model = BundleNet(input_dim)
model = model.to(device)

# Loss functions (placed on the compute device) and the Adam optimizer
mse_loss = nn.MSELoss().to(device)
bce_loss = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Shuffled mini-batches over the (oversampled) training tensors
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Modified train_model function
def train_model(model, train_loader, num_epochs=100, optimizer=None,
                recon_criterion=None, fraud_criterion=None, log_every=10):
    """Train `model` on `train_loader` and return it.

    Each step encodes the batch conditioned on its labels, maps the encoding
    back with `model.reverse`, and minimizes the sum of a reconstruction loss
    and a classification loss on the first encoded component.

    Args:
        model: Module exposing `model(x, c)` and `model.reverse(z, c)`, each
            returning a pair whose first element is a tensor.
        train_loader: Iterable of `(batch_x, batch_y)` pairs.
        num_epochs: Number of passes over `train_loader`.
        optimizer: Optimizer over `model`'s parameters. Defaults to
            `Adam(model.parameters(), lr=1e-4)`, built per call so the
            optimizer always matches the model being trained (optimizer state
            is therefore not carried across calls unless one is passed in).
        recon_criterion: Reconstruction loss; defaults to `nn.MSELoss()`.
        fraud_criterion: Loss on the first encoded component versus the
            labels; defaults to `nn.BCEWithLogitsLoss()`.
        log_every: Print the average loss every this many epochs; a falsy
            value disables the progress lines.

    Returns:
        The same `model` object, trained in place.
    """
    # Build defaults locally instead of reading module-level globals, so the
    # function trains the model it was given rather than whichever model a
    # global optimizer happened to be created for.
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
    if recon_criterion is None:
        recon_criterion = nn.MSELoss()
    if fraud_criterion is None:
        fraud_criterion = nn.BCEWithLogitsLoss()

    # NOTE(review): the label is both the conditioning input and the target of
    # `fraud_criterion`, and the reconstruction goes through the model's own
    # inverse. This looks like it lets the loss collapse without learning
    # anything about the features (the logged losses are 0.0000) — confirm the
    # intended objective.
    print("Starting training...")
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        num_batches = 0
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()

            # Forward pass
            encoded, _ = model(batch_x, batch_y)

            # Reconstruction
            reconstructed, _ = model.reverse(encoded, batch_y)

            # Losses
            recon_loss = recon_criterion(reconstructed, batch_x)
            fraud_loss = fraud_criterion(encoded[:, 0:1], batch_y)
            loss = recon_loss + fraud_loss

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

        if log_every and (epoch + 1) % log_every == 0:
            # max(..., 1) keeps an empty loader from raising ZeroDivisionError
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Average Loss: {epoch_loss / max(num_batches, 1):.4f}')

    return model

# Fit the model on the training loader
model = train_model(model, train_loader, num_epochs=100)

# Score the held-out split.
# NOTE(review): the true test labels (`y_test_tensor`) are fed to the model as
# its conditioning input below, so the scores are computed with access to the
# labels they are compared against. The perfect metrics this cell reports
# should be read with that in mind — confirm the intended evaluation protocol.
print("Evaluating model...")
model.eval()
with torch.no_grad():
    X_test_tensor, y_test_tensor = X_test_tensor.to(device), y_test_tensor.to(device)
    encoded_test, _ = model(X_test_tensor, y_test_tensor)
    # First encoded component is treated as a logit for the positive class
    fraud_probs = encoded_test[:, 0].sigmoid().cpu()

# Ground truth as a NumPy array, and hard predictions at a 0.5 threshold
y_test_np = y_test.values
predicted_labels = fraud_probs > 0.5

auc_roc = roc_auc_score(y_test_np, fraud_probs)
avg_precision = average_precision_score(y_test_np, fraud_probs)
f1 = f1_score(y_test_np, predicted_labels)

print(f'AUC-ROC: {auc_roc:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'F1-score: {f1:.4f}')

# Fiber exploration: sample inputs for a fixed low and a fixed high condition
print("\nExploring fibers...")

# Sampling is done on the CPU; note this leaves `model` on the CPU afterwards
test_device = torch.device('cpu')
model = model.to(test_device)

# Condition values, each as a (1, 1) tensor
low_risk = torch.tensor([[0.1]], device=test_device)
high_risk = torch.tensor([[0.9]], device=test_device)

with torch.no_grad():
    # Five samples per condition; the second element of each result is unused
    low_risk_sample, _ = model.sample_from_fiber(low_risk, n=5)
    high_risk_sample, _ = model.sample_from_fiber(high_risk, n=5)

print("\nLow-risk transaction samples (first 5 features):")
print(low_risk_sample[:, :5])
print("\nHigh-risk transaction samples (first 5 features):")
print(high_risk_sample[:, :5])

CUDA is available: True
GPU Name: Tesla T4
Number of GPUs: 1
Using device: cuda
Preprocessing data...
Applying SMOTE for class balance...
Data preprocessing completed.
Initializing model...
Starting training...
Epoch [10/100], Average Loss: 0.0000
Epoch [20/100], Average Loss: 0.0000
Epoch [30/100], Average Loss: 0.0000
Epoch [40/100], Average Loss: 0.0000
Epoch [50/100], Average Loss: 0.0000
Epoch [60/100], Average Loss: 0.0000
Epoch [70/100], Average Loss: 0.0000
Epoch [80/100], Average Loss: 0.0000
Epoch [90/100], Average Loss: 0.0000
Epoch [100/100], Average Loss: 0.0000
Evaluating model...
AUC-ROC: 1.0000
Average Precision: 1.0000
F1-score: 1.0000

Exploring fibers...

Low-risk transaction samples (first 5 features):
tensor([[  0.6097,   3.3165,  12.1029, 109.0091,  99.1455],
        [  1.3128, -21.7661,  42.8486, 273.4642, 279.2777],
        [  0.7159,  -2.7537,  14.8695,  62.1992,  93.6846],
        [  0.4581,   2.7457,  10.2465,  53.1518,  79.4078],
        [  1.0581,  -5.2629,