In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from net import FraudNet, AttentionTransformerFraudNet, EnhancedFraudNet  # Import fraud detection model
from data import get_dataloaders_fraud  # Import dataset functions
from evaluation import evaluate_model  # Import evaluation function
from train import train_model, set_all_seeds  # Import training function from train.py
import pandas as pd
import sys
from plot import plot_metrics, plot_confusion_matrices, plot_aucpr
import pickle


# Seed every RNG that `set_all_seeds` covers so runs are repeatable.
set_all_seeds(42)

# Dataset location. The default is the original local path; set the
# CREDITCARD_CSV environment variable to run the notebook on another machine.
DATASET_PATH = os.environ.get(
    "CREDITCARD_CSV",
    "/home/khoa/Khoa/outsource/na_thesis/examples/hello-world/ml-to-fl/pt/src/data/creditcard.csv",
)
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Training hyperparameters
batch_size = 64
num_epochs = 15
learning_rate = 0.00003


df = pd.read_csv(DATASET_PATH)
# Every column except one is treated as a model input
# (presumably the remaining column is the label -- confirm against data.py).
input_size = df.shape[1] - 1
print(f"Detected input size: {input_size}")

# The plot directory has to exist *before* the data loaders are built because it
# is passed to `get_dataloaders_fraud` below. Previously it was only defined in
# the training cell, so this cell raised NameError on a fresh kernel.
# The name is derived from the FraudNet class; keep it in sync with the model
# that is instantiated in the training cell.
save_plot_dir = f'plot_{FraudNet.__name__}_{batch_size}_{num_epochs}_{learning_rate}'
os.makedirs(save_plot_dir, exist_ok=True)

train_loader, valid_loader, test_loader, class_weights = get_dataloaders_fraud(
    DATASET_PATH, batch_size=batch_size, use_smote=True, plot=True, save_plot_dir=save_plot_dir
)

In [7]:
# Build the model on the selected device and derive the names used for artifacts.
model = FraudNet(input_size=input_size).to(DEVICE)
model_name = model.__class__.__name__

save_plot_dir = f'plot_{model_name}_{batch_size}_{num_epochs}_{learning_rate}'

# Positive-class weight for the loss: ratio of the class-1 weight to the
# class-0 weight returned by `get_dataloaders_fraud`.
# NOTE(review): the loaders were built with use_smote=True, so the training set
# may already be balanced -- confirm that weighting the loss as well is intended.
pos_weight = torch.tensor([class_weights[1] / class_weights[0]], device=DEVICE)

# Loss function: binary cross-entropy on logits, weighted by `pos_weight`.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Learning-rate scheduler.
# NOTE(review): mode='max' only makes sense if `train_model` steps the scheduler
# with a higher-is-better validation metric -- confirm in train.py.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', patience=3, verbose=True
)

# Delegate the training loop to `train.py` instead of writing it here.
train_loss_list, train_metrics_list, valid_metrics_list, test_metrics = train_model(
    model, num_epochs, train_loader, valid_loader, test_loader, optimizer,
    criterion, DEVICE, scheduler=scheduler, stochastic=True
)

# Persist everything returned by training into one pickle under ./metrics.
metrics_dir = 'metrics'
metrics_file = os.path.join(
    metrics_dir,
    f"{model_name}_{batch_size}_{num_epochs}_{learning_rate}_metrics.pickle",
)

# Bundle the per-epoch histories together with the final test metrics.
metrics_data = dict(
    train_metrics=train_metrics_list,
    valid_metrics=valid_metrics_list,
    test_metrics=test_metrics,
    train_loss=train_loss_list,
)

# Make sure the target directory is there, then write the pickle.
os.makedirs(metrics_dir, exist_ok=True)
with open(metrics_file, 'wb') as handle:
    pickle.dump(metrics_data, handle)

print(f"Metrics saved to {metrics_file}")

# Restore the best checkpoint FIRST so that the model-based plots, the exported
# weights and the final evaluation all describe the same model. Previously the
# confusion matrix and AUC-PR plots were drawn from the last-epoch weights while
# the evaluation below used the best checkpoint.
# (presumably "best_model.pth" is written by `train_model` -- confirm in train.py)
best_model_path = "best_model.pth"
# map_location keeps the load working when the checkpoint was saved on another device.
model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
print("Loaded best model from training phase.")

# Save the best model explicitly at a clear location for future usage
final_model_path = f"./best_{model_name}_{batch_size}_{num_epochs}_{learning_rate}_model.pth"
torch.save(model.state_dict(), final_model_path)
print(f"Final best model saved explicitly at {final_model_path}")

# Create plots (make sure the output directory exists before writing into it)
os.makedirs(save_plot_dir, exist_ok=True)
decision_threshold = 0.85  # threshold handed to the confusion-matrix plot
plot_metrics(train_metrics_list, fig_name="Training Metrics", save_path=f"{save_plot_dir}/train_metrics.png")
plot_metrics(valid_metrics_list, fig_name="Validation Metrics", save_path=f"{save_plot_dir}/valid_metrics.png")
plot_confusion_matrices(model, test_loader, threshold=decision_threshold, save_path=f"{save_plot_dir}/confusion_matrix.png")
plot_aucpr(model, test_loader, device=DEVICE, save_path=f"{save_plot_dir}/auc_pr.png")

# Evaluate model; the call is the last expression so its result is shown as the cell output.
print("Evaluating Model on Test Set...")
evaluate_model(model, test_loader, DEVICE)


Starting Training...
[Epoch 1, Batch 1] Loss: 1.0648
[Epoch 1, Batch 369] Loss: 0.6910
[Epoch 1, Batch 737] Loss: 0.5359
[Epoch 1, Batch 1105] Loss: 0.4662
[Epoch 1, Batch 1473] Loss: 0.3306
[Epoch 1, Batch 1841] Loss: 0.4013
[Epoch 1, Batch 2209] Loss: 0.2471
[Epoch 1, Batch 2577] Loss: 0.2222
[Epoch 1, Batch 2945] Loss: 0.3184
[Epoch 1, Batch 3313] Loss: 0.1858
[Epoch 1, Batch 3681] Loss: 0.3601
Epoch 1/15: Train Loss: 0.3886 | Train Acc: 96.66% | Valid Loss: 0.2123 | Valid Acc: 99.82% | Valid Precision: 48.21% | Valid Recall: 86.17% | Valid F1-score: 61.83% | Valid AUC-PR: 77.84%
Time Elapsed: 0.26 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.2097
[Epoch 2, Batch 369] Loss: 0.2112
[Epoch 2, Batch 737] Loss: 0.1210
[Epoch 2, Batch 1105] Loss: 0.1824
[Epoch 2, Batch 1473] Loss: 0.1841
[Epoch 2, Batch 1841] Loss: 0.1528
[Epoch 2, Batch 2209] Loss: 0.1558
[Epoch 2, Batch 2577] Loss: 0.3327
[Epoch 2, Batch 2945] Loss: 0.2081
[Epoch 2, Batch 3313] Loss: 0.1061
[Ep

{'accuracy': 99.81055599612301,
 'precision': 46.26865671641791,
 'recall': 81.57894736842105,
 'f1_score': 59.04761904761905,
 'auc_roc': 98.47133831931993,
 'auc_pr': 71.59385520862077}