In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from net import FraudNet, AttentionTransformerFraudNet, EnhancedFraudNet  # Import fraud detection model
from data import get_dataloaders_fraud  # Import dataset functions
from evaluation import evaluate_model  # Import evaluation function
from train import train_model, set_all_seeds  # Import training function from train.py
import pandas as pd
import sys
from plot import plot_metrics, plot_confusion_matrices, plot_aucpr
import pickle


# Load fraud dataset
set_all_seeds(42)

# Set dataset path
DATASET_PATH = "/home/khoa/Khoa/outsource/na_thesis/examples/hello-world/ml-to-fl/pt/src/data/creditcard.csv"
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Training hyperparameters
batch_size = 32
num_epochs = 15
learning_rate = 0.00003

df = pd.read_csv(DATASET_PATH)
input_size = df.shape[1] - 1
print(f"Detected input size: {input_size}")
save_plot_dir = 'data_plot'
os.makedirs(save_plot_dir, exist_ok=True)
train_loader, valid_loader, test_loader, class_weights = get_dataloaders_fraud(
    DATASET_PATH, batch_size=batch_size, use_smote=True, plot=True, save_plot_dir=save_plot_dir, sampling_strategy=0.5
)

In [None]:
for model in [AttentionTransformerFraudNet(input_size=input_size).to(DEVICE), FraudNet(input_size=input_size).to(DEVICE), EnhancedFraudNet(input_size=input_size).to(DEVICE)]:
    for stochastic_val in [True, False]: 
        # Get model name for saving metrics
        model_name = model.__class__.__name__

        class_weights = class_weights
        pos_weight = torch.tensor([class_weights[1] / class_weights[0]], device=DEVICE)

        pos_weight = None

        # Loss Function (No weight balancing since using SMOTE)
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

        # Optimizer
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Call `train.py` instead of writing the training loop here
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', patience=3, verbose=True
        )

        train_loss_list, train_metrics_list, valid_metrics_list, test_metrics = train_model(
            model, num_epochs, train_loader, valid_loader, test_loader, optimizer,
            criterion, DEVICE, scheduler=scheduler, stochastic=stochastic_val
        )

        save_plot_dir = f'plot_{model_name}_{batch_size}_{num_epochs}_{learning_rate}'
        if pos_weight:
            save_plot_dir += "_pos_weight"
        if stochastic_val:
            save_plot_dir += "_stochastic"
            
        os.makedirs(save_plot_dir, exist_ok=True)

        # Create metrics directory if it doesn't exist
        metrics_dir = f'metrics'
        os.makedirs(metrics_dir, exist_ok=True)

        # Save training metrics
        metrics_data = {
            'train_metrics': train_metrics_list,
            'valid_metrics': valid_metrics_list,
            'test_metrics': test_metrics,
            'train_loss': train_loss_list
        }
        if pos_weight:
            metrics_file = os.path.join(metrics_dir, f"{model_name}_{batch_size}_{num_epochs}_{learning_rate}_pos_weight_metrics.pickle")
        else:
            metrics_file = os.path.join(metrics_dir, f"{model_name}_{batch_size}_{num_epochs}_{learning_rate}_metrics.pickle")
            
        with open(metrics_file, 'wb') as f:
            pickle.dump(metrics_data, f)

        print(f"Metrics saved to {metrics_file}")

        # Create plots
        plot_metrics(train_metrics_list, fig_name="Training Metrics", save_path=f"{save_plot_dir}/train_metrics.png")
        plot_metrics(valid_metrics_list, fig_name="Validation Metrics", save_path=f"{save_plot_dir}/valid_metrics.png")
        plot_confusion_matrices(model, test_loader, threshold=0.85, save_path=f"{save_plot_dir}/confusion_matrix.png")
        plot_aucpr(model, test_loader, device=DEVICE, save_path=f"{save_plot_dir}/auc_pr.png")

        # Save the trained model
        best_model_path = "best_model.pth"
        model.load_state_dict(torch.load(best_model_path))
        print("Loaded best model from training phase.")

        # Save the best model explicitly at a clear location for future usage
        if pos_weight:
            final_model_path = f"./best_{model_name}_{batch_size}_{num_epochs}_{learning_rate}_pos_weight_model.pth"
        else:
            final_model_path = f"./best_{model_name}_{batch_size}_{num_epochs}_{learning_rate}_model.pth"
            
        torch.save(model.state_dict(), final_model_path)
        print(f"Final best model saved explicitly at {final_model_path}")

        # Evaluate model
        print("Evaluating Model on Test Set...")
        evaluate_model(model, test_loader, DEVICE)

# AIO

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from net import FraudNet, EnhancedFraudNet  # Import fraud detection model
from data import get_dataloaders_fraud, get_dataloaders_fraud_2  # Import dataset functions
from evaluation import evaluate_model  # Import evaluation function
from train import train_model, set_all_seeds  # Import training function from train.py
import pandas as pd
import sys
from plot import plot_metrics, plot_confusion_matrices, plot_aucpr
import pickle


# Load fraud dataset
set_all_seeds(42)

# Set dataset path
DATASET_PATH = "/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/data/creditcard.csv"
TEST_DATASET_PATH = "/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/data/server.csv"
TRAIN_VALID_DATASET_PATH = "/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/data/train_valid.csv"

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# for batch_size in [96, 128, 256]:
for batch_size in [32, 64, 96, 128, 256]:
    num_epochs = 15
    learning_rate = 0.00003

    df = pd.read_csv(DATASET_PATH)
    input_size = df.shape[1] - 1
    print(f"Detected input size: {input_size}")

    # train_loader, valid_loader, test_loader, class_weights = get_dataloaders_fraud(
    #     DATASET_PATH, batch_size=batch_size, use_smote=True, plot=True, save_plot_dir='data_plot'
    # )

    train_loader, valid_loader, test_loader, class_weights, _ = get_dataloaders_fraud_2(
        TRAIN_VALID_DATASET_PATH, test_csv=TEST_DATASET_PATH, batch_size=batch_size, use_smote=True, plot=True, save_plot_dir='data_plot'
    )

    for model in [FraudNet(input_size=input_size).to(DEVICE), EnhancedFraudNet(input_size=input_size).to(DEVICE)]:
        # for pos_weight in [torch.tensor([class_weights[1] / class_weights[0]], device=DEVICE), None]:
        #     for stochastic_val in [True, False]: 
        for pos_weight, stochastic_val in zip([torch.tensor([class_weights[1] / class_weights[0]], device=DEVICE), None], [True, False]):
                # Get model name for saving metrics
                model_name = model.__class__.__name__

                # Loss Function (No weight balancing since using SMOTE)
                criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

                # Optimizer
                optimizer = optim.Adam(model.parameters(), lr=learning_rate)

                # Call `train.py` instead of writing the training loop here
                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, mode='max', patience=3, verbose=True
                )

                train_loss_list, train_metrics_list, valid_metrics_list, test_metrics = train_model(
                    model, num_epochs, train_loader, valid_loader, test_loader, optimizer,
                    criterion, DEVICE, scheduler=scheduler, stochastic=stochastic_val
                )

                save_plot_dir = f'plot_{model_name}_{batch_size}_{num_epochs}_{learning_rate}'
                if pos_weight:
                    save_plot_dir += "_pos_weight"
                if stochastic_val:
                    save_plot_dir += "_stochastic"
                    
                os.makedirs(save_plot_dir, exist_ok=True)

                # Create metrics directory if it doesn't exist
                metrics_dir = f'metrics'
                os.makedirs(metrics_dir, exist_ok=True)

                # Save training metrics
                metrics_data = {
                    'train_metrics': train_metrics_list,
                    'valid_metrics': valid_metrics_list,
                    'test_metrics': test_metrics,
                    'train_loss': train_loss_list
                }
                metrics_file_name = f"{model_name}_{batch_size}_{num_epochs}_{learning_rate}"
                if pos_weight:
                    metrics_file_name += "_pos_weight"
                if stochastic_val:
                    metrics_file_name += "_stochastic"
                metrics_file = os.path.join(metrics_dir, f"{metrics_file_name}_metrics.pickle")
                    
                with open(metrics_file, 'wb') as f:
                    pickle.dump(metrics_data, f)

                print(f"Metrics saved to {metrics_file}")

                # Create plots
                plot_metrics(train_metrics_list, fig_name="Training Metrics", save_path=f"{save_plot_dir}/train_metrics.png")
                plot_metrics(valid_metrics_list, fig_name="Validation Metrics", save_path=f"{save_plot_dir}/valid_metrics.png")
                plot_confusion_matrices(model, test_loader, threshold=0.85, save_path=f"{save_plot_dir}/confusion_matrix.png")
                plot_aucpr(model, test_loader, device=DEVICE, save_path=f"{save_plot_dir}/auc_pr.png")

                # Save the trained model
                best_model_path = "best_model.pth"
                model.load_state_dict(torch.load(best_model_path))
                print("Loaded best model from training phase.")

                model_dir = 'models'
                os.makedirs(model_dir, exist_ok=True)
                
                model_file_name = f"{model_name}_{batch_size}_{num_epochs}_{learning_rate}"
                # Save the best model explicitly at a clear location for future usage
                if pos_weight:
                    model_file_name += "_pos_weight"
                if stochastic_val:
                    model_file_name += "_stochastic"
                final_model_path = os.path.join(model_dir, f"{model_file_name}_model.pth")
                    
                torch.save(model.state_dict(), final_model_path)
                print(f"Final best model saved explicitly at {final_model_path}")

                # Evaluate model
                print("Evaluating Model on Test Set...")
                evaluate_model(model, test_loader, DEVICE)

Detected input size: 30
Applying SMOTE to balance training data...
Adding Gaussian noise (std=0.1) to training data...


  self.labels = torch.tensor(labels, dtype=torch.float32)


Training set size after SMOTE: 263322 samples
Validation set size: 51085 samples




Starting Training...
[Epoch 1, Batch 1] Loss: 0.9104
[Epoch 1, Batch 823] Loss: 0.9639
[Epoch 1, Batch 1645] Loss: 0.7305
[Epoch 1, Batch 2467] Loss: 0.8922
[Epoch 1, Batch 3289] Loss: 0.6519
[Epoch 1, Batch 4111] Loss: 0.9415
[Epoch 1, Batch 4933] Loss: 0.3406
[Epoch 1, Batch 5755] Loss: 0.3462
[Epoch 1, Batch 6577] Loss: 0.3674
[Epoch 1, Batch 7399] Loss: 0.3201
[Epoch 1, Batch 8221] Loss: 0.2291
Epoch 1/15: Train Loss: 0.5741 | Train Acc: 93.49% | Valid Loss: 0.0710 | Valid Acc: 99.93% | Valid Precision: 80.52% | Valid Recall: 72.94% | Valid F1-score: 76.54% | Valid AUC-PR: 72.08%
Time Elapsed: 0.55 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.3977
[Epoch 2, Batch 823] Loss: 0.2743
[Epoch 2, Batch 1645] Loss: 0.1934
[Epoch 2, Batch 2467] Loss: 0.3869
[Epoch 2, Batch 3289] Loss: 0.4107
[Epoch 2, Batch 4111] Loss: 0.4050
[Epoch 2, Batch 4933] Loss: 0.3209
[Epoch 2, Batch 5755] Loss: 0.3021
[Epoch 2, Batch 6577] Loss: 0.4297
[Epoch 2, Batch 7399] Loss: 0.1487
[



[Epoch 1, Batch 823] Loss: 0.0320
[Epoch 1, Batch 1645] Loss: 0.1720
[Epoch 1, Batch 2467] Loss: 0.1161
[Epoch 1, Batch 3289] Loss: 0.4929
[Epoch 1, Batch 4111] Loss: 0.0440
[Epoch 1, Batch 4933] Loss: 0.1444
[Epoch 1, Batch 5755] Loss: 0.2632
[Epoch 1, Batch 6577] Loss: 0.1762
[Epoch 1, Batch 7399] Loss: 0.0209
[Epoch 1, Batch 8221] Loss: 0.1505
Epoch 1/15: Train Loss: 0.1047 | Train Acc: 97.29% | Valid Loss: 0.0362 | Valid Acc: 99.92% | Valid Precision: 72.00% | Valid Recall: 84.71% | Valid F1-score: 77.84% | Valid AUC-PR: 72.58%
Time Elapsed: 0.59 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0141
[Epoch 2, Batch 823] Loss: 0.1588
[Epoch 2, Batch 1645] Loss: 0.1178
[Epoch 2, Batch 2467] Loss: 0.1806
[Epoch 2, Batch 3289] Loss: 0.0238
[Epoch 2, Batch 4111] Loss: 0.0903
[Epoch 2, Batch 4933] Loss: 0.0122
[Epoch 2, Batch 5755] Loss: 0.0789
[Epoch 2, Batch 6577] Loss: 0.2105
[Epoch 2, Batch 7399] Loss: 0.0242
[Epoch 2, Batch 8221] Loss: 0.0191
Epoch 2/15: Train L



[Epoch 1, Batch 823] Loss: 1.0361
[Epoch 1, Batch 1645] Loss: 1.0303
[Epoch 1, Batch 2467] Loss: 0.7921
[Epoch 1, Batch 3289] Loss: 0.7770
[Epoch 1, Batch 4111] Loss: 0.6986
[Epoch 1, Batch 4933] Loss: 0.5189
[Epoch 1, Batch 5755] Loss: 0.3433
[Epoch 1, Batch 6577] Loss: 0.2150
[Epoch 1, Batch 7399] Loss: 0.2114
[Epoch 1, Batch 8221] Loss: 0.4613
Epoch 1/15: Train Loss: 0.6338 | Train Acc: 91.46% | Valid Loss: 0.1809 | Valid Acc: 99.92% | Valid Precision: 81.82% | Valid Recall: 63.53% | Valid F1-score: 71.52% | Valid AUC-PR: 72.25%
Time Elapsed: 0.49 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.9578
[Epoch 2, Batch 823] Loss: 0.4108
[Epoch 2, Batch 1645] Loss: 0.4089
[Epoch 2, Batch 2467] Loss: 0.1265
[Epoch 2, Batch 3289] Loss: 0.2723
[Epoch 2, Batch 4111] Loss: 0.4288
[Epoch 2, Batch 4933] Loss: 0.1543
[Epoch 2, Batch 5755] Loss: 0.3151
[Epoch 2, Batch 6577] Loss: 0.2356
[Epoch 2, Batch 7399] Loss: 0.6161
[Epoch 2, Batch 8221] Loss: 0.1196
Epoch 2/15: Train L



[Epoch 1, Batch 823] Loss: 0.0654
[Epoch 1, Batch 1645] Loss: 0.0378
[Epoch 1, Batch 2467] Loss: 0.1994
[Epoch 1, Batch 3289] Loss: 0.1422
[Epoch 1, Batch 4111] Loss: 0.0241
[Epoch 1, Batch 4933] Loss: 0.0317
[Epoch 1, Batch 5755] Loss: 0.0284
[Epoch 1, Batch 6577] Loss: 0.0295
[Epoch 1, Batch 7399] Loss: 0.1191
[Epoch 1, Batch 8221] Loss: 0.0229
Epoch 1/15: Train Loss: 0.1076 | Train Acc: 95.75% | Valid Loss: 0.0212 | Valid Acc: 99.93% | Valid Precision: 77.53% | Valid Recall: 81.18% | Valid F1-score: 79.31% | Valid AUC-PR: 73.90%
Time Elapsed: 0.55 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0164
[Epoch 2, Batch 823] Loss: 0.0201
[Epoch 2, Batch 1645] Loss: 0.2251
[Epoch 2, Batch 2467] Loss: 0.1159
[Epoch 2, Batch 3289] Loss: 0.0262
[Epoch 2, Batch 4111] Loss: 0.0460
[Epoch 2, Batch 4933] Loss: 0.0371
[Epoch 2, Batch 5755] Loss: 0.5698
[Epoch 2, Batch 6577] Loss: 0.3946
[Epoch 2, Batch 7399] Loss: 0.0501
[Epoch 2, Batch 8221] Loss: 0.2235
Epoch 2/15: Train L

  self.labels = torch.tensor(labels, dtype=torch.float32)


Training set size after SMOTE: 263094 samples
Validation set size: 51085 samples
Starting Training...
[Epoch 1, Batch 1] Loss: 1.1592
[Epoch 1, Batch 412] Loss: 0.9786
[Epoch 1, Batch 823] Loss: 0.9623
[Epoch 1, Batch 1234] Loss: 1.0417
[Epoch 1, Batch 1645] Loss: 0.7709
[Epoch 1, Batch 2056] Loss: 0.5457
[Epoch 1, Batch 2467] Loss: 0.5445
[Epoch 1, Batch 2878] Loss: 0.5043
[Epoch 1, Batch 3289] Loss: 0.4661
[Epoch 1, Batch 3700] Loss: 0.3132
[Epoch 1, Batch 4111] Loss: 0.5297
Epoch 1/15: Train Loss: 0.7109 | Train Acc: 87.80% | Valid Loss: 0.1359 | Valid Acc: 99.89% | Valid Precision: 81.82% | Valid Recall: 42.35% | Valid F1-score: 55.81% | Valid AUC-PR: 56.04%
Time Elapsed: 0.33 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.3877
[Epoch 2, Batch 412] Loss: 0.3081
[Epoch 2, Batch 823] Loss: 0.3216
[Epoch 2, Batch 1234] Loss: 0.3678
[Epoch 2, Batch 1645] Loss: 0.2187
[Epoch 2, Batch 2056] Loss: 0.3159
[Epoch 2, Batch 2467] Loss: 0.1535
[Epoch 2, Batch 2878] Loss:



[Epoch 1, Batch 412] Loss: 0.0934
[Epoch 1, Batch 823] Loss: 0.1570
[Epoch 1, Batch 1234] Loss: 0.1595
[Epoch 1, Batch 1645] Loss: 0.1035
[Epoch 1, Batch 2056] Loss: 0.1322
[Epoch 1, Batch 2467] Loss: 0.1357
[Epoch 1, Batch 2878] Loss: 0.0259
[Epoch 1, Batch 3289] Loss: 0.1687
[Epoch 1, Batch 3700] Loss: 0.0833
[Epoch 1, Batch 4111] Loss: 0.0516
Epoch 1/15: Train Loss: 0.0869 | Train Acc: 96.73% | Valid Loss: 0.0173 | Valid Acc: 99.87% | Valid Precision: 57.60% | Valid Recall: 84.71% | Valid F1-score: 68.57% | Valid AUC-PR: 73.05%
Time Elapsed: 0.30 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0414
[Epoch 2, Batch 412] Loss: 0.0198
[Epoch 2, Batch 823] Loss: 0.1230
[Epoch 2, Batch 1234] Loss: 0.1160
[Epoch 2, Batch 1645] Loss: 0.1496
[Epoch 2, Batch 2056] Loss: 0.0624
[Epoch 2, Batch 2467] Loss: 0.2272
[Epoch 2, Batch 2878] Loss: 0.0804
[Epoch 2, Batch 3289] Loss: 0.1289
[Epoch 2, Batch 3700] Loss: 0.1056
[Epoch 2, Batch 4111] Loss: 0.0211
Epoch 2/15: Train Los



[Epoch 1, Batch 412] Loss: 1.0975
[Epoch 1, Batch 823] Loss: 0.9741
[Epoch 1, Batch 1234] Loss: 1.0860
[Epoch 1, Batch 1645] Loss: 0.8223
[Epoch 1, Batch 2056] Loss: 0.7076
[Epoch 1, Batch 2467] Loss: 0.5980
[Epoch 1, Batch 2878] Loss: 0.5351
[Epoch 1, Batch 3289] Loss: 0.4178
[Epoch 1, Batch 3700] Loss: 0.4512
[Epoch 1, Batch 4111] Loss: 0.4459
Epoch 1/15: Train Loss: 0.7099 | Train Acc: 90.97% | Valid Loss: 0.3665 | Valid Acc: 99.92% | Valid Precision: 85.71% | Valid Recall: 63.53% | Valid F1-score: 72.97% | Valid AUC-PR: 72.58%
Time Elapsed: 0.33 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.5848
[Epoch 2, Batch 412] Loss: 0.4092
[Epoch 2, Batch 823] Loss: 0.5949
[Epoch 2, Batch 1234] Loss: 0.4121
[Epoch 2, Batch 1645] Loss: 0.2802
[Epoch 2, Batch 2056] Loss: 0.1819
[Epoch 2, Batch 2467] Loss: 0.4236
[Epoch 2, Batch 2878] Loss: 0.2358
[Epoch 2, Batch 3289] Loss: 0.5533
[Epoch 2, Batch 3700] Loss: 0.3413
[Epoch 2, Batch 4111] Loss: 0.2715
Epoch 2/15: Train Los



[Epoch 1, Batch 412] Loss: 0.0519
[Epoch 1, Batch 823] Loss: 0.0544
[Epoch 1, Batch 1234] Loss: 0.1913
[Epoch 1, Batch 1645] Loss: 0.1254
[Epoch 1, Batch 2056] Loss: 0.0358
[Epoch 1, Batch 2467] Loss: 0.1191
[Epoch 1, Batch 2878] Loss: 0.0427
[Epoch 1, Batch 3289] Loss: 0.0911
[Epoch 1, Batch 3700] Loss: 0.1011
[Epoch 1, Batch 4111] Loss: 0.0957
Epoch 1/15: Train Loss: 0.0952 | Train Acc: 97.83% | Valid Loss: 0.0714 | Valid Acc: 99.83% | Valid Precision: 49.66% | Valid Recall: 85.88% | Valid F1-score: 62.93% | Valid AUC-PR: 74.54%
Time Elapsed: 0.26 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0277
[Epoch 2, Batch 412] Loss: 0.0708
[Epoch 2, Batch 823] Loss: 0.0565
[Epoch 2, Batch 1234] Loss: 0.0340
[Epoch 2, Batch 1645] Loss: 0.0903
[Epoch 2, Batch 2056] Loss: 0.0417
[Epoch 2, Batch 2467] Loss: 0.0266
[Epoch 2, Batch 2878] Loss: 0.0474
[Epoch 2, Batch 3289] Loss: 0.0996
[Epoch 2, Batch 3700] Loss: 0.0749
[Epoch 2, Batch 4111] Loss: 0.0231
Epoch 2/15: Train Los

  self.labels = torch.tensor(labels, dtype=torch.float32)


Training set size after SMOTE: 263296 samples
Validation set size: 51085 samples
Starting Training...
[Epoch 1, Batch 1] Loss: 1.1032
[Epoch 1, Batch 275] Loss: 1.0521
[Epoch 1, Batch 549] Loss: 1.0972
[Epoch 1, Batch 823] Loss: 0.7870
[Epoch 1, Batch 1097] Loss: 1.0005
[Epoch 1, Batch 1371] Loss: 0.7510
[Epoch 1, Batch 1645] Loss: 0.5432
[Epoch 1, Batch 1919] Loss: 0.5243
[Epoch 1, Batch 2193] Loss: 0.5631
[Epoch 1, Batch 2467] Loss: 0.3324
[Epoch 1, Batch 2741] Loss: 0.4592
Epoch 1/15: Train Loss: 0.7356 | Train Acc: 96.35% | Valid Loss: 0.5372 | Valid Acc: 99.90% | Valid Precision: 67.35% | Valid Recall: 77.65% | Valid F1-score: 72.13% | Valid AUC-PR: 71.97%
Time Elapsed: 0.20 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.4410
[Epoch 2, Batch 275] Loss: 0.3557
[Epoch 2, Batch 549] Loss: 0.2234
[Epoch 2, Batch 823] Loss: 0.2978
[Epoch 2, Batch 1097] Loss: 0.2296
[Epoch 2, Batch 1371] Loss: 0.2233
[Epoch 2, Batch 1645] Loss: 0.2316
[Epoch 2, Batch 1919] Loss: 0



[Epoch 1, Batch 275] Loss: 0.1848
[Epoch 1, Batch 549] Loss: 0.0619
[Epoch 1, Batch 823] Loss: 0.0558
[Epoch 1, Batch 1097] Loss: 0.0372
[Epoch 1, Batch 1371] Loss: 0.0564
[Epoch 1, Batch 1645] Loss: 0.0341
[Epoch 1, Batch 1919] Loss: 0.1263
[Epoch 1, Batch 2193] Loss: 0.0581
[Epoch 1, Batch 2467] Loss: 0.0410
[Epoch 1, Batch 2741] Loss: 0.0445
Epoch 1/15: Train Loss: 0.0818 | Train Acc: 95.11% | Valid Loss: 0.0127 | Valid Acc: 99.92% | Valid Precision: 80.26% | Valid Recall: 71.76% | Valid F1-score: 75.78% | Valid AUC-PR: 70.51%
Time Elapsed: 0.24 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0743
[Epoch 2, Batch 275] Loss: 0.0460
[Epoch 2, Batch 549] Loss: 0.0338
[Epoch 2, Batch 823] Loss: 0.0997
[Epoch 2, Batch 1097] Loss: 0.1188
[Epoch 2, Batch 1371] Loss: 0.0473
[Epoch 2, Batch 1645] Loss: 0.0390
[Epoch 2, Batch 1919] Loss: 0.0291
[Epoch 2, Batch 2193] Loss: 0.0442
[Epoch 2, Batch 2467] Loss: 0.0749
[Epoch 2, Batch 2741] Loss: 0.0582
Epoch 2/15: Train Loss:



[Epoch 1, Batch 275] Loss: 1.0235
[Epoch 1, Batch 549] Loss: 0.9959
[Epoch 1, Batch 823] Loss: 0.9036
[Epoch 1, Batch 1097] Loss: 0.9825
[Epoch 1, Batch 1371] Loss: 0.8932
[Epoch 1, Batch 1645] Loss: 0.7788
[Epoch 1, Batch 1919] Loss: 0.6486
[Epoch 1, Batch 2193] Loss: 0.6263
[Epoch 1, Batch 2467] Loss: 0.5398
[Epoch 1, Batch 2741] Loss: 0.3889
Epoch 1/15: Train Loss: 0.8431 | Train Acc: 87.17% | Valid Loss: 0.4179 | Valid Acc: 99.89% | Valid Precision: 80.43% | Valid Recall: 43.53% | Valid F1-score: 56.49% | Valid AUC-PR: 67.64%
Time Elapsed: 0.22 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.4844
[Epoch 2, Batch 275] Loss: 0.3752
[Epoch 2, Batch 549] Loss: 0.4811
[Epoch 2, Batch 823] Loss: 0.3888
[Epoch 2, Batch 1097] Loss: 0.3476
[Epoch 2, Batch 1371] Loss: 0.3182
[Epoch 2, Batch 1645] Loss: 0.2662
[Epoch 2, Batch 1919] Loss: 0.5559
[Epoch 2, Batch 2193] Loss: 0.2126
[Epoch 2, Batch 2467] Loss: 0.2661
[Epoch 2, Batch 2741] Loss: 0.1934
Epoch 2/15: Train Loss:



[Epoch 1, Batch 275] Loss: 0.0469
[Epoch 1, Batch 549] Loss: 0.0516
[Epoch 1, Batch 823] Loss: 0.1444
[Epoch 1, Batch 1097] Loss: 0.0926
[Epoch 1, Batch 1371] Loss: 0.1472
[Epoch 1, Batch 1645] Loss: 0.0465
[Epoch 1, Batch 1919] Loss: 0.0983
[Epoch 1, Batch 2193] Loss: 0.0559
[Epoch 1, Batch 2467] Loss: 0.1033
[Epoch 1, Batch 2741] Loss: 0.0870
Epoch 1/15: Train Loss: 0.0769 | Train Acc: 97.00% | Valid Loss: 0.0282 | Valid Acc: 99.91% | Valid Precision: 68.69% | Valid Recall: 80.00% | Valid F1-score: 73.91% | Valid AUC-PR: 72.39%
Time Elapsed: 0.24 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0469
[Epoch 2, Batch 275] Loss: 0.0161
[Epoch 2, Batch 549] Loss: 0.0529
[Epoch 2, Batch 823] Loss: 0.1542
[Epoch 2, Batch 1097] Loss: 0.0790
[Epoch 2, Batch 1371] Loss: 0.0675
[Epoch 2, Batch 1645] Loss: 0.1347
[Epoch 2, Batch 1919] Loss: 0.0613
[Epoch 2, Batch 2193] Loss: 0.0744
[Epoch 2, Batch 2467] Loss: 0.0383
[Epoch 2, Batch 2741] Loss: 0.0332
Epoch 2/15: Train Loss:

  self.labels = torch.tensor(labels, dtype=torch.float32)


Training set size after SMOTE: 263328 samples
Validation set size: 51085 samples
Starting Training...
[Epoch 1, Batch 1] Loss: 1.1276
[Epoch 1, Batch 206] Loss: 1.0000
[Epoch 1, Batch 411] Loss: 1.0653
[Epoch 1, Batch 616] Loss: 0.9797
[Epoch 1, Batch 821] Loss: 1.0354
[Epoch 1, Batch 1026] Loss: 1.0492
[Epoch 1, Batch 1231] Loss: 0.9758
[Epoch 1, Batch 1436] Loss: 0.9500
[Epoch 1, Batch 1641] Loss: 0.8121
[Epoch 1, Batch 1846] Loss: 0.7940
[Epoch 1, Batch 2051] Loss: 0.5956
Epoch 1/15: Train Loss: 0.9626 | Train Acc: 83.66% | Valid Loss: 0.4460 | Valid Acc: 99.89% | Valid Precision: 87.18% | Valid Recall: 40.00% | Valid F1-score: 54.84% | Valid AUC-PR: 62.07%
Time Elapsed: 0.19 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.6229
[Epoch 2, Batch 206] Loss: 0.5811
[Epoch 2, Batch 411] Loss: 0.5394
[Epoch 2, Batch 616] Loss: 0.3855
[Epoch 2, Batch 821] Loss: 0.3770
[Epoch 2, Batch 1026] Loss: 0.4379
[Epoch 2, Batch 1231] Loss: 0.4529
[Epoch 2, Batch 1436] Loss: 0.4



[Epoch 1, Batch 206] Loss: 0.0957
[Epoch 1, Batch 411] Loss: 0.1330
[Epoch 1, Batch 616] Loss: 0.0782
[Epoch 1, Batch 821] Loss: 0.0778
[Epoch 1, Batch 1026] Loss: 0.0816
[Epoch 1, Batch 1231] Loss: 0.0788
[Epoch 1, Batch 1436] Loss: 0.0792
[Epoch 1, Batch 1641] Loss: 0.0502
[Epoch 1, Batch 1846] Loss: 0.0947
[Epoch 1, Batch 2051] Loss: 0.1330
Epoch 1/15: Train Loss: 0.0981 | Train Acc: 93.19% | Valid Loss: 0.0425 | Valid Acc: 99.93% | Valid Precision: 82.19% | Valid Recall: 70.59% | Valid F1-score: 75.95% | Valid AUC-PR: 72.64%
Time Elapsed: 0.18 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0913
[Epoch 2, Batch 206] Loss: 0.0683
[Epoch 2, Batch 411] Loss: 0.0411
[Epoch 2, Batch 616] Loss: 0.0943
[Epoch 2, Batch 821] Loss: 0.0794
[Epoch 2, Batch 1026] Loss: 0.0429
[Epoch 2, Batch 1231] Loss: 0.0483
[Epoch 2, Batch 1436] Loss: 0.0874
[Epoch 2, Batch 1641] Loss: 0.0879
[Epoch 2, Batch 1846] Loss: 0.0937
[Epoch 2, Batch 2051] Loss: 0.0823
Epoch 2/15: Train Loss: 0



[Epoch 1, Batch 206] Loss: 1.0037
[Epoch 1, Batch 411] Loss: 1.1221
[Epoch 1, Batch 616] Loss: 0.9283
[Epoch 1, Batch 821] Loss: 0.7880
[Epoch 1, Batch 1026] Loss: 0.7934
[Epoch 1, Batch 1231] Loss: 0.8458
[Epoch 1, Batch 1436] Loss: 0.6827
[Epoch 1, Batch 1641] Loss: 0.8488
[Epoch 1, Batch 1846] Loss: 0.6072
[Epoch 1, Batch 2051] Loss: 0.6286
Epoch 1/15: Train Loss: 0.8475 | Train Acc: 88.17% | Valid Loss: 0.6122 | Valid Acc: 99.86% | Valid Precision: 61.19% | Valid Recall: 48.24% | Valid F1-score: 53.95% | Valid AUC-PR: 50.43%
Time Elapsed: 0.23 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.6342
[Epoch 2, Batch 206] Loss: 0.6988
[Epoch 2, Batch 411] Loss: 0.5003
[Epoch 2, Batch 616] Loss: 0.4259
[Epoch 2, Batch 821] Loss: 0.4775
[Epoch 2, Batch 1026] Loss: 0.3666
[Epoch 2, Batch 1231] Loss: 0.3797
[Epoch 2, Batch 1436] Loss: 0.3078
[Epoch 2, Batch 1641] Loss: 0.3515
[Epoch 2, Batch 1846] Loss: 0.2303
[Epoch 2, Batch 2051] Loss: 0.2268
Epoch 2/15: Train Loss: 0



[Epoch 1, Batch 206] Loss: 0.1220
[Epoch 1, Batch 411] Loss: 0.1814
[Epoch 1, Batch 616] Loss: 0.1722
[Epoch 1, Batch 821] Loss: 0.1125
[Epoch 1, Batch 1026] Loss: 0.0645
[Epoch 1, Batch 1231] Loss: 0.1202
[Epoch 1, Batch 1436] Loss: 0.0856
[Epoch 1, Batch 1641] Loss: 0.0782
[Epoch 1, Batch 1846] Loss: 0.1287
[Epoch 1, Batch 2051] Loss: 0.0619
Epoch 1/15: Train Loss: 0.0940 | Train Acc: 96.18% | Valid Loss: 0.0225 | Valid Acc: 99.93% | Valid Precision: 78.65% | Valid Recall: 82.35% | Valid F1-score: 80.46% | Valid AUC-PR: 76.86%
Time Elapsed: 0.21 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0792
[Epoch 2, Batch 206] Loss: 0.0677
[Epoch 2, Batch 411] Loss: 0.0859
[Epoch 2, Batch 616] Loss: 0.0729
[Epoch 2, Batch 821] Loss: 0.0787
[Epoch 2, Batch 1026] Loss: 0.0730
[Epoch 2, Batch 1231] Loss: 0.0620
[Epoch 2, Batch 1436] Loss: 0.0261
[Epoch 2, Batch 1641] Loss: 0.0609
[Epoch 2, Batch 1846] Loss: 0.0823
[Epoch 2, Batch 2051] Loss: 0.1054
Epoch 2/15: Train Loss: 0

  self.labels = torch.tensor(labels, dtype=torch.float32)


Training set size after SMOTE: 263380 samples
Validation set size: 51085 samples
Starting Training...
[Epoch 1, Batch 1] Loss: 1.1603
[Epoch 1, Batch 103] Loss: 1.0949
[Epoch 1, Batch 205] Loss: 1.0775
[Epoch 1, Batch 307] Loss: 1.0238
[Epoch 1, Batch 409] Loss: 1.0180
[Epoch 1, Batch 511] Loss: 0.9415
[Epoch 1, Batch 613] Loss: 0.8439
[Epoch 1, Batch 715] Loss: 0.7802
[Epoch 1, Batch 817] Loss: 0.6789
[Epoch 1, Batch 919] Loss: 0.6131
[Epoch 1, Batch 1021] Loss: 0.5881
Epoch 1/15: Train Loss: 0.8905 | Train Acc: 90.48% | Valid Loss: 0.4822 | Valid Acc: 99.91% | Valid Precision: 81.36% | Valid Recall: 56.47% | Valid F1-score: 66.67% | Valid AUC-PR: 69.31%
Time Elapsed: 0.16 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.5954
[Epoch 2, Batch 103] Loss: 0.5693
[Epoch 2, Batch 205] Loss: 0.5366
[Epoch 2, Batch 307] Loss: 0.5653
[Epoch 2, Batch 409] Loss: 0.4796
[Epoch 2, Batch 511] Loss: 0.4727
[Epoch 2, Batch 613] Loss: 0.5086
[Epoch 2, Batch 715] Loss: 0.5161
[Epo



[Epoch 1, Batch 103] Loss: 0.1518
[Epoch 1, Batch 205] Loss: 0.1416
[Epoch 1, Batch 307] Loss: 0.1383
[Epoch 1, Batch 409] Loss: 0.1416
[Epoch 1, Batch 511] Loss: 0.1260
[Epoch 1, Batch 613] Loss: 0.1401
[Epoch 1, Batch 715] Loss: 0.1484
[Epoch 1, Batch 817] Loss: 0.1157
[Epoch 1, Batch 919] Loss: 0.1137
[Epoch 1, Batch 1021] Loss: 0.1197
Epoch 1/15: Train Loss: 0.1341 | Train Acc: 97.62% | Valid Loss: 0.1667 | Valid Acc: 99.85% | Valid Precision: 53.28% | Valid Recall: 85.88% | Valid F1-score: 65.77% | Valid AUC-PR: 80.16%
Time Elapsed: 0.17 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0921
[Epoch 2, Batch 103] Loss: 0.1002
[Epoch 2, Batch 205] Loss: 0.1178
[Epoch 2, Batch 307] Loss: 0.1267
[Epoch 2, Batch 409] Loss: 0.1316
[Epoch 2, Batch 511] Loss: 0.1165
[Epoch 2, Batch 613] Loss: 0.0986
[Epoch 2, Batch 715] Loss: 0.1029
[Epoch 2, Batch 817] Loss: 0.0970
[Epoch 2, Batch 919] Loss: 0.0748
[Epoch 2, Batch 1021] Loss: 0.1042
Epoch 2/15: Train Loss: 0.1067 | Tr



[Epoch 1, Batch 103] Loss: 1.0143
[Epoch 1, Batch 205] Loss: 0.9796
[Epoch 1, Batch 307] Loss: 1.0347
[Epoch 1, Batch 409] Loss: 1.0624
[Epoch 1, Batch 511] Loss: 0.9634
[Epoch 1, Batch 613] Loss: 0.8946
[Epoch 1, Batch 715] Loss: 0.8949
[Epoch 1, Batch 817] Loss: 0.8141
[Epoch 1, Batch 919] Loss: 0.7924
[Epoch 1, Batch 1021] Loss: 0.7107
Epoch 1/15: Train Loss: 0.9206 | Train Acc: 81.84% | Valid Loss: 0.4291 | Valid Acc: 99.86% | Valid Precision: 78.95% | Valid Recall: 17.65% | Valid F1-score: 28.85% | Valid AUC-PR: 47.70%
Time Elapsed: 0.16 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.6749
[Epoch 2, Batch 103] Loss: 0.7081
[Epoch 2, Batch 205] Loss: 0.6643
[Epoch 2, Batch 307] Loss: 0.6090
[Epoch 2, Batch 409] Loss: 0.6443
[Epoch 2, Batch 511] Loss: 0.5999
[Epoch 2, Batch 613] Loss: 0.4799
[Epoch 2, Batch 715] Loss: 0.5170
[Epoch 2, Batch 817] Loss: 0.5862
[Epoch 2, Batch 919] Loss: 0.4540
[Epoch 2, Batch 1021] Loss: 0.4387
Epoch 2/15: Train Loss: 0.5603 | Tr



[Epoch 1, Batch 103] Loss: 0.1529
[Epoch 1, Batch 205] Loss: 0.1161
[Epoch 1, Batch 307] Loss: 0.1288
[Epoch 1, Batch 409] Loss: 0.1553
[Epoch 1, Batch 511] Loss: 0.1033
[Epoch 1, Batch 613] Loss: 0.1604
[Epoch 1, Batch 715] Loss: 0.1020
[Epoch 1, Batch 817] Loss: 0.0818
[Epoch 1, Batch 919] Loss: 0.0772
[Epoch 1, Batch 1021] Loss: 0.0858
Epoch 1/15: Train Loss: 0.1177 | Train Acc: 97.08% | Valid Loss: 0.0522 | Valid Acc: 99.92% | Valid Precision: 72.16% | Valid Recall: 82.35% | Valid F1-score: 76.92% | Valid AUC-PR: 79.40%
Time Elapsed: 0.15 minutes
Model improved. Saving best model.
[Epoch 2, Batch 1] Loss: 0.0905
[Epoch 2, Batch 103] Loss: 0.1118
[Epoch 2, Batch 205] Loss: 0.0971
[Epoch 2, Batch 307] Loss: 0.0875
[Epoch 2, Batch 409] Loss: 0.1033
[Epoch 2, Batch 511] Loss: 0.1048
[Epoch 2, Batch 613] Loss: 0.0854
[Epoch 2, Batch 715] Loss: 0.1118
[Epoch 2, Batch 817] Loss: 0.1048
[Epoch 2, Batch 919] Loss: 0.0715
[Epoch 2, Batch 1021] Loss: 0.0709
Epoch 2/15: Train Loss: 0.1007 | Tr

In [None]:
import pickle

DIR = '/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/metrics'
metrics = os.listdir(DIR)
metrics.sort()

for metric in metrics:
    if metric.startswith('Attention') and 'pos_weight' not in metric:
        with open(os.path.join(DIR, metric), 'rb') as f:
            data = pickle.load(f)
        
        print(data.keys())
        print(data['train_metrics'][1].keys())


# Note
## train_loss for comparison between model (same batch size)

# Compare Model with itself in diff batch

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import savgol_filter
import re

def sort_by_model_and_size(filename):
    # Extract model name (everything before the first underscore)
    model_match = re.match(r'([^_]+)_', filename)
    model_name = model_match.group(1) if model_match else ""
    
    # Extract size (first number after the model name)
    size_match = re.search(r'_(\d+)_', filename)
    size = int(size_match.group(1)) if size_match else 0
    
    # Return tuple for sorting (first by model, then by size)
    return (model_name, size)

# Directory where metrics are stored
DIR = '/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/metrics'
METRIC_NAMES = ['train_metrics', 'valid_metrics']
METRIC_PERSIONS =  ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']

models = [FraudNet(), EnhancedFraudNet()]
for model in models:
    for metric_name in METRIC_NAMES:
        for metric_percision in METRIC_PERSIONS:
            model_name = model.__class__.__name__
            
            metrics = os.listdir(DIR)
            metrics.sort()
            
            # Set up the plot
            plt.figure(figsize=(12, 6))
            plt.title(f'{metric_name}_{metric_percision} in {model_name}', fontsize=14)
            plt.xlabel('Step', fontsize=12)
            plt.ylabel('Metric Value', fontsize=12)

            # Colors for different models with better contrast
            colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
            markers = ['o', 's', '^', 'D', 'x', '*']

            # Load and plot each metric file
            attention_models = []
            for i, metric in enumerate(sorted(metrics, key=sort_by_model_and_size)):
                if metric.startswith(model_name):
                    attention_models.append(metric)
                    with open(os.path.join(DIR, metric), 'rb') as f:
                        data = pickle.load(f)
                            
                    # Extract x and y values for plotting
                    x_values = []
                    y_values = []
                    
                    for j, point in enumerate(data[metric_name]):
                        x_values.append(j)
                        y_values.append(point[metric_percision])
                    
                    # Plot each point individually
                    for j in range(len(x_values)):
                        plt.plot(x_values[j], y_values[j], 
                                marker=markers[i % len(markers)], 
                                color=colors[i % len(colors)],
                                markersize=5)
                    
                    # Connect points with a line
                    plt.plot(x_values, y_values, 
                            color=colors[i % len(colors)], 
                            linewidth=1.5, 
                            alpha=0.7,
                            label=f'{metric}')

            # Add legend with better placement
            plt.legend(loc='best', fontsize=10)

            # Add grid for better readability but make it subtle
            plt.grid(True, linestyle='--', alpha=0.3)

            # Improve appearance
            plt.tight_layout()

            # Save the figure
            plt.savefig(f'{model_name}_{metric_name}_{metric_percision}_pos_weight_points.png', dpi=300, bbox_inches='tight')

            # Show the plot
            plt.show()

# Same Batch Size Diff Model

In [None]:
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import re
from matplotlib.lines import Line2D

# Directory where metrics are stored
DIR = '/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/metrics'
METRIC_NAMES = ['train_metrics', 'valid_metrics']
METRIC_PERSIONS = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']

# Define batch size and models to compare
BATCH_SIZE = "256"
MODEL_NAMES = ["FraudNet", "EnhancedFraudNet"]

# Function to check if a file matches criteria
def matches_criteria(filename, model_name, batch_size, use_pos_weight, use_stochastic):
    if not filename.startswith(model_name):
        return False
    batch_match = re.search(r'_(\d+)_', filename)
    if not (batch_match and batch_match.group(1) == batch_size):
        return False
    if ("pos_weight" in filename) != use_pos_weight:
        return False
    if ("stochastic" in filename) != use_stochastic:
        return False
    return True

# Create plots
for metric_name in METRIC_NAMES:
    for metric_precision in METRIC_PERSIONS:
        plt.figure(figsize=(14, 8))
        plt.title(f'Comparison of Models: {metric_name}_{metric_precision} (Batch Size {BATCH_SIZE})', fontsize=14)
        plt.xlabel('Step', fontsize=12)
        plt.ylabel(f'{metric_precision.replace("_", " ").title()}', fontsize=12)

        base_colors = {
            'FraudNet': '#1f77b4',        # Blue
            'EnhancedFraudNet': '#ff7f0e' # Orange
        }

        line_styles = {
            (False, False): '-',    # regular
            (True, False): '--',    # pos_weight
            (False, True): '-.',    # stochastic
            (True, True): ':'       # pos_weight + stochastic
        }

        markers = {
            (False, False): 'o',
            (True, False): 's',
            (False, True): '^',
            (True, True): 'D'
        }

        metrics_files = os.listdir(DIR)
        legend_elements = []

        for model_name in MODEL_NAMES:
            for weight_type in [False, True]:
                for use_stochastic in [False, True]:
                    color = base_colors[model_name]
                    line_style = line_styles[(weight_type, use_stochastic)]
                    marker = markers[(weight_type, use_stochastic)]

                    model_found = False
                    for metric_file in metrics_files:
                        if matches_criteria(metric_file, model_name, BATCH_SIZE, weight_type, use_stochastic):
                            with open(os.path.join(DIR, metric_file), 'rb') as f:
                                try:
                                    data = pickle.load(f)
                                    x_values = []
                                    y_values = []

                                    for j, point in enumerate(data[metric_name]):
                                        if isinstance(point, dict) and metric_precision in point:
                                            x_values.append(j)
                                            y_values.append(point[metric_precision])

                                    if len(x_values) > 0:
                                        label = f"{model_name} {'(pos_weight)' if weight_type else '(regular)'}{' + stochastic' if use_stochastic else ''}"
                                        
                                        plt.plot(x_values, y_values,
                                                 color=color,
                                                 linestyle=line_style,
                                                 linewidth=2,
                                                 alpha=0.7)
                                        
                                        plt.scatter(x_values, y_values,
                                                    marker=marker,
                                                    color=color,
                                                    s=30,
                                                    alpha=0.9)
                                        
                                        legend_elements.append(
                                            Line2D([0], [0], color=color, marker=marker, linestyle=line_style,
                                                   markersize=8, label=label)
                                        )
                                        model_found = True
                                    else:
                                        print(f"No data points found for {metric_precision} in {metric_file}")
                                except Exception as e:
                                    print(f"Error processing {metric_file}: {e}")
                            if model_found:
                                break

        if legend_elements:
            plt.legend(handles=legend_elements, loc='best', fontsize=10, ncol=2)
        else:
            plt.close()
            continue

        plt.grid(True, linestyle='--', alpha=0.3)
        plt.gca().set_facecolor('#f8f8f8')
        plt.tight_layout()
        plt.savefig(f'comparison_{metric_name}_{metric_precision}_batch{BATCH_SIZE}_combined.png', dpi=300, bbox_inches='tight')
        plt.close()

print("All plots created successfully!")


# Compare Same Model Same Batch Size

In [2]:
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def extract_model_info(filename):
    base_name = filename.replace('_metrics.pickle', '')
    parts = base_name.split('_')
    model_name = parts[0]
    batch_size = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else 0
    variant = []
    if "pos_weight" in base_name:
        variant.append("pos_weight")
    if "stochastic" in base_name:
        variant.append("stochastic")
    return model_name, batch_size, "_".join(variant) if variant else "base"

# Configuration
DIR = '/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/metrics'
METRIC_NAMES = ['valid_metrics','train_metrics']
METRIC_PERSIONS = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
PLOT_DIR = '__plot_dir'
os.makedirs(PLOT_DIR, exist_ok=True)

# Get all metric files
metrics = os.listdir(DIR)

# Group files by model and batch size
model_batch_groups = {}
for metric in metrics:
    model_name, batch_size, variant = extract_model_info(metric)
    key = f"{model_name}_{batch_size}"
    if key not in model_batch_groups:
        model_batch_groups[key] = []
    model_batch_groups[key].append(metric)

# Define color/marker styles for just the two variants we care about
variant_styles = {
    'base': ('#1f77b4', 'o'),
    'pos_weight_stochastic': ('#d62728', 'D')
}

# Process and plot
for group_key, group_metrics in model_batch_groups.items():
    model_name, batch_size = group_key.split('_')

    for metric_name in METRIC_NAMES:
        for metric_percision in METRIC_PERSIONS:
            plt.figure(figsize=(12, 6))
            plt.title(f'{model_name} (Batch Size {batch_size}) - {metric_name}_{metric_percision}', fontsize=14)
            plt.xlabel('Step', fontsize=12)
            plt.ylabel(metric_percision.replace('_', ' ').title(), fontsize=12)

            # Filter only base and pos_weight_stochastic
            filtered_metrics = []
            for metric in sorted(group_metrics):
                _, _, variant = extract_model_info(metric)
                if variant in variant_styles:
                    filtered_metrics.append((metric, variant))

            # Plot each metric curve
            for metric, variant in filtered_metrics:
                color, marker = variant_styles[variant]
                try:
                    with open(os.path.join(DIR, metric), 'rb') as f:
                        data = pickle.load(f)

                    x_values = []
                    y_values = []
                    for j, point in enumerate(data.get(metric_name, [])):
                        if metric_percision in point:
                            x_values.append(j)
                            y_values.append(point[metric_percision])

                    if x_values:
                        plt.plot(x_values, y_values, label=variant, color=color, linestyle='-')
                        plt.scatter(x_values, y_values, color=color, marker=marker, s=30, alpha=0.7)
                except Exception as e:
                    print(f"[ERROR] Error processing {metric}: {e}")

            plt.legend(loc='best', fontsize=10)
            plt.grid(True, linestyle='--', alpha=0.3)
            plt.gca().set_facecolor('#f8f8f8')
            plt.tight_layout()
            output_path = os.path.join(PLOT_DIR, f'{model_name}_batch{batch_size}_{metric_name}_{metric_percision}.png')
            plt.savefig(output_path, dpi=300, bbox_inches='tight')
            plt.close()

    # Confusion matrix
    for metric, variant in filtered_metrics:
        try:
            with open(os.path.join(DIR, metric), 'rb') as f:
                data = pickle.load(f)

            valid_data = data.get("valid_metrics", [])
            if not valid_data:
                print(f"[SKIP] No valid_metrics found in {metric}")
                continue

            last_metrics = valid_data[-1]
            required_keys = ['tp', 'fp', 'tn', 'fn']
            if not all(k in last_metrics for k in required_keys):
                print(f"[SKIP] Missing tp/fp/tn/fn in {metric}: available keys = {list(last_metrics.keys())}")
                continue

            tp = last_metrics['tp']
            fp = last_metrics['fp']
            tn = last_metrics['tn']
            fn = last_metrics['fn']

            print(f"[OK] Confusion matrix for {metric}: TP={tp}, FP={fp}, TN={tn}, FN={fn}")

            cm = np.array([[tn, fp],
                           [fn, tp]])
            labels = ['Non-Fraud', 'Fraud']

            plt.figure(figsize=(5, 4))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                        xticklabels=labels, yticklabels=labels)
            plt.xlabel('Predicted Label')
            plt.ylabel('True Label')
            plt.title(f'{model_name} (Batch {batch_size}) - {variant}\nConfusion Matrix')
            plt.tight_layout()
            cm_path = os.path.join(PLOT_DIR, f'{model_name}_batch{batch_size}_{variant}_confusion_matrix.png')
            plt.savefig(cm_path, dpi=300)
            plt.close()
            print(f"[SAVE] Confusion matrix saved to {cm_path}")

        except Exception as e:
            print(f"[ERROR] Error generating confusion matrix for {metric}: {e}")

print("\n All plots (metrics + confusion matrices) generated successfully.")


[SKIP] Missing tp/fp/tn/fn in EnhancedFraudNet_32_15_3e-05_metrics.pickle: available keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
[SKIP] Missing tp/fp/tn/fn in EnhancedFraudNet_32_15_3e-05_pos_weight_stochastic_metrics.pickle: available keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
[SKIP] Missing tp/fp/tn/fn in FraudNet_32_15_3e-05_metrics.pickle: available keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
[SKIP] Missing tp/fp/tn/fn in FraudNet_32_15_3e-05_pos_weight_stochastic_metrics.pickle: available keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
[SKIP] Missing tp/fp/tn/fn in EnhancedFraudNet_128_15_3e-05_metrics.pickle: available keys = ['accuracy', 'precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
[SKIP] Missing tp/fp/tn/fn in EnhancedFraudNet_128_15_3e-05_pos_weight_stochastic_metrics.pickle: available keys = ['accuracy'

# Get the best F1 score

In [1]:
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def extract_model_info(filename):
    base_name = filename.replace('_metrics.pickle', '')
    parts = base_name.split('_')
    model_name = parts[0]
    batch_size = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else 0
    variant = []
    if "pos_weight" in base_name:
        variant.append("pos_weight")
    if "stochastic" in base_name:
        variant.append("stochastic")
    return model_name, batch_size, "_".join(variant) if variant else "base"

# Configuration
DIR = '/home/nahear/Thesis/NVFlare/examples/hello-world/ml-to-fl/pt/src/metrics'
METRIC_NAMES = ['valid_metrics','train_metrics']
METRIC_PERSIONS = ['precision', 'recall', 'f1_score', 'auc_roc', 'auc_pr', 'loss']
PLOT_DIR = '__plot_dir'
os.makedirs(PLOT_DIR, exist_ok=True)

# Get all metric files
metrics = os.listdir(DIR)

# Group files by model and batch size
model_batch_groups = {}
for metric in metrics:
    model_name, batch_size, variant = extract_model_info(metric)
    key = f"{model_name}_{batch_size}"
    if key not in model_batch_groups:
        model_batch_groups[key] = []
    model_batch_groups[key].append(metric)

# Define color/marker styles for just the two variants we care about
variant_styles = {
    'base': ('#1f77b4', 'o'),
    'pos_weight_stochastic': ('#d62728', 'D')
}

# Group metrics by model name
model_metrics = {}
for metric in metrics:
    model_name, batch_size, variant = extract_model_info(metric)
    if model_name not in model_metrics:
        model_metrics[model_name] = []
    model_metrics[model_name].append(metric)

# For each model, find the maximum F1 score and corresponding metrics
for model_name, model_files in sorted(model_metrics.items()):
    print(f"\n{model_name}:")
    
    for metric_file in sorted(model_files):
        model_name, batch_size, variant = extract_model_info(metric_file)
        
        try:
            with open(os.path.join(DIR, metric_file), 'rb') as f:
                data = pickle.load(f)
            
            valid_metrics = data.get('valid_metrics', [])
            if not valid_metrics:
                print(f"  [SKIP] No validation metrics found in {metric_file}")
                continue
                
            # Find maximum F1 score and its corresponding metrics
            max_f1 = -float('inf')
            max_f1_idx = -1
            
            for i, metrics_point in enumerate(valid_metrics):
                if 'f1_score' in metrics_point and metrics_point['f1_score'] > max_f1:
                    max_f1 = metrics_point['f1_score']
                    max_f1_idx = i
            
            if max_f1_idx >= 0:
                best_metrics = valid_metrics[max_f1_idx]
                print(f"  {metric_file} (Batch {batch_size}, {variant}):")
                print(f"    Max F1: {best_metrics['f1_score']:.4f}")
                print(f"    Precision: {best_metrics.get('precision', 'N/A'):.4f}")
                print(f"    Recall: {best_metrics.get('recall', 'N/A'):.4f}")
                print(f"    Loss: {best_metrics.get('loss', 'N/A'):.4f}")
                print(f"    Epoch/Step: {max_f1_idx}")
            else:
                print(f"  [SKIP] No F1 score found in {metric_file}")
                
        except Exception as e:
            print(f"  [ERROR] Error processing {metric_file}: {e}")



EnhancedFraudNet:
  EnhancedFraudNet_128_15_3e-05_metrics.pickle (Batch 128, base):
    Max F1: 80.4598
    Precision: 78.6517
    Recall: 82.3529
    Loss: 0.0225
    Epoch/Step: 0
  EnhancedFraudNet_128_15_3e-05_pos_weight_stochastic_metrics.pickle (Batch 128, pos_weight_stochastic):
    Max F1: 77.0950
    Precision: 73.4043
    Recall: 81.1765
    Loss: 0.0658
    Epoch/Step: 3
  EnhancedFraudNet_256_15_3e-05_metrics.pickle (Batch 256, base):
    Max F1: 78.6517
    Precision: 75.2688
    Recall: 82.3529
    Loss: 0.0632
    Epoch/Step: 1
  EnhancedFraudNet_256_15_3e-05_pos_weight_stochastic_metrics.pickle (Batch 256, pos_weight_stochastic):
    Max F1: 79.7688
    Precision: 78.4091
    Recall: 81.1765
    Loss: 0.0408
    Epoch/Step: 4
  EnhancedFraudNet_32_15_3e-05_metrics.pickle (Batch 32, base):
    Max F1: 83.6364
    Precision: 86.2500
    Recall: 81.1765
    Loss: 0.0073
    Epoch/Step: 2
  EnhancedFraudNet_32_15_3e-05_pos_weight_stochastic_metrics.pickle (Batch 32, pos_we