# GRMP Attack Experiment - Google Colab

This notebook runs the Graph Representation-based Model Poisoning (GRMP) attack experiment on the AG News dataset.

**Paper**: Graph Representation-based Model Poisoning on the Heterogeneous Internet of Agents

## Setup Instructions

1. **Enable GPU**: Runtime → Change runtime type → GPU
2. **Fetch Code**: Run **Step 0** to clone/download the repo if only this notebook was uploaded.
3. **Run all cells**: Runtime → Run all
4. **View results**: Check the `results/` folder for outputs and visualizations



## Step 0: Fetch Code
If you only uploaded this notebook, run this to clone the repository and set the working directory.
If you've already uploaded the Python files, it will reuse them without cloning.


In [None]:
# Fetch repository and set working directory
import os, sys, subprocess
from pathlib import Path

# Git URL of the repository holding the experiment code; cloned by this cell when needed.
REPO_URL = 'https://github.com/GuangLun2000/IoA-Attack-GRMP.git'
# Checkout folder (relative to the current working directory) that this cell switches into.
REPO_DIR = Path('IoA-Attack-GRMP')

def code_files_present():
    """Return True when both ``main.py`` and ``client.py`` exist in the current working directory."""
    return all(Path(name).exists() for name in ('main.py', 'client.py'))

# Locate the experiment code: reuse files already in the working directory,
# otherwise switch into the repository checkout (cloning it first if it is absent).
if code_files_present():
    print('✅ Code files found in current directory.')
else:
    if REPO_DIR.exists():
        print(f'🔁 Using existing folder: {REPO_DIR}')
    else:
        print(f'📥 Cloning {REPO_URL} ...')
        # Shallow clone (--depth 1); check=True raises CalledProcessError if git fails.
        subprocess.run(['git', 'clone', '--depth', '1', REPO_URL], check=True)
    os.chdir(REPO_DIR)
    print(f"✅ Switched to {Path('.').resolve()}")
    if not code_files_present():
        # A pre-existing folder is used as-is, so it may not contain the code files.
        print(f"⚠️  main.py/client.py not found in {Path('.').resolve()}; "
              "remove the folder and re-run this cell to clone a fresh copy.")

# Ensure current path is importable for subsequent cells.
# Guarded so that re-running the cell does not add duplicate sys.path entries.
work_dir = str(Path('.').resolve())
if work_dir not in sys.path:
    sys.path.append(work_dir)
print(f"📂 Working directory: {work_dir}")



## Step 1: Install Dependencies


In [None]:
# Install required packages
from pathlib import Path
req = Path('requirements.txt')
if req.exists():
    print('Installing from requirements.txt ...')
    %pip install -q -r requirements.txt
else:
    print('requirements.txt not found; installing explicit package list...')
    %pip install -q torch>=2.0.0 transformers>=4.35.0 datasets>=2.0.0 numpy>=1.21.0 scikit-learn>=1.0.0 pandas>=1.3.0 tqdm>=4.62.0 matplotlib>=3.4.0 seaborn>=0.11.0

print('‚úÖ Dependencies installed successfully!')


## Step 2: Verify Files and GPU


In [None]:
# Check if files exist
import os
from pathlib import Path

# Source files this notebook expects to find in the current working directory.
required_files = ['main.py', 'client.py', 'server.py', 'data_loader.py', 'models.py', 'visualization.py']
missing_files = [f for f in required_files if not os.path.exists(f)]

if missing_files:
    # Report only; execution continues so the user can upload the files and re-run.
    print(f"⚠️  Missing files: {missing_files}")
    print("Please upload these files to Colab using the file uploader.")
else:
    print("✅ All required files found!")
    for f in required_files:
        print(f"  - {f}")

# Check GPU: report the PyTorch build and whether a CUDA device is visible.
import torch

cuda_available = torch.cuda.is_available()
print(f"\nPyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")

if cuda_available:
    # Only device index 0 is reported; memory is shown in decimal gigabytes (bytes / 1e9).
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("⚠️  No GPU detected. Training will be slower.")
    print("   Go to Runtime → Change runtime type → GPU")


## Step 3: Configure Experiment

Choose one of the configurations below:
- **Quick Test**: Faster execution (~10-15 min), reduced rounds and dataset
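- **Full Experiment**: Complete experiment (~1-2 hours); set `dataset_size_limit` to `None` to use the full dataset (the default configuration below limits it to 10,000 training samples)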


In [None]:
# Experiment Configuration
# Modify these parameters as needed

# Single configuration dictionary passed to run_experiment() in Step 4.
EXPERIMENT_CONFIG = {
    # ========== Experiment Configuration ==========
    'experiment_name': 'vgae_grmp_attack',  # Name for result files and logs
    'seed': 42,  # Random seed for reproducibility (int)

    # ========== Federated Learning Setup ==========
    'num_clients': 6,  # Total number of federated learning clients (int)
    'num_attackers': 2,  # Number of attacker clients (int, must be < num_clients)
    'num_rounds': 30,  # Total number of federated learning rounds (int)

    # ========== Training Hyperparameters ==========
    'client_lr': 2e-5,  # Learning rate for local client training (float)
    'server_lr': 0.8,  # Server learning rate for model aggregation (float, typically 0.5-1.0)
    'batch_size': 128,  # Batch size for local training (int)
    'test_batch_size': 128,  # Batch size for test/validation data loaders (int)
    # Number of local training epochs per round (int). An alternative value of 5 was
    # previously listed here as "per paper Section IV".
    # NOTE(review): confirm which value matches the paper before reporting results.
    'local_epochs': 2,
    'alpha': 0.01,  # Proximal regularization coefficient α ∈ [0,1] from paper formula (1) (float)

    # ========== Data Distribution ==========
    'dirichlet_alpha': 0.5,  # Dirichlet distribution parameter for non-IID data partitioning (float, lower = more heterogeneous)
    'test_sample_rate': 1.0,  # Rate of Business samples to test for ASR evaluation (float, 1.0 = all samples)
    # Limit dataset size for faster experimentation (int = limit training samples).
    # Set to None to use the FULL AG News dataset per paper.
    'dataset_size_limit': 10000,

    # ========== Attack Configuration ==========
    'poison_rate': 1.0,  # Base poisoning rate for attack phase (float, 0.0-1.0)
    'attack_start_round': 0,  # Round when attack phase starts (int, learning phase before this round)

    # ========== Formula 4 Constraint Parameters ==========
    'd_T': 0.5,  # Distance threshold for constraint (4b): d(w'_j(t), w_g(t)) ≤ d_T (float)
    'gamma': 10.0,  # Upper bound for constraint (4c): Σ β'_{i,j}(t) d(w_i(t), w̄_i(t)) ≤ Γ (float)

    # ========== VGAE Training Parameters ==========
    'dim_reduction_size': 10000,  # Dimensionality for feature reduction in VGAE (int, adjust based on GPU memory)
    'vgae_epochs': 30,  # Number of epochs for VGAE training per camouflage step (int)
    'vgae_lr': 0.01,  # Learning rate for VGAE optimizer (float)
    'vgae_lambda': 0.5,  # Weight for preservation loss in camouflage optimization (float, balances attack efficacy vs camouflage)

    # ========== Camouflage Optimization Parameters ==========
    'camouflage_steps': 50,  # Number of optimization steps for malicious update camouflage (int)
    'camouflage_lr': 0.1,  # Learning rate for camouflage optimization (float)
    'lambda_proximity': 2.0,  # Weight for constraint (4b) proximity loss in camouflage (float)
    'lambda_aggregation': 0.5,  # Weight for constraint (4c) aggregation loss in camouflage (float)

    # ========== Graph Construction Parameters ==========
    'graph_threshold': 0.5,  # Threshold for graph adjacency matrix binarization in VGAE (float, 0.0-1.0)

    # ========== Defense Mechanism Parameters ==========
    'defense_threshold': 0.05,  # Base threshold for defense mechanism (float, lower = more strict)
    'tolerance_factor': 3.0,  # Tolerance factor for defense mechanism (float, higher = more lenient)
    'similarity_alpha': 0.5,  # Weight for pairwise similarities in mixed similarity computation (float, 0.0-1.0)

    # ========== Visualization ==========
    'generate_plots': True,  # Whether to generate visualization plots (bool)
    'run_both_experiments': False,  # Set to True to run baseline + attack (for Figure 5)
    'run_attack_only': False,  # Set to True to only run attack experiment
}

# Only None means "no limit"; an explicit check keeps a numeric 0 from being
# reported as the full dataset.
_dataset_limit = EXPERIMENT_CONFIG['dataset_size_limit']

print("✅ Configuration loaded!")
print(f"\nExperiment: {EXPERIMENT_CONFIG['experiment_name']}")
print(f"Rounds: {EXPERIMENT_CONFIG['num_rounds']}")
print(f"Dataset limit: {'Full dataset' if _dataset_limit is None else _dataset_limit}")


In [None]:
# Quick Test Configuration (uncomment to use)
# QUICK_TEST_CONFIG = {
#     'experiment_name': 'colab_quick_test',
#     'seed': 42,
#     'num_clients': 6,
#     'num_attackers': 2,
#     'num_rounds': 5,  # Reduced rounds
#     'client_lr': 2e-5,
#     'server_lr': 0.8,
#     'batch_size': 16,
#     'local_epochs': 5,
#     'alpha': 0.01,
#     'dirichlet_alpha': 0.5,
#     'test_sample_rate': 1.0,
#     'dataset_size_limit': 10000,  # Limited dataset
#     'poison_rate': 1.0,
#     'attack_start_round': 3,
#     'd_T': 0.5,
#     'gamma': 10.0,
#     'dim_reduction_size': 5000,
#     'vgae_epochs': 10,
#     'vgae_lr': 0.01,
#     'vgae_lambda': 0.5,
#     'camouflage_steps': 20,
#     'camouflage_lr': 0.1,
#     'lambda_proximity': 1.0,
#     'lambda_aggregation': 0.5,
#     'graph_threshold': 0.5,
#     'defense_threshold': 0.10,
#     'similarity_alpha': 0.7,
#     'generate_plots': True,
#     'run_both_experiments': False,
#     'run_attack_only': False,
# }

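# To use quick test: uncomment the block above, then set EXPERIMENT_CONFIG = QUICK_TEST_CONFIG before running Step 4.
# Note: QUICK_TEST_CONFIG does not define 'test_batch_size' or 'tolerance_factor', which EXPERIMENT_CONFIG does;
# add them if the experiment code requires those keys.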


## Step 4: Run Experiment


In [None]:
# Import and run the experiment
import sys
import traceback
import warnings

# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Import modules
from main import run_experiment, analyze_results

# Run experiment
print("🚀 Starting GRMP Attack Experiment...")
print("=" * 60)

try:
    results, metrics = run_experiment(EXPERIMENT_CONFIG)

    # Analyze results
    analyze_results(metrics)
except Exception as e:
    # Top-level notebook boundary: report the failure with its traceback instead of
    # raising, so the remaining cells can still be executed.
    print(f"\n❌ Experiment failed: {e}")
    traceback.print_exc()
else:
    print("\n✅ Experiment completed successfully!")


## Step 5: View Results and Visualizations


In [None]:
# Display visualization plots
from IPython.display import Image, display
from pathlib import Path

results_dir = Path("results")
experiment_name = EXPERIMENT_CONFIG['experiment_name']

# (title, file name) pairs; each file is looked up inside results_dir.
figures = [
    ("Figure 3: Global Accuracy and ASR", f"{experiment_name}_figure3.png"),
    ("Figure 4: Cosine Similarity", f"{experiment_name}_figure4.png"),
    ("Figure 5: Local Accuracy (No Attack)", f"{experiment_name}_figure5.png"),
    ("Figure 6: Local Accuracy (With Attack)", f"{experiment_name}_figure6.png"),
]

print("📊 Displaying Visualization Figures:")
print("=" * 60)

# Show every figure that exists on disk; a missing file is reported, not an error.
for fig_title, fig_name in figures:
    fig_path = results_dir / fig_name
    if fig_path.exists():
        print(f"\n✅ {fig_title}")
        display(Image(str(fig_path)))
    else:
        print(f"\n⚠️  {fig_title} not found")


In [None]:
# Load and display experiment results summary
import json
from pathlib import Path

results_dir = Path("results")
experiment_name = EXPERIMENT_CONFIG['experiment_name']
results_path = results_dir / f"{experiment_name}_results.json"


def _client_sort_key(item):
    """Sort key for (client_id, accuracies) pairs.

    JSON object keys are strings, so purely numeric ids are ordered by their
    integer value ('2' before '10'); any other id sorts after them, alphabetically.
    """
    client_id = str(item[0])
    if client_id.isdigit():
        return (0, int(client_id), '')
    return (1, 0, client_id)


if results_path.exists():
    with open(results_path, 'r', encoding='utf-8') as fh:
        results_data = json.load(fh)

    print("📊 Experiment Results Summary:")
    print("=" * 60)

    # Display key metrics
    progressive = results_data['progressive_metrics']
    rounds = progressive['rounds']
    clean_acc = progressive['clean_acc']
    attack_asr = progressive['attack_asr']

    print(f"\nTotal Rounds: {len(rounds)}")
    if clean_acc and attack_asr:
        print(f"Final Clean Accuracy: {clean_acc[-1]:.4f}")
        print(f"Final Attack Success Rate (ASR): {attack_asr[-1]:.4f}")
        print(f"Peak ASR: {max(attack_asr):.4f}")
    else:
        # Empty metric lists would make the [-1] lookups and max() raise.
        print("⚠️  No per-round metrics recorded in the results file.")

    # Display per-round summary
    print("\n📈 Per-Round Summary:")
    print("Round | Clean Acc | ASR")
    print("-" * 30)
    last_index = len(rounds) - 1
    for i, (r, acc, asr) in enumerate(zip(rounds, clean_acc, attack_asr)):
        if i % 5 == 0 or i == last_index:  # Show every 5th round and last round
            print(f"{r:5d} | {acc:9.4f} | {asr:.4f}")

    # Display local accuracies if available
    local_accs = results_data.get('local_accuracies')
    if local_accs:
        print("\n📊 Local Accuracies (Last Round):")
        for client_id, accs in sorted(local_accs.items(), key=_client_sort_key):
            if accs:
                print(f"  Client {client_id}: {accs[-1]:.4f}")
else:
    print(f"⚠️  Results file not found: {results_path}")


## Step 6: Download Results


In [None]:
# Create a zip file with all results
import zipfile
from pathlib import Path

results_dir = Path("results")
zip_path = "grmp_experiment_results.zip"

# is_dir() rather than exists(): a plain file named "results" has nothing to archive.
if results_dir.is_dir():
    # Entry names are taken relative to the parent of results_dir, so every entry
    # keeps its leading "results/" component inside the archive.
    archive_root = results_dir.parent
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Sorted so the entry order in the archive is deterministic.
        for file_path in sorted(results_dir.rglob('*')):
            if file_path.is_file():
                zipf.write(file_path, file_path.relative_to(archive_root))

    print(f"✅ Created zip file: {zip_path}")
    print("\n📥 Download the file using the cell below")
else:
    print("⚠️  Results directory not found.")


In [None]:
# Download results zip file
# Imported here so this cell does not depend on an earlier cell having imported Path.
from pathlib import Path

try:
    from google.colab import files
except ImportError:
    # google.colab could not be imported in this environment (e.g. outside Colab).
    files = None

archive = Path("grmp_experiment_results.zip")

if not archive.exists():
    print("⚠️  Zip file not found. Run the previous cell first.")
elif files is None:
    print(f"⚠️  google.colab is not available here; the archive is at {archive.resolve()}")
else:
    files.download('grmp_experiment_results.zip')
    print("✅ Download started!")