In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colab only: mount Google Drive so that paths under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Global variables
# Base folder on the mounted Google Drive; the result folder below is appended to it.
root_path = "/content/drive/MyDrive/Colab Notebooks/Federated Learning/Final_code/"

# Folder holding the JSON result files and the display name of the dataset.
# load_data() picks the 'fmnist' file names when the display name contains "MNIST".
data_folder = root_path + "F-MNIST_result_new"
dataset_name = "Fashion - MNIST"

# Algorithm name -> hex colour used for that algorithm's curves in the plots.
COLORS = {
    'FedAdam': '#e74c3c',
    'FedAvg': '#3498db',
    'FedProx': '#27ae60'
}

def load_data(data_folder=data_folder, dataset_name=dataset_name):
    """
    Load the JSON result files of every (algorithm, alpha) run for one dataset.

    For each alpha in (0.3, 0.9) and each algorithm in (FedAvg, FedProx,
    FedAdam) the file ``<data_folder>/alpha_<tag>/<algorithm>_<suffix>_<tag>.json``
    is read, where ``<tag>`` is ``03`` / ``09`` and ``<suffix>`` is ``fmnist``
    when ``dataset_name`` contains "mnist" (case-insensitive) and ``cifar10``
    otherwise. Missing or unreadable files are reported and skipped.

    Args:
        data_folder: directory (str or Path) containing the ``alpha_03`` and
            ``alpha_09`` sub-folders.
        dataset_name: display name of the dataset; selects the file suffix and
            is echoed in the final summary line.

    Returns:
        dict mapping ``"<algorithm>_<alpha>"`` to a dict with the keys
        ``algorithm``, ``alpha`` (float), ``metrics`` (the JSON ``metrics``
        value) and ``strategy_name`` (JSON value, falling back to the algorithm
        name); ``None`` when not a single file could be loaded.
    """
    data_folder = Path(data_folder)
    data = {}

    print(f"🔄 Đang load dữ liệu từ {data_folder}...")

    dataset_suffix = 'fmnist' if 'mnist' in dataset_name.lower() else 'cifar10'

    # (alpha as written in the result key, alpha as written in folder/file names)
    alpha_tags = [('0.3', '03'), ('0.9', '09')]
    algorithms = ['FedAvg', 'FedProx', 'FedAdam']

    # Alpha is the outer loop so the resulting dict keeps the order
    # FedAvg/FedProx/FedAdam for 0.3 followed by the same for 0.9.
    expected_files = {
        f'{algorithm}_{alpha}': f'alpha_{tag}/{algorithm}_{dataset_suffix}_{tag}.json'
        for alpha, tag in alpha_tags
        for algorithm in algorithms
    }

    loaded_count = 0
    for key, relative_path in expected_files.items():
        file_path = data_folder / relative_path

        if not file_path.exists():
            print(f"⚠️  Không tìm thấy: {file_path}")
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data_content = json.load(f)

            algorithm, alpha_text = key.split('_')
            data[key] = {
                'algorithm': algorithm,
                'alpha': float(alpha_text),
                'metrics': data_content['metrics'],
                'strategy_name': data_content.get('strategy_name', algorithm),
            }
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: an unreadable, malformed or incomplete file is
            # reported and skipped. json.JSONDecodeError and UnicodeDecodeError
            # are ValueError subclasses; unrelated errors are not swallowed.
            print(f"❌ Lỗi load {file_path}: {e}")
            continue

        loaded_count += 1
        print(f"✅ Loaded: {file_path}")

    print(f"🎉 Đã load thành công {loaded_count}/{len(expected_files)} files cho {dataset_name}")
    return data if loaded_count > 0 else None



In [None]:
# Load all available result files once; `data` is None when no file was found.
data = load_data(data_folder= data_folder, dataset_name= dataset_name)

In [None]:
def create_summary_stats(data, dataset_name=dataset_name):
    """Print a short overview of every loaded run.

    For each entry the final-round training accuracy, final-round validation
    accuracy (0 when the round has no 'val_acc'), the summed
    'round_total_time' converted from seconds to hours, and the number of
    recorded rounds are printed.

    Args:
        data: mapping run key -> dict with 'algorithm', 'alpha' and a
            'metrics' list of per-round dicts. When empty or None a notice is
            printed and nothing else happens.
        dataset_name: name shown in the header line.

    Returns:
        None.
    """
    if not data:
        print("❌ Chưa có dữ liệu để phân tích!")
        return

    print(f"\n📊 THỐNG KÊ TỔNG QUAN - {dataset_name}")
    print("="*60)

    for key, data_item in data.items():
        metrics = data_item['metrics']
        if not metrics:
            # A run without rounds has no "final" values; report it instead of
            # failing with IndexError on metrics[-1].
            print(f"\n⚠️  {key}: không có metrics")
            continue

        final_round = metrics[-1]
        final_train_acc = final_round['train_acc'] * 100
        final_val_acc = final_round.get('val_acc', 0) * 100
        total_time_hours = sum(m.get('round_total_time', 0) for m in metrics) / 3600

        print(f"\n{data_item['algorithm']} (α={data_item['alpha']}):")
        print(f"  📈 Final Train Acc: {final_train_acc:.2f}%")
        print(f"  🎯 Final Val Acc: {final_val_acc:.2f}%")
        print(f"  ⏱️  Total Time: {total_time_hours:.2f} hours")
        print(f"  🔄 Total Rounds: {len(metrics)}")



In [None]:
# Banner marking the start of the analysis output.
print(f"\n🚀 BẮT ĐẦU PHÂN TÍCH FEDERATED LEARNING - {dataset_name}")
print("="*80)

# 1. Summary statistics
create_summary_stats(data, dataset_name)


In [None]:
def plot_individual_results(data, dataset_name=dataset_name):
    """1. Visualise the results of each algorithm.

    Builds a 2x3 grid with one column per algorithm (FedAvg, FedProx,
    FedAdam): training metrics on the top row, validation metrics on the
    bottom row. Each panel shows accuracy in percent (solid line, left axis)
    and loss (dashed line, twin right axis) for alpha 0.3 and 0.9 whenever
    that run is present in ``data``. The figure is written to a PNG in the
    working directory and then shown.

    Args:
        data: mapping "<algorithm>_<alpha>" -> dict with a 'metrics' list of
            per-round dicts. The function returns at once when it is empty or None.
        dataset_name: used in the figure title and in the output file name.
    """
    if not data:
        return

    def draw_curves(acc_axis, loss_axis, algorithm, tag, read_acc, read_loss):
        # Adds one accuracy line and one loss line for every alpha available.
        for alpha in [0.3, 0.9]:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            history = data[key]['metrics']
            # Use the recorded round number, else the 1-based position.
            rounds = [m.get('round', position) for position, m in enumerate(history, start=1)]
            acc_percent = [read_acc(m) * 100 for m in history]
            loss_values = [read_loss(m) for m in history]
            emphasised = alpha == 0.3  # the alpha=0.3 run is drawn more opaque

            acc_axis.plot(rounds, acc_percent, '-',
                          label=f'{tag} Acc α={alpha}',
                          color=COLORS[algorithm],
                          alpha=0.8 if emphasised else 0.5,
                          linewidth=2)
            loss_axis.plot(rounds, loss_values, '--',
                           label=f'{tag} Loss α={alpha}',
                           color=COLORS[algorithm],
                           alpha=0.6 if emphasised else 0.3,
                           linewidth=2)

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle(f'Kết quả từng thuật toán - {dataset_name}', fontsize=16, fontweight='bold')

    for column, algorithm in enumerate(['FedAvg', 'FedProx', 'FedAdam']):
        # Top row: training metrics (these keys are required in every round).
        train_acc_axis = axes[0, column]
        train_loss_axis = train_acc_axis.twinx()
        draw_curves(train_acc_axis, train_loss_axis, algorithm, 'Train',
                    lambda m: m['train_acc'],
                    lambda m: m['train_loss'])

        train_acc_axis.set_xlabel('Round')
        train_acc_axis.set_ylabel('Training Accuracy (%)', color='black')
        train_loss_axis.set_ylabel('Training Loss', color='gray')
        train_acc_axis.set_title(f'{algorithm} - Training Metrics')
        train_acc_axis.grid(True, alpha=0.3)

        # Bottom row: validation metrics (missing values are plotted as 0).
        val_acc_axis = axes[1, column]
        val_loss_axis = val_acc_axis.twinx()
        draw_curves(val_acc_axis, val_loss_axis, algorithm, 'Val',
                    lambda m: m.get('val_acc', 0),
                    lambda m: m.get('val_loss', 0))

        val_acc_axis.set_xlabel('Round')
        val_acc_axis.set_ylabel('Validation Accuracy (%)', color='black')
        val_loss_axis.set_ylabel('Validation Loss', color='gray')
        val_acc_axis.set_title(f'{algorithm} - Validation Metrics')
        val_acc_axis.grid(True, alpha=0.3)

        # Accuracy legend sits centre-right, loss legend just below it.
        train_acc_axis.legend(loc='center right')
        train_loss_axis.legend(loc='upper right', bbox_to_anchor=(1.01, 0.4))
        val_acc_axis.legend(loc='center right')
        val_loss_axis.legend(loc='upper right', bbox_to_anchor=(1.01, 0.4))

    plt.tight_layout()
    output_name = f'individual_results_{dataset_name.lower().replace("-", "_")}.png'
    plt.savefig(output_name, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# 2. Analysis plots: train/validation curves per algorithm
print("\n📊 Đang tạo biểu đồ...")
plot_individual_results(data, dataset_name)

In [None]:
def plot_individual_algorithm_metrics(data, dataset_name=dataset_name):
    """
    Detailed view of each algorithm with four separate metric panels.

    For every algorithm (FedAvg, FedProx, FedAdam) one 2x2 figure is drawn
    with train accuracy, train loss, validation accuracy and validation loss,
    each showing the alpha=0.3 run (solid) and the alpha=0.9 run (dashed).
    The figure is saved as a PNG, shown, and followed by a printed summary of
    the final-round values and the averages over the last 20 rounds.

    Args:
        data: mapping "<algorithm>_<alpha>" -> dict with a 'metrics' list of
            per-round dicts. When empty or None a notice is printed and
            nothing is drawn.
        dataset_name: used in figure titles and output file names.

    Returns:
        None.
    """
    if not data:
        print("❌ Chưa có dữ liệu để phân tích!")
        return

    algorithms = ['FedAvg', 'FedProx', 'FedAdam']

    for algorithm in algorithms:
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle(f'Chi tiết metrics cho {algorithm} - {dataset_name}',
                     fontsize=16, fontweight='bold')

        # (metric key, y-label, target subplot, convert fraction to percent?)
        metrics_config = [
            ('train_acc', 'Training Accuracy (%)', axes[0, 0], True),
            ('train_loss', 'Training Loss', axes[0, 1], False),
            ('val_acc', 'Validation Accuracy (%)', axes[1, 0], True),
            ('val_loss', 'Validation Loss', axes[1, 1], False)
        ]

        for metric_key, ylabel, ax, to_percentage in metrics_config:
            # Every y-value drawn on this subplot; drives legend and y-limits.
            plotted_values = []

            for alpha in [0.3, 0.9]:
                key = f"{algorithm}_{alpha}"
                if key not in data:
                    continue

                metrics = data[key]['metrics']
                rounds = [m.get('round', i + 1) for i, m in enumerate(metrics)]

                if to_percentage:
                    values = [m.get(metric_key, 0) * 100 for m in metrics]
                else:
                    values = [m.get(metric_key, 0) for m in metrics]

                # alpha=0.3 is the emphasised run: solid, thicker, more opaque.
                is_low_alpha = alpha == 0.3
                ax.plot(rounds, values, '-' if is_low_alpha else '--',
                        label=f'α = {alpha}',
                        color=COLORS[algorithm],
                        alpha=0.8 if is_low_alpha else 0.6,
                        linewidth=2.5 if is_low_alpha else 2)
                plotted_values.extend(values)

            ax.set_xlabel('Round', fontsize=12)
            ax.set_ylabel(ylabel, fontsize=12)
            ax.set_title(f'{algorithm} - {ylabel}', fontsize=14)
            if plotted_values:
                # A legend without any labelled line only produces a warning.
                ax.legend(fontsize=11)
            ax.grid(True, alpha=0.3)

            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)

            if 'acc' in metric_key.lower() and to_percentage:
                ax.set_ylim(0, 100)
            elif 'loss' in metric_key.lower():
                # Leave 10% headroom above the largest plotted loss, whichever
                # alpha runs are available; fall back to 0..1.1 when nothing
                # positive was plotted.
                max_val = max(plotted_values, default=0)
                ax.set_ylim(0, max_val * 1.1 if max_val > 0 else 1.1)

        plt.tight_layout()
        plt.savefig(f'{algorithm.lower()}_detailed_metrics_{dataset_name.lower().replace("-", "_")}.png',
                   dpi=300, bbox_inches='tight')
        plt.show()
        # One figure per algorithm: release it before creating the next one.
        plt.close(fig)

        print(f"\n📊 THỐNG KÊ CHI TIẾT CHO {algorithm}")
        print("-" * 50)

        for alpha in [0.3, 0.9]:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            if not metrics:
                # No recorded rounds: there is no final value to report.
                continue

            last_20 = metrics[-20:]  # slicing already copes with shorter lists
            final_round = metrics[-1]

            final_train_acc = final_round['train_acc'] * 100
            final_val_acc = final_round.get('val_acc', 0) * 100
            final_train_loss = final_round['train_loss']
            final_val_loss = final_round.get('val_loss', 0)

            avg_train_acc = np.mean([m['train_acc'] for m in last_20]) * 100
            avg_val_acc = np.mean([m.get('val_acc', 0) for m in last_20]) * 100

            print(f"α = {alpha}:")
            print(f"  📈 Final Train Acc: {final_train_acc:.2f}% | Avg (20 rounds): {avg_train_acc:.2f}%")
            print(f"  🎯 Final Val Acc: {final_val_acc:.2f}% | Avg (20 rounds): {avg_val_acc:.2f}%")
            print(f"  📉 Final Train Loss: {final_train_loss:.4f}")
            print(f"  📉 Final Val Loss: {final_val_loss:.4f}")


def plot_comparative_summary(data, dataset_name=dataset_name):
    """
    Overview figure comparing all algorithms on the four main metrics.

    Draws a 2x2 grid (train accuracy, train loss, validation accuracy,
    validation loss). Every available (algorithm, alpha) run contributes one
    line per panel: solid for alpha=0.3, dashed and fainter for alpha=0.9.
    The figure is saved as a PNG and shown. Returns immediately, without
    output, when ``data`` is empty or None.
    """
    if not data:
        return

    fig, axes = plt.subplots(2, 2, figsize=(18, 14))
    fig.suptitle(f'So sánh tổng quan tất cả thuật toán - {dataset_name}',
                 fontsize=18, fontweight='bold')

    # (subplot, metric key, y-label / title, convert fraction to percent?)
    panels = [
        (axes[0, 0], 'train_acc', 'Training Accuracy (%)', True),
        (axes[0, 1], 'train_loss', 'Training Loss', False),
        (axes[1, 0], 'val_acc', 'Validation Accuracy (%)', True),
        (axes[1, 1], 'val_loss', 'Validation Loss', False),
    ]
    # Drawing order of the runs: algorithm first, then alpha.
    runs = [(algorithm, alpha)
            for algorithm in ['FedAvg', 'FedProx', 'FedAdam']
            for alpha in [0.3, 0.9]]

    for panel, metric_key, ylabel, to_percentage in panels:
        for algorithm, alpha in runs:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            history = data[key]['metrics']
            rounds = [m.get('round', position) for position, m in enumerate(history, start=1)]

            values = [m.get(metric_key, 0) for m in history]
            if to_percentage:
                values = [value * 100 for value in values]

            solid = alpha == 0.3
            panel.plot(rounds, values, '-' if solid else '--',
                       label=f'{algorithm} α={alpha}',
                       color=COLORS[algorithm],
                       alpha=0.8 if solid else 0.5,
                       linewidth=2)

        panel.set_xlabel('Round', fontsize=12)
        panel.set_ylabel(ylabel, fontsize=12)
        panel.set_title(ylabel, fontsize=14, fontweight='bold')
        # Legend is placed outside the plotting area, to the right.
        panel.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)
        panel.grid(True, alpha=0.3)

        for side in ('top', 'right'):
            panel.spines[side].set_visible(False)

    plt.tight_layout()
    output_name = f'comparative_summary_{dataset_name.lower().replace("-", "_")}.png'
    plt.savefig(output_name, dpi=300, bbox_inches='tight')
    plt.show()


# Run the new detailed per-algorithm plots as part of the main flow.
# NOTE(review): plot_comparative_summary is defined above but not called in this cell.
print("\n🎨 Đang tạo biểu đồ chi tiết cho từng thuật toán...")
plot_individual_algorithm_metrics(data, dataset_name)


In [None]:

def plot_comparison_by_alpha(data, dataset_name=dataset_name):
    """2. Compare the algorithms side by side for each alpha.

    Draws a 2x2 grid: one column per alpha (0.3, 0.9), training accuracy on
    the top row and validation accuracy on the bottom row, one line per
    algorithm that is present in ``data``. The figure is saved as a PNG in
    the working directory and shown.

    Args:
        data: mapping "<algorithm>_<alpha>" -> dict with a 'metrics' list of
            per-round dicts. When empty or None a notice is printed and
            nothing is drawn.
        dataset_name: used in the figure title and the output file name.

    Returns:
        None.
    """
    if not data:
        # load_data() returns None when nothing was loaded; without this guard
        # the `key in data` test below raises TypeError.
        print("❌ Chưa có dữ liệu để phân tích!")
        return

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle(f'So sánh kết quả theo Alpha - {dataset_name}', fontsize=16, fontweight='bold')

    for alpha_idx, alpha in enumerate([0.3, 0.9]):
        train_ax = axes[0, alpha_idx]
        val_ax = axes[1, alpha_idx]

        for algorithm in ['FedAvg', 'FedProx', 'FedAdam']:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            # Use the recorded round number, else the 1-based position.
            rounds = [m.get('round', i + 1) for i, m in enumerate(metrics)]
            train_acc = [m['train_acc'] * 100 for m in metrics]
            val_acc = [m.get('val_acc', 0) * 100 for m in metrics]

            train_ax.plot(rounds, train_acc, '-', label=algorithm,
                          color=COLORS[algorithm], linewidth=2, marker='o', markersize=3)
            val_ax.plot(rounds, val_acc, '-', label=algorithm,
                        color=COLORS[algorithm], linewidth=2, marker='s', markersize=3)

        # Both panels of a column share the same decoration apart from the title.
        for ax, title in ((train_ax, f'Training Accuracy (α={alpha})'),
                          (val_ax, f'Validation Accuracy (α={alpha})')):
            ax.set_title(title)
            ax.set_xlabel('Round')
            ax.set_ylabel('Accuracy (%)')
            ax.legend()
            ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'comparison_by_alpha_{dataset_name.lower().replace("-", "_")}.png',
               dpi=300, bbox_inches='tight')
    plt.show()



In [None]:
# Accuracy comparison of the three algorithms, one column per alpha.
plot_comparison_by_alpha(data, dataset_name)

In [None]:
def format_time(seconds):
    """Turn a duration in seconds into a short human-readable string.

    Below one minute the value is shown in seconds with one decimal, below one
    hour in minutes with one decimal, and otherwise in hours with two decimals.
    """
    if seconds < 60:
        return f"{seconds:.1f}s"
    if seconds < 3600:
        return f"{seconds / 60:.1f} phút"
    return f"{seconds / 3600:.2f} giờ"

In [None]:
def plot_time_comparison(data, dataset_name=dataset_name):
    """3. Compare training time between the algorithms.

    Two figures are produced and shown together:

    * a 2x2 grid with one column per alpha (0.3, 0.9): time of every round in
      seconds on the top row, cumulative time in hours on the bottom row;
    * a pair of bar charts (one per alpha) with the average time per round in
      minutes, algorithms sorted from slowest to fastest. A run that is not
      loaded, or has no rounds, is shown as 0.

    Round times are read from 'round_total_time' (treated as seconds; 0 when
    the key is absent).

    Args:
        data: mapping "<algorithm>_<alpha>" -> dict with a 'metrics' list of
            per-round dicts. When empty or None a notice is printed and
            nothing is drawn.
        dataset_name: used in the title of the first figure.

    Returns:
        None.
    """
    if not data:
        print("❌ Chưa có dữ liệu để phân tích!")
        return

    # ---- Figure 1: per-round and cumulative time curves -------------------
    line_fig, line_axes = plt.subplots(2, 2, figsize=(16, 12))
    line_fig.suptitle(f'So sánh thời gian Training - {dataset_name}', fontsize=16, fontweight='bold')

    for alpha_idx, alpha in enumerate([0.3, 0.9]):
        per_round_ax = line_axes[0, alpha_idx]   # seconds per round
        cumulative_ax = line_axes[1, alpha_idx]  # running total in hours

        for algorithm in ['FedAvg', 'FedProx', 'FedAdam']:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            rounds = [m.get('round', i + 1) for i, m in enumerate(metrics)]
            round_times_seconds = [m.get('round_total_time', 0) for m in metrics]
            cumulative_times_hours = np.cumsum(round_times_seconds) / 3600

            per_round_ax.plot(rounds, round_times_seconds, '-', label=algorithm,
                              color=COLORS[algorithm], linewidth=2, alpha=0.7)
            cumulative_ax.plot(rounds, cumulative_times_hours, '-', label=algorithm,
                               color=COLORS[algorithm], linewidth=2, marker='o', markersize=3)

        per_round_ax.set_title(f'Thời gian mỗi Round (α={alpha})')
        per_round_ax.set_xlabel('Round')
        per_round_ax.set_ylabel('Thời gian (giây)')
        per_round_ax.legend()
        per_round_ax.grid(True, alpha=0.3)

        cumulative_ax.set_title(f'Thời gian tích lũy (α={alpha})')
        cumulative_ax.set_xlabel('Round')
        cumulative_ax.set_ylabel('Tổng thời gian (giờ)')
        cumulative_ax.legend()
        cumulative_ax.grid(True, alpha=0.3)

    # Lay out this figure explicitly; a later plt.tight_layout() would only
    # affect the bar-chart figure created below.
    line_fig.tight_layout()

    # ---- Figure 2: average time per round (minutes), one panel per alpha ---
    algorithms = ['FedAdam', 'FedAvg', 'FedProx']
    width = 0.5

    def average_round_minutes(key):
        # Mean 'round_total_time' of a run in minutes; 0 for a missing/empty run.
        round_times = [m.get('round_total_time', 0)
                       for m in data.get(key, {}).get('metrics', [])]
        return np.mean(round_times) / 60 if round_times else 0

    bar_fig, bar_axes = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
    bar_fig.suptitle('Thời gian trung bình mỗi Round theo Alpha', fontsize=16, fontweight='bold')

    all_minutes = []
    for panel_idx, (alpha, bar_color) in enumerate([(0.3, '#3498db'), (0.9, '#e74c3c')]):
        ax = bar_axes[panel_idx]

        # Slowest algorithm first.
        ranked = sorted(
            ((algorithm, average_round_minutes(f"{algorithm}_{alpha}")) for algorithm in algorithms),
            key=lambda item: item[1],
            reverse=True,
        )
        names, minutes = zip(*ranked)
        positions = np.arange(len(names))
        all_minutes.extend(minutes)

        bars = ax.bar(positions, minutes, width, label=f'α={alpha}', color=bar_color, alpha=0.85)
        ax.set_title(f'α = {alpha}', fontsize=14)
        ax.set_xlabel('Thuật toán', fontsize=12)
        if panel_idx == 0:
            # The y-axis is shared, so only the left panel carries the label.
            ax.set_ylabel('Thời gian (phút)', fontsize=12)
        ax.set_xticks(positions)
        ax.set_xticklabels(names)
        ax.grid(True, alpha=0.3, axis='y')

        # Write the value above every bar.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2, height + 0.05,
                    f'{height:.2f}', ha='center', va='bottom', fontsize=9)

    # Common y-range with a little room for the value labels.
    y_min = min(all_minutes) - 0.1
    y_max = max(all_minutes) + 0.3
    for ax in bar_axes:
        ax.set_ylim(y_min, y_max)

    bar_fig.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [None]:
# Training-time comparison (per-round, cumulative and average time).
plot_time_comparison(data, dataset_name)

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Algorithm name -> hex colour. Holds the same three entries as the COLORS dict
# in the global-variables cell near the top of the notebook.
# NOTE(review): presumably repeated so this cell can run on its own; confirm,
# and consider keeping a single definition.
COLORS = {
    'FedAvg': '#3498db',
    'FedProx': '#27ae60',
    'FedAdam': '#e74c3c',
}

def create_final_stats_table(data, dataset_name=""):
    """Print the last-20-round averages and draw bar and line accuracy charts.

    For every run the training/validation accuracy and loss are averaged over
    the last 20 rounds (all rounds when fewer exist) and the total time is
    summed from 'round_total_time' (seconds) and expressed in hours. One line
    per run is printed, then the averaged validation accuracy is drawn twice:
    as grouped bars and as a line chart, alpha=0.3 versus alpha=0.9.

    The x-axis order is FedProx, FedAdam, FedAvg (restricted to algorithms
    that have at least one run), followed by any other algorithm names in
    alphabetical order. Tick labels and values are built from that same list,
    so a label always sits under its own values. A missing
    (algorithm, alpha) combination is drawn as a gap (NaN) instead of raising.

    Args:
        data: mapping run key -> dict with 'strategy_name', 'alpha' and a
            'metrics' list of per-round dicts; 'algorithm' is used for grouping
            when present, otherwise 'strategy_name'. Nothing happens when the
            mapping is empty or None.
        dataset_name: shown in the printed header and the chart titles.

    Returns:
        None.
    """
    if not data:
        return

    print(f"\n🎯 KẾT QUẢ TRUNG BÌNH 20 ROUND CUỐI - {dataset_name}")
    print("=" * 90)

    results = []
    for data_item in data.values():
        metrics = data_item['metrics']
        if not metrics:
            # No rounds recorded: averaging would only yield NaN rows.
            continue

        last_20 = metrics[-20:]  # slicing already copes with shorter lists
        results.append({
            'Algorithm': data_item['strategy_name'],
            # Grouping key for the charts. The strategy name stored in the
            # JSON may differ from the algorithm name used in the run key.
            'Key': data_item.get('algorithm', data_item['strategy_name']),
            'Alpha': data_item['alpha'],
            'Train Acc (%)': np.mean([m['train_acc'] for m in last_20]) * 100,
            'Val Acc (%)': np.mean([m.get('val_acc', 0) for m in last_20]) * 100,
            'Train Loss': np.mean([m['train_loss'] for m in last_20]),
            'Val Loss': np.mean([m.get('val_loss', 0) for m in last_20]),
            'Total Time (h)': sum(m.get('round_total_time', 0) for m in metrics) / 3600,
        })

    if not results:
        return

    for res in results:
        print(f"{res['Algorithm']:10s} | α={res['Alpha']} | "
              f"Val Acc: {res['Val Acc (%)']:.2f}% | "
              f"Train Acc: {res['Train Acc (%)']:.2f}% | "
              f"Val Loss: {res['Val Loss']:.4f} | "
              f"Train Loss: {res['Train Loss']:.4f} | "
              f"Time: {res['Total Time (h)']:.2f}h")

    # One ordered list drives both the values and the tick labels.
    preferred_order = ['FedProx', 'FedAdam', 'FedAvg']
    present = {r['Key'] for r in results}
    algorithms = [name for name in preferred_order if name in present]
    algorithms += sorted(present.difference(preferred_order))

    def val_acc_for(algorithm, alpha):
        # Averaged validation accuracy of one run, NaN when that run is absent.
        return next((r['Val Acc (%)'] for r in results
                     if r['Key'] == algorithm and r['Alpha'] == alpha), np.nan)

    val_acc_03 = [val_acc_for(name, 0.3) for name in algorithms]
    val_acc_09 = [val_acc_for(name, 0.9) for name in algorithms]

    x = np.arange(len(algorithms))
    width = 0.35

    # Grouped bar chart of the validation accuracy.
    fig, ax1 = plt.subplots(figsize=(10, 6))
    bars1 = ax1.bar(x - width/2, val_acc_03, width, label='α=0.3', color='#3498db', alpha=0.8)
    bars2 = ax1.bar(x + width/2, val_acc_09, width, label='α=0.9', color='#e74c3c', alpha=0.8)

    ax1.set_title(f'So sánh Val Accuracy - {dataset_name}')
    ax1.set_xlabel('Thuật toán')
    ax1.set_ylabel('Validation Accuracy (%)')
    ax1.set_xticks(x)
    ax1.set_xticklabels(algorithms)
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')

    # Write the value above every bar that has one.
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            if not np.isfinite(height):
                continue
            ax1.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                     f'{height:.2f}%', ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.show()

    # Line chart of the same values, one line per alpha.
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(x, val_acc_03, '-o', color='#3498db', label='α=0.3')
    ax.plot(x, val_acc_09, '-o', color='#e74c3c', label='α=0.9')

    for position, low, high in zip(x, val_acc_03, val_acc_09):
        for value in (low, high):
            if np.isfinite(value):
                ax.text(position, value + 0.5, f"{value:.2f}", fontsize=9)

    ax.set_xticks(x)
    ax.set_xticklabels(algorithms)
    ax.set_xlabel('Thuật toán')
    ax.set_ylabel('Validation Accuracy (%)')
    ax.set_title(f'{dataset_name}')
    ax.grid(True, linestyle='--', alpha=0.5)

    # Widen the y-range so the annotations above the points stay visible.
    finite_values = [v for v in val_acc_03 + val_acc_09 if np.isfinite(v)]
    if finite_values:
        ax.set_ylim(min(finite_values) - 1.5, max(finite_values) + 1.5)

    ax.legend()

    plt.tight_layout()
    plt.show()


In [None]:
# Print the last-20-round averages and draw the validation-accuracy charts.
create_final_stats_table(data, dataset_name)

In [None]:
def plot_additional_analysis(data, dataset_name=dataset_name):
    """5. Additional analysis plots.

    Draws one 2x2 figure, saves it as a PNG and shows it:

    1. top-left: round-to-round change of the validation accuracy
       (percentage points) for every run;
    2. top-right: validation accuracy averaged over the last 20 rounds,
       grouped bars per algorithm for alpha 0.3 and 0.9 (0 for a missing run);
    3. bottom-left: training accuracy per round on a logarithmic y-axis;
    4. bottom-right: scatter of total training time (hours) against the
       averaged validation accuracy, one annotated point per run.

    Args:
        data: mapping "<algorithm>_<alpha>" -> dict with a 'metrics' list of
            per-round dicts. When empty or None a notice is printed and
            nothing is drawn.
        dataset_name: used in the figure title and the output file name.

    Returns:
        None.
    """
    if not data:
        print("❌ Chưa có dữ liệu để phân tích!")
        return

    algorithms = ['FedAvg', 'FedProx', 'FedAdam']
    alphas = [0.3, 0.9]

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle(f'Phân tích bổ sung - {dataset_name}', fontsize=16, fontweight='bold')

    # 1. Convergence rate: change in validation accuracy versus the previous round
    ax1 = axes[0, 0]
    for alpha in alphas:
        for algorithm in algorithms:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            val_acc = [m.get('val_acc', 0) * 100 for m in metrics]
            improvements = [val_acc[i] - val_acc[i-1] for i in range(1, len(val_acc))]
            # The first improvement belongs to the second round, so the
            # positional fallback for a missing 'round' starts at 2.
            rounds = [m.get('round', i) for i, m in enumerate(metrics[1:], start=2)]

            linestyle = '-' if alpha == 0.3 else '--'
            ax1.plot(rounds, improvements, linestyle,
                   label=f'{algorithm} α={alpha}',
                   color=COLORS[algorithm], alpha=0.7)

    ax1.set_title('Tốc độ hội tụ (Validation Accuracy)')
    ax1.set_xlabel('Round')
    ax1.set_ylabel('Cải thiện Accuracy (%)')
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax1.grid(True, alpha=0.3)
    ax1.axhline(y=0, color='black', linestyle='-', alpha=0.5)

    # 2. Final performance: validation accuracy averaged over the last 20 rounds
    ax2 = axes[0, 1]

    def mean_final_val_acc(key):
        # Percent value for one run; 0 when the run is missing or has no rounds.
        if key not in data:
            return 0
        last_20 = data[key]['metrics'][-20:]
        if not last_20:
            return 0
        return np.mean([m.get('val_acc', 0) for m in last_20]) * 100

    alpha_03_scores = [mean_final_val_acc(f"{algorithm}_0.3") for algorithm in algorithms]
    alpha_09_scores = [mean_final_val_acc(f"{algorithm}_0.9") for algorithm in algorithms]

    x = np.arange(len(algorithms))
    width = 0.35

    bars1 = ax2.bar(x - width/2, alpha_03_scores, width, label='α=0.3', color='#3498db', alpha=0.8)
    bars2 = ax2.bar(x + width/2, alpha_09_scores, width, label='α=0.9', color='#e74c3c', alpha=0.8)

    ax2.set_title('So sánh hiệu suất cuối cùng')
    ax2.set_xlabel('Thuật toán')
    ax2.set_ylabel('Validation Accuracy (%)')
    ax2.set_xticks(x)
    ax2.set_xticklabels(algorithms)
    ax2.legend()
    ax2.grid(True, alpha=0.3, axis='y')

    # Write the value above every bar.
    for bars in (bars1, bars2):
        for bar in bars:
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                    f'{height:.1f}%', ha='center', va='bottom', fontsize=9)

    # 3. Training accuracy per round (raw fraction, not percent)
    ax3 = axes[1, 0]
    for alpha in alphas:
        for algorithm in algorithms:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            rounds = [m.get('round', i + 1) for i, m in enumerate(metrics)]
            train_acc = [m['train_acc'] for m in metrics]

            linestyle = '-' if alpha == 0.3 else '--'
            ax3.plot(rounds, train_acc, linestyle,
                   label=f'{algorithm} α={alpha}',
                   color=COLORS[algorithm], alpha=0.7)

    ax3.set_title('So sánh Training Accuracy')
    ax3.set_xlabel('Round')
    ax3.set_ylabel('Training Accuracy')
    ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax3.grid(True, alpha=0.3)
    # NOTE(review): the log scale is kept from the original; it is unusual for
    # an accuracy axis and cannot display rounds whose accuracy is 0.
    # Confirm whether training loss was the intended metric here.
    ax3.set_yscale('log')

    # 4. Total training time versus averaged validation accuracy
    ax4 = axes[1, 1]
    for algorithm in algorithms:
        for alpha in alphas:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            metrics = data[key]['metrics']
            if not metrics:
                # No rounds recorded: there is no point to place.
                continue

            last_20 = metrics[-20:]
            avg_val_acc = np.mean([m.get('val_acc', 0) for m in last_20]) * 100
            total_time_hours = sum(m.get('round_total_time', 0) for m in metrics) / 3600

            marker = 'o' if alpha == 0.3 else 's'
            ax4.scatter(total_time_hours, avg_val_acc,
                      color=COLORS[algorithm],
                      s=100, marker=marker, alpha=0.8)

            # Label every point in a tinted box of the algorithm's colour.
            ax4.annotate(f'{algorithm} α={alpha}\n{avg_val_acc:.2f}%, {total_time_hours:.2f}h',
                        (total_time_hours, avg_val_acc),
                        xytext=(10, 10), textcoords='offset points',
                        fontsize=8,
                        bbox=dict(boxstyle="round,pad=0.3",
                                facecolor=COLORS[algorithm],
                                alpha=0.3,
                                edgecolor=COLORS[algorithm]),
                        ha='left')

    ax4.set_title('Thời gian và Accuracy')
    ax4.set_xlabel('Thời gian (giờ)')
    ax4.set_ylabel('Validation Accuracy (%)')
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'additional_analysis_{dataset_name.lower().replace("-", "_")}.png',
               dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Draw the 2x2 additional-analysis figure.
# NOTE(review): the next cell defines plot_additional_analysis again, so re-running
# this cell after that one calls the later definition instead.
plot_additional_analysis(data, dataset_name)

In [None]:
def _convergence_series(metrics):
    """Return ``(rounds, improvements)`` for one run's per-round metrics.

    ``improvements[k]`` is the validation accuracy (in percent) of entry
    ``k + 1`` minus that of entry ``k``; ``rounds[k]`` is the round number of
    entry ``k + 1``. Both lists are empty when fewer than two entries exist.
    """
    val_acc = [m.get('val_acc', 0) * 100 for m in metrics]
    improvements = [curr - prev for prev, curr in zip(val_acc, val_acc[1:])]
    # The first improvement belongs to the SECOND entry, so when an entry has
    # no 'round' field its fallback number must start at 2, not 1.
    rounds = [m.get('round', round_no)
              for round_no, m in enumerate(metrics[1:], start=2)]
    return rounds, improvements


def plot_additional_analysis(data, dataset_name=dataset_name):
    """Plot the round-over-round change in validation accuracy.

    One panel is drawn per alpha value (0.3 on the left, 0.9 on the right),
    with one line per algorithm (FedAvg, FedProx, FedAdam).

    Args:
        data: Mapping keyed by ``"<algorithm>_<alpha>"``; each value must hold
            a ``'metrics'`` list of per-round dicts. ``'val_acc'`` defaults to
            0 and ``'round'`` defaults to the entry's 1-based position when
            absent. Keys missing from ``data`` are skipped.
        dataset_name: Label used in the figure title and output file name.

    Side effects:
        Saves ``convergence_rate_<dataset>.png`` (300 dpi) in the current
        working directory and shows the figure.

    NOTE(review): this notebook defines ``plot_additional_analysis`` more than
    once; whichever definition cell ran last is the one that gets called.
    """
    algorithms = ['FedAvg', 'FedProx', 'FedAdam']
    panel_alphas = [0.3, 0.9]

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    fig.suptitle(f'Tốc độ hội tụ (Validation Accuracy) - {dataset_name}', fontsize=16, fontweight='bold')

    for ax, alpha in zip(axes, panel_alphas):
        # Solid lines for alpha=0.3, dashed for alpha=0.9.
        linestyle = '-' if alpha == 0.3 else '--'

        for algorithm in algorithms:
            key = f"{algorithm}_{alpha}"
            if key not in data:
                continue

            rounds, improvements = _convergence_series(data[key]['metrics'])
            ax.plot(rounds, improvements, linestyle,
                    label=f'{algorithm} α={alpha}',
                    color=COLORS[algorithm], alpha=0.7)

        ax.set_title(f'α = {alpha}', fontsize=13, fontweight='bold')
        ax.set_xlabel('Round')
        ax.legend(loc='upper right')
        ax.grid(True, alpha=0.3)
        # Zero line separates rounds that improved from rounds that regressed.
        ax.axhline(y=0, color='black', linestyle='-', alpha=0.5)

    # Only the left panel carries the y-axis label.
    axes[0].set_ylabel('Cải thiện Accuracy (%)')

    plt.tight_layout()
    # Use a dedicated file name: 'additional_analysis_*.png' is already written
    # by the earlier analysis figure and would be overwritten here.
    plt.savefig(f'convergence_rate_{dataset_name.lower().replace("-", "_")}.png',
               dpi=300, bbox_inches='tight')
    plt.show()

# Draw the convergence-rate figure using the definition from this cell.
plot_additional_analysis(data, dataset_name)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_additional_analysis(data, dataset_name="Dataset"):
    """Plot train/validation accuracy and loss per round on a 2x2 grid.

    Accuracy panels occupy the left column and loss panels the right column.
    Each panel draws one line per (alpha, algorithm) pair found in ``data``;
    alpha=0.3 is drawn solid and alpha=0.9 dashed. Accuracy values are
    multiplied by 100; loss values are plotted as stored.

    Args:
        data: Mapping keyed by ``"<algorithm>_<alpha>"`` whose values hold a
            ``"metrics"`` list of per-round dicts. Missing keys and empty
            metric lists are skipped; a missing metric value counts as 0 and
            a missing ``"round"`` falls back to the 1-based position.
        dataset_name: Label used in the figure title and output file name.

    Side effects:
        Saves ``metrics_analysis_<dataset>.png`` (300 dpi) and shows the figure.
    """
    palette = {
        'FedAdam': '#e74c3c',
        'FedAvg': '#3498db',
        'FedProx': '#27ae60'
    }

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle(f'Phân tích chi tiết Accuracy và Loss - {dataset_name}', fontsize=16, fontweight='bold')

    # (metric key, panel title, target axes)
    panels = (
        ("train_acc", "Training Accuracy", axes[0, 0]),
        ("val_acc", "Validation Accuracy", axes[1, 0]),
        ("train_loss", "Training Loss", axes[0, 1]),
        ("val_loss", "Validation Loss", axes[1, 1]),
    )

    # Alpha is the outer dimension so legend entries are grouped by alpha.
    run_grid = [(alpha, algo)
                for alpha in (0.3, 0.9)
                for algo in ('FedAvg', 'FedProx', 'FedAdam')]

    for metric_key, title, ax in panels:
        is_accuracy = "acc" in metric_key

        for alpha, algo in run_grid:
            run_key = f"{algo}_{alpha}"
            if run_key not in data:
                continue

            history = data[run_key]["metrics"]
            if not history:
                continue

            xs = [entry.get("round", pos) for pos, entry in enumerate(history, start=1)]
            raw = [entry.get(metric_key, 0) for entry in history]
            # Accuracy is shown in percent; loss is left untouched.
            ys = [value * 100 for value in raw] if is_accuracy else raw

            dash = '-' if alpha == 0.3 else '--'
            ax.plot(xs, ys, dash,
                    label=f"{algo} α={alpha}",
                    color=palette[algo], alpha=0.8)

        ax.set_title(title, fontsize=14)
        ax.set_xlabel("Round", fontsize=12)
        if is_accuracy:
            ax.set_ylabel("Giá trị (%)", fontsize=12)
        else:
            ax.set_ylabel("Loss", fontsize=12)
        ax.grid(True, alpha=0.3)
        # Only the accuracy panels (left column) get a legend.
        if is_accuracy:
            ax.legend(loc='center right')

    plt.tight_layout()
    plt.savefig(f'metrics_analysis_{dataset_name.lower().replace("-", "_")}.png',
               dpi=300, bbox_inches='tight')
    plt.show()

# Draw the accuracy/loss grid using the definition from this cell.
plot_additional_analysis(data, dataset_name)

In [None]:
def _mean_recent_val_acc(metrics, window=20):
    """Mean ``val_acc`` (in percent) over the last ``window`` entries of ``metrics``."""
    return np.mean([m.get('val_acc', 0) for m in metrics[-window:]]) * 100


def generate_conclusions(data, dataset_name=dataset_name):
    """Print a textual summary of the best runs and the effect of alpha.

    Three winners are reported across all runs: highest mean validation
    accuracy over the last 20 rounds, lowest total run time, and highest
    validation accuracy in the final round. Afterwards, for every algorithm
    that has both an alpha=0.3 and an alpha=0.9 run, the difference between
    their last-20-round mean accuracies is printed.

    Args:
        data: Mapping keyed by ``"<algorithm>_<alpha>"``. Each value must
            provide ``'metrics'`` (list of per-round dicts), ``'algorithm'``
            and ``'alpha'``. Per-round ``'val_acc'`` and ``'round_total_time'``
            default to 0 when absent. ``'round_total_time'`` is summed and
            divided by 3600 before being printed as hours, so it is presumably
            in seconds (TODO confirm against the training logs).
        dataset_name: Label printed in the report header.

    Returns:
        None. Nothing is printed when ``data`` is empty or when no run has any
        recorded metrics.
    """
    if not data:
        return

    # Runs with an empty metrics list cannot be ranked: metrics[-1] would raise
    # IndexError and the mean of an empty list is NaN, so leave them out.
    runs = {key: item for key, item in data.items() if item['metrics']}
    if not runs:
        return

    print(f"\n🔍 KẾT LUẬN VÀ KHUYẾN NGHỊ - {dataset_name}")
    print("="*60)

    # Track the best run for each criterion.
    best_acc = {'algorithm': '', 'alpha': 0, 'score': 0}
    best_time = {'algorithm': '', 'alpha': 0, 'time': float('inf')}
    best_convergence = {'algorithm': '', 'alpha': 0, 'final_acc': 0}

    for data_item in runs.values():
        metrics = data_item['metrics']
        identity = {'algorithm': data_item['algorithm'], 'alpha': data_item['alpha']}

        avg_val_acc = _mean_recent_val_acc(metrics)
        total_time = sum(m.get('round_total_time', 0) for m in metrics) / 3600
        final_acc = metrics[-1].get('val_acc', 0) * 100

        if avg_val_acc > best_acc['score']:
            best_acc = {**identity, 'score': avg_val_acc}

        if total_time < best_time['time']:
            best_time = {**identity, 'time': total_time}

        if final_acc > best_convergence['final_acc']:
            best_convergence = {**identity, 'final_acc': final_acc}

    print(f"🏆 Thuật toán tốt nhất về Accuracy: {best_acc['algorithm']} (α={best_acc['alpha']}) - {best_acc['score']:.2f}%")
    print(f"⚡ Thuật toán nhanh nhất: {best_time['algorithm']} (α={best_time['alpha']}) - {best_time['time']:.2f}h")
    print(f"🎯 Hội tụ tốt nhất: {best_convergence['algorithm']} (α={best_convergence['alpha']}) - {best_convergence['final_acc']:.2f}%")

    # Effect of alpha: compare the two alpha settings per algorithm.
    print(f"\n📈 PHÂN TÍCH ẢNH HƯỞNG CỦA ALPHA:")
    for algorithm in ['FedAvg', 'FedProx', 'FedAdam']:
        key_03 = f"{algorithm}_0.3"
        key_09 = f"{algorithm}_0.9"

        if key_03 not in runs or key_09 not in runs:
            continue

        acc_03 = _mean_recent_val_acc(runs[key_03]['metrics'])
        acc_09 = _mean_recent_val_acc(runs[key_09]['metrics'])
        # Positive diff means alpha=0.3 scored higher than alpha=0.9.
        diff = acc_03 - acc_09

        if diff < 0:
            trend = f"giảm {abs(diff):.2f}%"
        elif diff > 0:
            trend = f"tăng {diff:.2f}%"
        else:
            trend = "không đổi"
        print(f"  {algorithm}: Độ chính xác khi α=0.3 so với α=0.9: {trend}")



In [None]:
# 4. Conclusions and recommendations
generate_conclusions(data, dataset_name)

# Closing banner marking the end of the analysis for this dataset.
print(f"\n🎉 HOÀN THÀNH PHÂN TÍCH CHO {dataset_name}!")
