In [None]:
import pandas as pd

from google.colab import drive

# Mount Google Drive at /content/drive/ so that cells reading paths under
# /content/drive/MyDrive/ can find their files.
drive.mount('/content/drive/')

In [None]:
import pandas as pd

# Load the exported runs, keep every 10th row (rows 0, 10, 20, ...) and
# renumber the surviving rows from 0.
df = (
    pd.read_csv('/content/new_uniform_vs_skewed.csv')
    .iloc[::10]
    .reset_index(drop=True)
)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import re

# Plot every ' - Test Accuracy' column of `df` against 'Round', one line per
# column, with colour/opacity derived from the column name.

# Step 1: keep the 'Round' column plus every column ending in ' - Test Accuracy'
relevant_columns = ['Round'] + [col for col in df.columns if col.endswith(' - Test Accuracy')]

# Step 2: keep only the first 200 rows of the frame
filtered_df = df[relevant_columns].iloc[:200]

# Step 3: draw one line per run
plt.figure(figsize=(15, 8))

# Iterate over the filtered columns (excluding 'Round')
for col in filtered_df.columns[1:]:
    # gamma is parsed from the column name; None when the name carries no gamma
    gamma_match = re.search(r'gamma=([0-9.]+)', col)
    gamma_value = float(gamma_match.group(1)) if gamma_match else None

    # j and nc tags found in the column name (anything that is not 'j=4' is
    # treated as 'j=16', anything that is not 'nc=1_' as 'nc=10_')
    j_value = 'j=4' if 'j=4' in col else 'j=16'
    nc_value = 'nc=1_' if 'nc=1_' in col else 'nc=10_'

    # Colour scheme:
    #   uniform runs -> black, opacity depends on j
    #   nc=1 runs    -> red, more transparent as gamma grows
    #   other runs   -> blue, opacity depends on j
    if 'uniform' in col:
        color = mcolors.to_rgba('black', alpha=0.6 if j_value == 'j=4' else 0.8)
    else:
        if nc_value == 'nc=1_':
            color = mcolors.to_rgba('red', alpha=0.8 - (gamma_value / 8 if gamma_value else 0))
        else:
            color = mcolors.to_rgba('blue', alpha=0.8 if j_value == 'j=4' else 1.0)

    # Construct a label for the legend
    label = f'gamma={gamma_value}, {j_value}, {nc_value}'

    # Plot the data as continuous lines without markers
    plt.plot(filtered_df['Round'], filtered_df[col], label=label, color=color, linewidth=1)

# Step 4: Final plot adjustments
plt.title('Test Accuracy Over Steps', fontsize=14)
plt.xlabel('Steps', fontsize=12)
plt.ylabel('Test Accuracy', fontsize=12)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')  # legend sits outside the axes
plt.grid(True)

# Lay the figure out BEFORE saving so the PDF matches what plt.show() displays
plt.tight_layout()
plt.savefig('test_accuracy_plot.pdf', format='pdf', bbox_inches='tight')  # Save as PDF
plt.show()


In [None]:
# Rebinds the notebook-global `df` to the object stored in this pickle on the
# mounted Drive (Drive must be mounted at /content/drive/ first).
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = pd.read_pickle('/content/drive/MyDrive/MLDL/cifar/metrics/clients_classes_dist_fedavg_0_1_50_16.pkl')

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors

# Stacked horizontal bars: one bar per entry among the first 10 of df['train'],
# one coloured segment per class index found in that entry.
train_data = df['train']
num_clients = len(train_data)

# Discrete viridis palette with one entry per class index
num_classes = 100
cmap = plt.get_cmap('viridis', num_classes)

fig, ax = plt.subplots(figsize=(10, 8))

for idx, client_classes in enumerate(train_data[:10]):
    # (class index, count) pairs ordered by class index
    ordered_pairs = sorted(client_classes.items())
    class_indices = [class_idx for class_idx, _ in ordered_pairs]
    class_counts = [count for _, count in ordered_pairs]

    # Left edge of each segment = sum of all counts that come before it
    cumulative_counts = np.cumsum([0] + class_counts[:-1])

    segments = zip(class_indices, class_counts, cumulative_counts)
    for class_idx, count, start in segments:
        ax.barh(idx, count, left=start, color=cmap(class_idx), edgecolor='black')

# Colour bar mapping segment colours back to class indices 0 .. num_classes-1
norm = mcolors.Normalize(vmin=0, vmax=num_classes-1)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label('Class Index')

# Axis labels and title
ax.set_xlabel('Number of Samples')
ax.set_ylabel('Clients')
ax.set_title('Class Distribution for Each Client (Sorted by Class Index)')

plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
import pandas as pd
from google.colab import drive

drive.mount('/content/drive/')

# One pickle per NC setting (1, 5, 10, 50); each is indexed with ['train'] below.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df1 = pd.read_pickle('/content/drive/MyDrive/MLDL/cifar/metrics/clients_classes_dist_fedavg_0_1_1_16.pkl')
df2 = pd.read_pickle('/content/drive/MyDrive/MLDL/cifar/metrics/clients_classes_dist_fedavg_0_1_5_16.pkl')
df3 = pd.read_pickle('/content/drive/MyDrive/MLDL/cifar/metrics/clients_classes_dist_fedavg_0_1_10_16.pkl')
df4 = pd.read_pickle('/content/drive/MyDrive/MLDL/cifar/metrics/clients_classes_dist_fedavg_0_1_50_16.pkl')
dataframes = [df1, df2, df3, df4]

# Create a figure with 4 subplots (2x2 layout)
fig, axs = plt.subplots(2, 2, figsize=(15, 12))

# Discrete viridis palette with one entry per class index
num_classes = 100
cmap = plt.get_cmap('viridis', num_classes)

# The colour scale is the same for every panel, so build it once
norm = mcolors.Normalize(vmin=0, vmax=num_classes-1)

# The loop uses its own names (`client_dist`, `clients_train`) so that the
# notebook-global `df` / `train_data` are not silently overwritten.
for ax, client_dist, nc in zip(axs.flatten(), dataframes, [1, 5, 10, 50]):
    clients_train = client_dist['train']

    # One stacked bar per client (first 10 only), segments ordered by class index
    for idx, client_classes in enumerate(clients_train[:10]):
        sorted_classes = dict(sorted(client_classes.items()))
        class_indices = list(sorted_classes.keys())
        class_counts = list(sorted_classes.values())

        # Left edge of each segment = sum of all counts that come before it
        cumulative_counts = np.cumsum([0] + class_counts[:-1])

        for i, class_idx in enumerate(class_indices):
            ax.barh(idx, class_counts[i], left=cumulative_counts[i], color=cmap(class_idx), edgecolor='black')

    # Colour bar for this panel
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax)
    cbar.set_label('Class Index')

    # Label the axes
    ax.set_xlabel('Number of Samples')
    ax.set_ylabel('Clients')
    ax.set_title(f'Class Distribution for 10 Clients for NC={nc}')

# Adjust the layout to prevent overlap, then save and show
plt.tight_layout()
plt.savefig('client_classes_dist_diff_nc.pdf', format='pdf', bbox_inches='tight')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv('/content/shakes_iid.csv')

# Test-accuracy columns to compare (three skewed alphas and the uniform run)
columns_to_plot = [
    "shakespeare_iid_skewed_alpha=0.7 - test_accuracy",
    "shakespeare_iid_skewed_alpha=0.5 - test_accuracy",
    "shakespeare_iid_skewed_alpha=0.1 - test_accuracy",
    "shakespeare_iid_uniform - test_accuracy"
]

# x values and the selected y columns
steps = data['Step']
accuracies = data[columns_to_plot]

# One curve per selected column; the legend entry is the part of the column
# name before ' - '
plt.figure(figsize=(10, 6))
for run_column, curve in accuracies.items():
    run_name = run_column.split(' - ')[0]
    plt.plot(steps, curve, label=run_name)

# Titles, labels, legend and grid
plt.title('Test Accuracy for Different Alpha Values and Distribution')
plt.xlabel('Step')
plt.ylabel('Test Accuracy (%)')
plt.legend(title="Run Name")
plt.grid(True)
plt.tight_layout()

plt.show()


In [None]:
# Rebinds the notebook-global `data` to the contents of shakes_iid_loss.csv.
data = pd.read_csv('/content/shakes_iid_loss.csv')

In [None]:
import pandas as pd

# NOTE(review): this rebinds the notebook-global `data`; every cell that runs
# after this one and reads `data` sees the columns of acc_iid_cifar.csv, not
# those of any CSV loaded into `data` earlier.
data = pd.read_csv('/content/acc_iid_cifar.csv')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Test-loss columns to compare (three skewed alphas and the uniform run)
columns_to_plot = [
    "shakespeare_iid_skewed_alpha=0.7 - test_loss",
    "shakespeare_iid_skewed_alpha=0.5 - test_loss",
    "shakespeare_iid_skewed_alpha=0.1 - test_loss",
    "shakespeare_iid_uniform - test_loss"
]

# Load the loss export in this cell instead of relying on whatever the
# notebook-global `data` currently holds: another cell rebinds `data` to a
# different CSV, which makes the column lookup below fail on a top-to-bottom run.
data = pd.read_csv('/content/shakes_iid_loss.csv')

# x values and the selected loss columns
# (the name `accuracies` is kept for compatibility; the values are losses)
steps = data['Step']
accuracies = data[columns_to_plot]

# One curve per run; the legend entry is the part of the column name before ' - '
plt.figure(figsize=(10, 6))
for col in columns_to_plot:
    plt.plot(steps, accuracies[col], label=col.split(' - ')[0])

# Customize the plot (a loss is not a percentage, so no '(%)' on the axis)
plt.title('Test Loss for Different Alpha Values and Distribution')
plt.xlabel('Step')
plt.ylabel('Test Loss')
plt.legend(title="Run Name")
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
# Load the export and keep its column names as a plain list in `x`
data = pd.read_csv('/content/shakes_niid_loss.csv')
x = data.columns.tolist()

In [None]:
# Column names ending in 'test_loss' whose name does not mention 'uniform'
columns_to_plot = [
    name
    for name in x
    if 'uniform' not in name and name.endswith('test_loss')
]

In [None]:
# alpha and j settings encoded in the column names of `data`
alpha_values = [0.7, 0.5, 0.1]
j_values = [16, 8, 4]

# One figure per alpha value, one curve per j value. Each iteration opens its
# own figure, so no figure is created outside the loop (an extra one would be
# left empty).
for alpha in alpha_values:
    plt.figure(figsize=(8, 6))
    for j in j_values:
        column_name = f"shakespeare_niid_skewed_j={j}_alpha={alpha} - test_loss"
        # Combinations that are missing from the export are skipped silently
        if column_name in data.columns:
            plt.plot(data['Step'], data[column_name], label=f"j={j}")

    # Customize the plot (a loss is not a percentage, so no '(%)' on the axis)
    plt.title(f'Test Loss for alpha={alpha}')
    plt.xlabel('Step')
    plt.ylabel('Test Loss')
    plt.legend(title="j value")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [None]:
# Load the export and keep its column names as a plain list in `x`
data = pd.read_csv('/content/shakes_niid_uniform_loss.csv')
x = data.columns.tolist()

In [None]:
# Every column name that ends in 'test_loss'
columns_to_plot = [name for name in x if name.endswith('test_loss')]

In [None]:
# x values and the selected loss columns
# (the name `accuracies` is kept for compatibility; the values are losses)
steps = data['Step']
accuracies = data[columns_to_plot]

# One curve per selected column; the legend entry is the part of the column
# name before ' - '
plt.figure(figsize=(10, 6))
for col in columns_to_plot:
    plt.plot(steps, accuracies[col], label=col.split(' - ')[0])

# Customize the plot (a loss is not a percentage, so no '(%)' on the axis)
plt.title('Test Loss for Different Local Epochs')
plt.xlabel('Step')
plt.ylabel('Test Loss')
plt.legend(title="Run Name")
plt.grid(True)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Rebinds the notebook-global `df`. The path is relative, so the file is looked
# up in the kernel's current working directory.
df = pd.read_csv('cifar_noniid.csv')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import re
import pandas as pd

df = pd.read_csv('/content/cifar_noniid.csv')

# Step 1: keep the 'Round' column plus every column ending in ' - Test Accuracy'
relevant_columns = ['Round'] + [col for col in df.columns if col.endswith(' - Test Accuracy')]

# Step 2: keep only the first 200 rows of the frame
filtered_df = df[relevant_columns].iloc[:200]

# Step 3: Plotting settings
# Base colour per nc tag and line opacity per j tag, as actually used below
colors = {'nc=1_': 'green', 'nc=10_': 'blue'}
j_values = {'j=4': 0.5, 'j=16': 1.0, 'j=8': 0.7}

# Runs to draw, in legend order
temp_cols = ['federated_cifar_100_noniid_uniform_j=4_nc=1 - Test Accuracy',
             'federated_cifar_100_noniid_uniform_j=8_nc=1 - Test Accuracy',
             'federated_cifar_100_noniid_uniform_j=16_nc=1 - Test Accuracy',
             'federated_cifar_100_noniid_uniform_j=4_nc=10 - Test Accuracy',
             'federated_cifar_100_noniid_uniform_j=8_nc=10 - Test Accuracy',
             'federated_cifar_100_noniid_uniform_j=16_nc=10 - Test Accuracy']

# Initialize plot
plt.figure(figsize=(15, 8))

for col in temp_cols:
    # j and nc tags found in the column name. 'nc=1' is recognised when it is
    # followed by '_' or by a space, so that 'nc=10' is not mistaken for it.
    j_value = 'j=4' if 'j=4' in col else 'j=16' if 'j=16' in col else 'j=8'
    nc_value = 'nc=1_' if ('nc=1_' in col or 'nc=1 ' in col) else 'nc=10_'

    color = mcolors.to_rgba(colors[nc_value], alpha=j_values[j_value])

    # Construct a label for the legend
    label = f'{j_value}, {nc_value.replace("_","")}'

    # Plot the data as continuous lines without markers
    plt.plot(filtered_df['Round'], filtered_df[col], label=label, color=color, linewidth=1)

# Step 4: Final plot adjustments
plt.title('Test Accuracy Over Steps', fontsize=14)
plt.xlabel('Steps', fontsize=12)
plt.ylabel('Test Accuracy', fontsize=12)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')  # legend sits outside the axes
plt.grid(True)

# Lay the figure out BEFORE saving so the PDF matches what plt.show() displays.
# NOTE(review): another cell in this notebook also writes 'test_accuracy_plot.pdf';
# whichever runs last overwrites the other's file.
plt.tight_layout()
plt.savefig('test_accuracy_plot.pdf', format='pdf', bbox_inches='tight')  # Save as PDF
plt.show()


In [None]:
# Rebinds the notebook-global `df` to the contents of cifar_noniid_uniform_acc.csv.
df = pd.read_csv('/content/cifar_noniid_uniform_acc.csv')

In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import re
import pandas as pd

# Load the loss export and expose its 'Step' column under the name 'Round'
df = pd.read_csv('/content/loss_iid_cifar.csv').rename(columns={'Step': 'Round'})

# Step 1: keep the 'Round' column plus every column ending in ' - Test Loss'
relevant_columns = ['Round'] + [col for col in df.columns if col.endswith(' - Test Loss')]

# Step 2: keep only the first 200 rows of the frame
filtered_df = df[relevant_columns].iloc[:200]

# Step 3: draw the uniform run and the three skewed (gamma) runs
plt.figure(figsize=(15, 8))

temp_cols = ['federated_cifar_100_iid_2000_uniform - Test Loss',
             'federated_cifar_100_iid_2000_skewed_gamma_0.1 - Test Loss',
             'federated_cifar_100_iid_2000_skewed_gamma_0.5 - Test Loss',
             'federated_cifar_100_iid_2000_skewed_gamma_0.7 - Test Loss']

for col in temp_cols:
    # gamma is parsed from the column name; 0 when the name carries no gamma
    gamma_match = re.search(r'gamma_([0-9.]+)', col)
    gamma_value = float(gamma_match.group(1)) if gamma_match else 0

    is_uniform = 'uniform' in col
    if is_uniform:
        color = mcolors.to_rgba('black', alpha=1)
    else:
        # Red, more opaque as gamma grows (0.1 -> 0.35, 0.7 -> 0.65)
        color = mcolors.to_rgba('red', alpha=gamma_value/2+0.3)

    # Construct a label for the legend
    label = 'Uniform' if is_uniform else f'gamma = {gamma_value}'

    # Plot the data as continuous lines without markers
    plt.plot(filtered_df['Round'], filtered_df[col], label=label, color=color, linewidth=1)

# Step 4: Final plot adjustments
plt.title('Test Loss Over Steps', fontsize=14)
plt.xlabel('Steps', fontsize=12)
plt.ylabel('Test Loss', fontsize=12)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')  # legend sits outside the axes
plt.grid(True)

# Lay the figure out BEFORE saving so the PDF matches what plt.show() displays
plt.tight_layout()
plt.savefig('test_loss_plot.pdf', format='pdf', bbox_inches='tight')  # Save as PDF
plt.show()

In [None]:
import pandas as pd
from functools import reduce


def _load_run(path, tag, read_old_accuracy=False):
    """Load one metrics pickle and return ``(curves, old_accuracy)``.

    Args:
        path: Location of the pickle file.
        tag: Suffix for the renamed accuracy columns; 'New Clients Test
            Accuracy' becomes ``new_<tag>`` and 'Test Accuracy' becomes
            ``acc_<tag>``.
        read_old_accuracy: When true, also read the first entry of the
            'Old Test Accuracy' column.

    Returns:
        A tuple ``(curves, old_accuracy)``. ``curves`` is an independent frame
        with the columns 'Round', ``new_<tag>`` and ``acc_<tag>``, the two
        accuracy columns multiplied by 100. ``old_accuracy`` is the first
        'Old Test Accuracy' entry multiplied by 100, or ``None`` when
        ``read_old_accuracy`` is false.
    """
    new_col, acc_col = f'new_{tag}', f'acc_{tag}'
    # NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
    frame = pd.read_pickle(path).rename(columns={
        'New Clients Test Accuracy': new_col,
        'Test Accuracy': acc_col,
    })

    old_accuracy = None
    if read_old_accuracy:
        # Positional access: the first stored row, whatever the index labels are
        old_accuracy = frame['Old Test Accuracy'].iloc[0] * 100

    # .copy() yields an independent frame, so the scaling below never writes
    # into a slice of `frame` (no SettingWithCopyWarning). Keeping only these
    # three columns also prevents '_x'/'_y' suffix columns in the merge.
    curves = frame[['Round', new_col, acc_col]].copy()
    curves[new_col] = curves[new_col] * 100
    curves[acc_col] = curves[acc_col] * 100
    return curves, old_accuracy


df1, nc1_acc = _load_run('/content/metrics_pfedhn_0.9_gen_0_1_1_4.pkl', 'nc1_0.9', read_old_accuracy=True)
df2 = _load_run('/content/metrics_pfedhn_1_gen_0_1_1_4.pkl', 'nc1_1')[0]

df3, nc5_acc = _load_run('/content/metrics_pfedhn_0.9_gen_0_1_5_4.pkl', 'nc5_0.9', read_old_accuracy=True)
df4 = _load_run('/content/metrics_pfedhn_1_gen_0_1_5_4.pkl', 'nc5_1')[0]

df5, nc10_acc = _load_run('/content/metrics_pfedhn_1_gen_0_1_10_4.pkl', 'nc10_1', read_old_accuracy=True)
df6 = _load_run('/content/metrics_pfedhn_0.9_gen_0_1_10_4.pkl', 'nc10_0.9')[0]

# Left-join everything onto the rounds of the first run
dfs = [df1, df2, df3, df4, df5, df6]
merged_df = reduce(lambda left, right: pd.merge(left, right, on='Round', how='left'), dfs)

# Columns used by the plotting cell, in a fixed order
df_to_plot = merged_df[['Round', 'new_nc1_0.9', 'acc_nc1_0.9', 'new_nc1_1', 'acc_nc1_1', 'new_nc5_0.9', 'acc_nc5_0.9', 'new_nc5_1', 'acc_nc5_1', 'new_nc10_1', 'acc_nc10_1', 'new_nc10_0.9', 'acc_nc10_0.9']]
df_to_plot



In [None]:
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt

relevant_columns = ['new_nc1_0.9', 'acc_nc1_0.9', 'new_nc1_1', 'acc_nc1_1','new_nc5_0.9', 'acc_nc5_0.9', 'new_nc5_1', 'acc_nc5_1', 'new_nc10_1', 'acc_nc10_1', 'new_nc10_0.9', 'acc_nc10_0.9']

# Two side-by-side panels: 'new_*' columns go left, all other columns go right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

for col in relevant_columns:
    # Opacity follows the bias tag in the column name: 0.9 -> 0.6, otherwise 1
    bias = 0.9 if '0.9' in col else 1
    alpha = 0.6 if bias == 0.9 else 1

    # Base colour and legend text follow the nc tag in the column name
    if 'nc1_' in col:
        base_color, nc_text = 'blue', '1'
    elif 'nc10' in col:
        base_color, nc_text = 'green', '10'
    else:
        base_color, nc_text = 'red', '5'
    color = mcolors.to_rgba(base_color, alpha=alpha)

    bias_text = '0.9' if '0.9' in col else '1'
    label = f"NC={nc_text} (Bias = {bias_text})"

    # Pick the panel, then draw the curve
    target_ax = ax1 if 'new' in col else ax2
    target_ax.plot(df_to_plot['Round'], df_to_plot[col], label=label, color=color, linewidth=1)

# Same fixed y range on both panels
for axis in (ax1, ax2):
    axis.set_ylim(0, 100)

# Horizontal reference lines (one per nc setting), drawn on both panels
reference_lines = (
    (nc1_acc, 'blue', 'NC1 - OLD ACC'),
    (nc5_acc, 'red', 'NC5 - OLD ACC'),
    (nc10_acc, 'green', 'NC10 - OLD ACC'),
)
for level, base_color, text in reference_lines:
    for axis in (ax1, ax2):
        axis.axhline(y=level, color=mcolors.to_rgba(base_color, alpha=0.4), linestyle='dashdot', linewidth=3, label=text)

# Titles, axis labels, grid and legend for each panel
panel_text = (
    (ax1, 'New Clients Accuracy Over Steps', 'New Clients Accuracy'),
    (ax2, 'Global Test Accuracy Over Steps', 'Global Test Accuracy'),
)
for axis, title_text, y_text in panel_text:
    axis.set_title(title_text, fontsize=14)
    axis.set_xlabel('Steps', fontsize=12)
    axis.set_ylabel(y_text, fontsize=12)
    axis.grid(True)
    axis.legend(loc='upper left')

# Adjust spacing, save as PDF, then display
plt.tight_layout()
plt.savefig('gen_test_accuracy_plot.pdf', format='pdf', bbox_inches='tight')
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import dirichlet


# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
# NOTE(review): the other Drive paths in this notebook sit under
# 'MyDrive/MLDL/...'; this one does not. Confirm the location.
df = pd.read_pickle('/content/drive/MyDrive/cifar/metrics/clients_dist_dirichlet.pkl')

# `gamma_values` is not defined anywhere in this notebook, so recover it from
# the stored keys: the data is addressed as f'gamma_{gamma}' below, hence every
# key that starts with 'gamma_' names one gamma setting.
gamma_values = [str(key)[len('gamma_'):] for key in df.keys() if str(key).startswith('gamma_')]
if not gamma_values:
    raise KeyError("no 'gamma_<value>' entries found in clients_dist_dirichlet.pkl")

bar_colors = ['blue', 'orange', 'green']

# One bar chart per gamma value (squeeze=False keeps `axes` 2-D even for one panel)
fig, axes = plt.subplots(1, len(gamma_values), figsize=(6 * len(gamma_values), 6), sharey=True, squeeze=False)

for i, gamma in enumerate(gamma_values):
    ax = axes[0][i]
    counts = df[f'gamma_{gamma}']
    # The number of bars comes from the data being plotted, not from a
    # variable left behind by an unrelated cell
    n_clients = len(counts)
    ax.bar(range(n_clients), counts, color=bar_colors[i % len(bar_colors)], width=1.0)
    ax.set_title(f'Dirichlet Distribution with gamma={gamma}')
    ax.set_xlabel('Client')
    ax.set_ylabel('Selection Count')
    ax.set_xlim(0, n_clients)

plt.suptitle('Client Selection Distribution for Different Gamma Values over 2000 Rounds')
plt.tight_layout()

plt.savefig('gamma_distribution.pdf', format='pdf', bbox_inches='tight')

plt.show()


In [96]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
import random
import pandas as pd

# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = pd.read_pickle('/content/drive/MyDrive/MLDL/shakespeare/metrics/clients_dist.pkl')

# Fixed seed so the same clients are drawn (and the same figure produced) on
# every run
SAMPLE_SEED = 42
N_CLIENTS_SHOWN = 10

# Per-client {class index: count} mappings
train_data = df['train']

# Number of clients
num_clients = len(train_data)

# Draw at most N_CLIENTS_SHOWN clients; never ask for more than are available
random_selection = train_data.sample(n=min(N_CLIENTS_SHOWN, num_clients), random_state=SAMPLE_SEED)

# Discrete viridis palette with 80 entries, one per class index 0..79
num_classes = 80
cmap = plt.get_cmap('viridis', num_classes)

# Create a figure for the plot
fig, ax = plt.subplots(figsize=(10, 8))

# One stacked bar per sampled client, segments ordered by class index
for idx, client_classes in enumerate(random_selection):
    sorted_classes = dict(sorted(client_classes.items()))
    class_indices = list(sorted_classes.keys())
    class_counts = list(sorted_classes.values())

    # Left edge of each segment = sum of all counts that come before it
    cumulative_counts = np.cumsum([0] + class_counts[:-1])

    for i, class_idx in enumerate(class_indices):
        ax.barh(idx, class_counts[i], left=cumulative_counts[i], color=cmap(class_idx), edgecolor='black')

# Colour bar mapping segment colours back to class indices 0 .. num_classes-1
norm = mcolors.Normalize(vmin=0, vmax=num_classes-1)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax)
cbar.set_label('Class Index')

# Label the axes
ax.set_xlabel('Number of Samples')
ax.set_ylabel('Clients')
ax.set_title('Class Distribution for Each Client (Sorted by Class Index)')


plt.savefig('class_dist_shakes_niid.pdf', format='pdf', bbox_inches='tight')

# Show plot
plt.show()