# Imports

# Functions

In [5]:
# Adding a modified script to process and plot the data
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Paths to the uploaded JSON files
file_paths = ['5_client_40.json', '5_client_baseline.json']

# Modified function to process each JSON file with an added file identifier
def process_json(file_path, file_identifier):
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Extract data with file identifier
    extracted_data = []
    for record in data:
        client_name = record['sender']['name']
        correlation_id = record['correlation_id']
        training_info = record['data']
        extracted_data.append({
            'file_id': file_identifier,  # Adding file identifier
            'client': client_name,
            'correlationId': correlation_id,
            'training_loss': training_info['training_loss'],
            'training_accuracy': training_info['training_accuracy'],
            'test_loss': training_info['test_loss'],
            'test_accuracy': training_info['test_accuracy']
        })
    return extracted_data

# Processing all JSON files with file identifiers
all_data = []
for i, file_path in enumerate(file_paths):
    file_identifier = f'File_{i+1}'  # Creating a unique identifier for each file
    all_data.extend(process_json(file_path, file_identifier))

# Create DataFrame
df = pd.DataFrame(all_data)

# Assigning round numbers based on unique correlationId
unique_correlation_ids = sorted(df['correlationId'].unique())
round_mapping = {correlation_id: f'Round {i+1}' for i, correlation_id in enumerate(unique_correlation_ids)}

# Applying the mapping to the DataFrame
df['Round'] = df['correlationId'].map(round_mapping)

# Calculate means for the grouped data by round and file
mean_df = df.groupby(['Round', 'file_id']).mean().reset_index()

# Plotting separate lines for each file
plt.figure(figsize=(20, 10))

# Plot training accuracy
plt.subplot(2, 2, 1)
sns.lineplot(x='Round', y='training_accuracy', hue='file_id', data=mean_df, marker='o')
plt.title('Mean Training Accuracy per Round')
plt.xticks(rotation=45)
plt.grid()

# Plot training loss
plt.subplot(2, 2, 2)
sns.lineplot(x='Round', y='training_loss', hue='file_id', data=mean_df, marker='o')
plt.title('Mean Training Loss per Round')
plt.xticks(rotation=45)
plt.grid()

# Plot test accuracy
plt.subplot(2, 2, 3)
sns.lineplot(x='Round', y='test_accuracy', hue='file_id', data=mean_df, marker='o')
plt.title('Mean Test Accuracy per Round')
plt.xticks(rotation=45)
plt.grid()

# Plot test loss
plt.subplot(2, 2, 4)
sns.lineplot(x='Round', y='test_loss', hue='file_id', data=mean_df, marker='o')
plt.title('Mean Test Loss per Round')
plt.xticks(rotation=45)
plt.grid()

plt.tight_layout()
plt.legend()


KeyError: 'correlation_id'