In [33]:
import json
import numpy as np

# Load the evaluation results from disk
def read_results(file_path):
    """Parse the JSON document stored at ``file_path`` and return it.

    Args:
        file_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)

def _is_valid_metric_value(value):
    """Return True when ``value`` is a real number other than NaN or the -1 sentinel."""
    # JSON ``null`` (None), strings, etc. would make np.isnan raise TypeError,
    # so anything that is not a number is treated as "missing", like NaN.
    if not isinstance(value, (int, float, np.number)):
        return False
    return not np.isnan(value) and value != -1


def _run_sort_key(run):
    """Sort key that orders numeric run ids numerically ("2" before "10")."""
    text = str(run)
    return (0, int(text), text) if text.isdecimal() else (1, 0, text)


# Function to compute mean metrics over the classes for each run
def compute_mean_metrics(results):
    """Average the per-class metrics into one value per metric for every run.

    Args:
        results: Mapping ``config -> data``. Only ``data["few_shot"]`` is read;
            it maps ``shot -> {"train" | "test": {class_name: {run: {metric: value}}}}``.
            An ``"overall"`` entry next to the class entries is ignored.

    Returns:
        ``{config: {shot: {"train": {run: {"mean_metrics": {...}}}, "test": {...}}}}``.
        Each mean is taken over the classes that have a valid value for that
        metric in that run and is multiplied by 100. Values that are NaN,
        equal to -1, or not numeric are skipped; a metric with no valid value
        in a run is absent from that run's ``"mean_metrics"``.
    """
    analysis = {}
    for config, data in results.items():
        few_shot_data = data.get("few_shot", {})
        analysis[config] = {}
        for shot, shot_data in few_shot_data.items():
            analysis[config][shot] = {"train": {}, "test": {}}
            for res_type in ("train", "test"):
                res_data = shot_data.get(res_type, {})
                class_metrics = {k: v for k, v in res_data.items() if k != "overall"}

                # Sorted so the resulting dict has a reproducible run order
                # (iterating the raw set depends on string hashing).
                run_numbers = sorted(
                    {run for metrics in class_metrics.values() for run in metrics},
                    key=_run_sort_key,
                )

                for run in run_numbers:
                    # metric name -> valid values collected across the classes
                    values_by_metric = {}
                    for metrics in class_metrics.values():
                        for metric, value in metrics.get(run, {}).items():
                            if _is_valid_metric_value(value):
                                values_by_metric.setdefault(metric, []).append(value)

                    analysis[config][shot][res_type][run] = {
                        "mean_metrics": {
                            metric: sum(values) / len(values) * 100
                            for metric, values in values_by_metric.items()
                        }
                    }

    return analysis

# Path to the JSON results file (relative, so it is resolved against the
# kernel's current working directory)
file_path = 'evaluation_results.json'

# Read the results from the JSON file
results = read_results(file_path)

# Compute the per-run, class-averaged metrics (values are scaled by 100)
mean_results = compute_mean_metrics(results)

# Print the analysis
# NOTE(review): the cell that later calls pprint.pprint(mean_results) again
# does not import pprint itself and relies on this import having run.
import pprint
pprint.pprint(mean_results)


{'fcos_PVTV2B2LI_FPNRETINANET_XQSABGA_DIOR.yaml': {'10_shot': {'test': {'0': {'mean_metrics': {'AP': 2.0818186380394046,
                                                                                               'AP50': 4.940412979466116,
                                                                                               'AP75': 1.5630441351571482,
                                                                                               'APl': 4.145027740529287,
                                                                                               'APm': 2.6528321170451497,
                                                                                               'APs': 0.6717463642751833}},
                                                                        '1': {'mean_metrics': {'AP': 2.323729308134053,
                                                                                               'AP50': 5.663571241633669,
                         

In [36]:
import numpy as np

# Extra results entered by hand, laid out like the output of
# compute_mean_metrics: config -> shot -> "train"/"test" -> run -> "mean_metrics".
new_evaluation_results = {
    "faster_rcnn_FCT_DOTA.yaml": {"10_shot": {
        "train": {"0": {"mean_metrics": {
            "AP": 28.3912, "AP50": 49.9011, "AP75": 28.2457,
            "APs": 22.9618, "APm": 54.6035, "APl": 55.3671,
        }}},
        "test": {"0": {"mean_metrics": {
            "AP": 17.5446, "AP50": 32.6972, "AP75": 16.6045,
            "APs": 16.2349, "APm": 19.7232, "APl": 46.2229,
        }}},
    }},
    "faster_rcnn_FCT_DIOR.yaml": {"10_shot": {
        "train": {"0": {"mean_metrics": {
            "AP": 51.8869, "AP50": 69.5572, "AP75": 57.7049,
            "APs": 10.8314, "APm": 51.6611, "APl": 88.5412,
        }}},
        "test": {"0": {"mean_metrics": {
            "AP": 22.0111, "AP50": 38.5204, "AP75": 23.5506,
            "APs": 0.8578, "APm": 21.7535, "APl": 66.0012,
        }}},
    }},
}

# Function to add new evaluation results to existing analysis
def add_new_results_to_analysis(analysis, new_results):
    """Merge ``new_results`` into ``analysis`` in place.

    Both arguments use the layout
    ``config -> shot -> result type -> run id -> run data``.
    Missing configs, shots and result types are created on demand; a run id
    that already exists in ``analysis`` is overwritten by the new run data.

    Args:
        analysis: Existing analysis mapping; mutated by this call.
        new_results: Results to merge in. The run data objects are stored by
            reference, not copied.

    Returns:
        None. The merge happens on ``analysis`` itself.
    """
    for config, config_data in new_results.items():
        config_entry = analysis.setdefault(config, {})
        for shot, shot_data in config_data.items():
            shot_entry = config_entry.setdefault(shot, {"train": {}, "test": {}})
            for res_type, runs in shot_data.items():
                # The container for an unseen result type must start empty:
                # its keys are run ids, so seeding it with a "mean_metrics"
                # key would create a bogus run at that level.
                shot_entry.setdefault(res_type, {}).update(runs)

# Add the new evaluation results to the analysis.
# The call mutates mean_results in place and returns nothing.
# NOTE(review): pprint is not imported in this cell; it relies on the
# `import pprint` executed in an earlier cell.
add_new_results_to_analysis(mean_results, new_evaluation_results)
pprint.pprint(mean_results)

{'faster_rcnn_FCT_DIOR.yaml': {'10_shot': {'test': {'0': {'mean_metrics': {'AP': 22.0111,
                                                                           'AP50': 38.5204,
                                                                           'AP75': 23.5506,
                                                                           'APl': 66.0012,
                                                                           'APm': 21.7535,
                                                                           'APs': 0.8578}}},
                                           'train': {'0': {'mean_metrics': {'AP': 51.8869,
                                                                            'AP50': 69.5572,
                                                                            'AP75': 57.7049,
                                                                            'APl': 88.5412,
                                                                            'APm':

In [37]:
import numpy as np

# Function to compute additional statistics over the runs
def compute_run_statistics(analysis):
    """Summarise every metric across the runs of each config/shot/split.

    Args:
        analysis: Mapping ``config -> shot -> "train" | "test" -> run id ->
            {"mean_metrics": {metric: value}}``.

    Returns:
        ``{config: {shot: {"train": {metric: stats}, "test": {metric: stats}}}}``
        where ``stats`` holds ``"mean"``, ``"min"``, ``"max"`` and
        ``"variance"`` (``np.var`` with its default ``ddof=0``, i.e. the
        population variance) over the runs that report the metric. A split
        that is missing or has no runs yields an empty dict.
    """
    stats = {}
    for config, data in analysis.items():
        stats[config] = {}
        for shot, shot_data in data.items():
            stats[config][shot] = {"train": {}, "test": {}}
            for res_type in ("train", "test"):
                runs = shot_data.get(res_type, {})

                # Gather values per metric from whichever runs exist, rather
                # than assuming a run named "0" defines the metric set.
                metrics_collection = {}
                for run_data in runs.values():
                    for metric, value in run_data.get("mean_metrics", {}).items():
                        metrics_collection.setdefault(metric, []).append(value)

                stats[config][shot][res_type] = {
                    metric: {
                        "mean": np.mean(values),
                        "min": np.min(values),
                        "max": np.max(values),
                        "variance": np.var(values),
                    }
                    for metric, values in metrics_collection.items()
                }
    return stats

# Compute the extended analysis: mean/min/max/variance of each metric across
# the runs stored in mean_results
run_statistics = compute_run_statistics(mean_results)

# Print the extended analysis
import pprint
pprint.pprint(run_statistics)


{'faster_rcnn_FCT_DIOR.yaml': {'10_shot': {'test': {'AP': {'max': 22.0111,
                                                           'mean': 22.0111,
                                                           'min': 22.0111,
                                                           'variance': 0.0},
                                                    'AP50': {'max': 38.5204,
                                                             'mean': 38.5204,
                                                             'min': 38.5204,
                                                             'variance': 0.0},
                                                    'AP75': {'max': 23.5506,
                                                             'mean': 23.5506,
                                                             'min': 23.5506,
                                                             'variance': 0.0},
                                                    'APl': {'max': 66.0012,

In [50]:
import pandas as pd

# Split a config file name into (config without dataset, dataset name)
def extract_and_clean_config(config_file):
    """Separate the trailing dataset token from a config file name.

    The text after the last underscore, with every ``.yaml`` occurrence
    removed, is the dataset name; the text before that underscore is the
    cleaned config name (empty when there is no underscore).

    Returns:
        Tuple ``(cleaned_config, dataset_name)``.
    """
    cleaned_config, _, tail = config_file.rpartition('_')
    dataset_name = tail.replace('.yaml', '')
    return cleaned_config, dataset_name

# Flatten the nested run statistics into a wide, multi-indexed table
def analysis_to_dataframe(analysis):
    """Turn nested run statistics into a multi-index DataFrame.

    Args:
        analysis: Mapping ``config file -> shot -> result type -> metric ->
            {"mean", "variance", "min", "max"}``.

    Returns:
        DataFrame indexed by ``Dataset``, ``Shot``, ``Config``, ``Type`` whose
        columns are the pairs (``Mean`` / ``Variance`` / ``Min`` / ``Max``,
        metric name).
    """
    rows = []
    for config_file, per_shot in analysis.items():
        config_name, dataset = extract_and_clean_config(config_file)
        # One row per (shot, result type, metric) under this config
        rows.extend(
            {
                "Config": config_name,
                "Dataset": dataset,
                "Shot": shot,
                "Type": split,
                "Metric": metric_name,
                "Mean": summary['mean'],
                "Variance": summary['variance'],
                "Min": summary['min'],
                "Max": summary['max'],
            }
            for shot, per_split in per_shot.items()
            for split, per_metric in per_split.items()
            for metric_name, summary in per_metric.items()
        )

    long_table = pd.DataFrame(rows)
    indexed = long_table.set_index(['Dataset', 'Shot', 'Config', 'Type', 'Metric'])
    # Pivot the innermost index level (Metric) into the columns
    return indexed.unstack(level=-1)

# Convert the run statistics to a multi-index DataFrame
df = analysis_to_dataframe(run_statistics)

# Show the table rounded to 2 decimals; the rounded copy is only displayed,
# df itself keeps the unrounded values
print("Multi-Index DataFrame:")
(df).round(2)


Multi-Index DataFrame:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Mean,Mean,Mean,Mean,Mean,Variance,Variance,Variance,Variance,...,Min,Min,Min,Min,Max,Max,Max,Max,Max,Max
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Metric,AP,AP50,AP75,APl,APm,APs,AP,AP50,AP75,APl,...,AP75,APl,APm,APs,AP,AP50,AP75,APl,APm,APs
Dataset,Shot,Config,Type,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
DIOR,10_shot,faster_rcnn_FCT,test,22.01,38.52,23.55,66.0,21.75,0.86,0.0,0.0,0.0,0.0,...,23.55,66.0,21.75,0.86,22.01,38.52,23.55,66.0,21.75,0.86
DIOR,10_shot,faster_rcnn_FCT,train,51.89,69.56,57.7,88.54,51.66,10.83,0.0,0.0,0.0,0.0,...,57.7,88.54,51.66,10.83,51.89,69.56,57.7,88.54,51.66,10.83
DIOR,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,test,2.2,5.3,1.56,4.29,2.86,0.67,0.01,0.13,0.0,0.02,...,1.56,4.15,2.65,0.67,2.32,5.66,1.56,4.43,3.07,0.67
DIOR,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,train,1.13,1.77,1.25,3.12,0.56,0.18,0.08,0.25,0.12,0.09,...,0.9,2.82,0.25,0.14,1.41,2.27,1.59,3.43,0.88,0.21
DIOR,10_shot,fcos_R50_FPNRETINANET_XQSABGA,test,3.4,7.12,2.96,6.57,3.46,0.31,0.0,0.0,0.0,0.05,...,2.92,6.36,3.3,0.13,3.44,7.18,3.0,6.78,3.62,0.49
DIOR,10_shot,fcos_R50_FPNRETINANET_XQSABGA,train,0.75,1.23,0.75,2.29,0.42,0.12,0.01,0.02,0.04,0.38,...,0.54,1.67,0.4,0.09,0.87,1.36,0.96,2.91,0.45,0.16
DOTA,10_shot,faster_rcnn_FCT,test,17.54,32.7,16.6,46.22,19.72,16.23,0.0,0.0,0.0,0.0,...,16.6,46.22,19.72,16.23,17.54,32.7,16.6,46.22,19.72,16.23
DOTA,10_shot,faster_rcnn_FCT,train,28.39,49.9,28.25,55.37,54.6,22.96,0.0,0.0,0.0,0.0,...,28.25,55.37,54.6,22.96,28.39,49.9,28.25,55.37,54.6,22.96
DOTA,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,test,1.1,2.08,1.04,2.56,2.15,0.7,0.01,0.01,0.03,0.64,...,0.86,1.76,1.58,0.39,1.18,2.17,1.22,3.36,2.72,1.0
DOTA,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,train,0.81,1.91,0.63,1.88,1.21,0.25,0.02,0.12,0.01,0.06,...,0.52,1.63,1.03,0.19,0.97,2.25,0.73,2.13,1.39,0.31


In [56]:
# Build a CSS table that marks the per-group maximum of every column
def highlight_max(df, group_levels, color='darkorange'):
    """Return CSS strings that emphasise the largest value of each column per group.

    Rows are grouped by the index level(s) ``group_levels``. Within each group
    and for each two-level column, every cell equal to the group's maximum
    (ties included) receives a bold, italic style in ``color``; all other
    cells get an empty string.

    Returns:
        DataFrame of CSS strings with the same index and columns as ``df``.
    """
    css = f'font-weight: bold; font-style: italic; color: {color}'
    styles = pd.DataFrame('', index=df.index, columns=df.columns)

    for _, block in df.groupby(level=group_levels):
        # Walk every (level-0, level-1) column pair; pairs that do not exist
        # as an actual column are skipped.
        for top in df.columns.levels[0]:
            for sub in df.columns.levels[1]:
                col = (top, sub)
                if col not in block.columns:
                    continue
                series = block[col]
                winners = series == series.max()
                styles.loc[winners.index, col] = np.where(winners, css, '')

    return styles

# Build a CSS table that bands the rows by their "Type" index level
def alternate_background(df):
    """Return background-colour CSS that switches whenever ``Type`` changes.

    Groups are visited in ``groupby`` order over the index levels ``Config``,
    ``Dataset``, ``Shot`` and ``Type``. The colour flips between the two
    palette entries each time the ``Type`` of the current group differs from
    that of the previous group.

    Returns:
        DataFrame of CSS strings with the same index and columns as ``df``.
    """
    palette = ['#333333', '#555555']
    styles = pd.DataFrame('', index=df.index, columns=df.columns)

    flips = 0
    last_type = None
    for key, rows in df.groupby(level=['Config', 'Dataset', 'Shot', 'Type']):
        (config, dataset, shot, current_type) = key
        if current_type != last_type:
            flips += 1
        last_type = current_type
        # Paint every row of this group with the colour currently selected
        styles.loc[rows.index, :] = f'background-color: {palette[flips % 2]}'

    return styles

# Merge two CSS tables cell by cell
def combine_styles(highlighted, background):
    """Join the CSS strings of two style frames cell by cell.

    For every (row, column) label of ``highlighted`` the matching cell of
    ``background`` is looked up by label. When both cells are non-empty they
    are joined as ``"<highlight>; <background>"``; otherwise whichever one is
    non-empty is used.

    Returns:
        DataFrame of CSS strings with the index and columns of ``highlighted``.
    """
    merged = pd.DataFrame('', index=highlighted.index, columns=highlighted.columns)
    for idx in highlighted.index:
        for col in highlighted.columns:
            front = highlighted.loc[idx, col]
            back = background.loc[idx, col]
            if front and back:
                cell = f'{front}; {back}'
            else:
                cell = front or back
            merged.loc[idx, col] = cell
    return merged

# Highlight the largest value of every column within each (Dataset, Shot, Type) group
highlighted = highlight_max(df, group_levels=['Dataset', 'Shot', 'Type'])

# Background banding is currently disabled, so the highlight styles are used
# as they are. Passing `highlighted` twice to combine_styles would only
# duplicate every CSS declaration ("...; ...") at the cost of a cell-by-cell
# loop. To turn banding back on:
#   background = alternate_background(df)
#   combined_styles = combine_styles(highlighted, background)
combined_styles = highlighted

# With axis=None the Styler passes the whole frame to the function and expects
# a same-shaped DataFrame of CSS strings in return
styled_df = df.style.apply(lambda _: combined_styles, axis=None)

# Display the styled DataFrame
styled_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Mean,Mean,Mean,Mean,Mean,Variance,Variance,Variance,Variance,Variance,Variance,Min,Min,Min,Min,Min,Min,Max,Max,Max,Max,Max,Max
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Metric,AP,AP50,AP75,APl,APm,APs,AP,AP50,AP75,APl,APm,APs,AP,AP50,AP75,APl,APm,APs,AP,AP50,AP75,APl,APm,APs
Dataset,Shot,Config,Type,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2
DIOR,10_shot,faster_rcnn_FCT,test,22.0111,38.5204,23.5506,66.0012,21.7535,0.8578,0.0,0.0,0.0,0.0,0.0,0.0,22.0111,38.5204,23.5506,66.0012,21.7535,0.8578,22.0111,38.5204,23.5506,66.0012,21.7535,0.8578
DIOR,10_shot,faster_rcnn_FCT,train,51.8869,69.5572,57.7049,88.5412,51.6611,10.8314,0.0,0.0,0.0,0.0,0.0,0.0,51.8869,69.5572,57.7049,88.5412,51.6611,10.8314,51.8869,69.5572,57.7049,88.5412,51.6611,10.8314
DIOR,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,test,2.202774,5.301992,1.55971,4.28907,2.86254,0.671542,0.01463,0.130739,1.1e-05,0.020748,0.043978,0.0,2.081819,4.940413,1.556376,4.145028,2.652832,0.671337,2.323729,5.663571,1.563044,4.433112,3.072249,0.671746
DIOR,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,train,1.130927,1.769061,1.247669,3.122553,0.562451,0.175529,0.079611,0.252738,0.119893,0.091715,0.099367,0.001023,0.848772,1.26633,0.901414,2.819708,0.247226,0.143541,1.413082,2.271791,1.593925,3.425399,0.877676,0.207518
DIOR,10_shot,fcos_R50_FPNRETINANET_XQSABGA,test,3.402906,7.115067,2.956286,6.571011,3.462631,0.312995,0.001423,0.004309,0.001697,0.045086,0.025734,0.032511,3.36519,7.049422,2.915088,6.358675,3.302213,0.132686,3.440622,7.180712,2.997484,6.783347,3.623049,0.493303
DIOR,10_shot,fcos_R50_FPNRETINANET_XQSABGA,train,0.749635,1.232995,0.751782,2.288299,0.424864,0.121632,0.014634,0.016015,0.04383,0.384246,0.000551,0.001226,0.628665,1.106444,0.542425,1.668423,0.401381,0.086616,0.870606,1.359547,0.961139,2.908174,0.448346,0.156647
DOTA,10_shot,faster_rcnn_FCT,test,17.5446,32.6972,16.6045,46.2229,19.7232,16.2349,0.0,0.0,0.0,0.0,0.0,0.0,17.5446,32.6972,16.6045,46.2229,19.7232,16.2349,17.5446,32.6972,16.6045,46.2229,19.7232,16.2349
DOTA,10_shot,faster_rcnn_FCT,train,28.3912,49.9011,28.2457,55.3671,54.6035,22.9618,0.0,0.0,0.0,0.0,0.0,0.0,28.3912,49.9011,28.2457,55.3671,54.6035,22.9618,28.3912,49.9011,28.2457,55.3671,54.6035,22.9618
DOTA,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,test,1.099102,2.076803,1.044388,2.557042,2.149336,0.697943,0.007287,0.009614,0.032312,0.640852,0.327656,0.092116,1.013736,1.978754,0.864632,1.75651,1.576924,0.394436,1.184468,2.174852,1.224144,3.357575,2.721748,1.001449
DOTA,10_shot,fcos_PVTV2B2LI_FPNRETINANET_XQSABGA,train,0.814843,1.905213,0.625684,1.877742,1.210757,0.249472,0.023323,0.117309,0.011505,0.0626,0.030947,0.00372,0.662124,1.562709,0.518424,1.627543,1.034839,0.188481,0.967561,2.247717,0.732943,2.127941,1.386675,0.310463
