In [1]:
import json
import numpy as np

# Function to read JSON file and parse results
def read_results(file_path):
    """Load and return the parsed contents of a UTF-8 encoded JSON file.

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)

# Function to compute mean metrics over the classes
def compute_mean_metrics(results):
    """Average the per-class metrics for every config / shot / split.

    Parameters
    ----------
    results : dict
        Mapping ``config -> {"few_shot": {shot: {"train": ..., "test": ...}}}``.
        Inside each split, the ``"overall"`` key is passed through unchanged
        and every other key is treated as one class holding a
        ``metric -> value`` mapping.

    Returns
    -------
    dict
        ``analysis[config][shot][split]`` is
        ``{"mean_metrics": {...}, "overall": {...}}``. A split that is absent
        from the input yields two empty dicts.
    """
    analysis = {}
    for config, data in results.items():
        few_shot_data = data.get("few_shot", {})
        analysis[config] = {}
        for shot, shot_data in few_shot_data.items():
            analysis[config][shot] = {}
            for res_type in ("train", "test"):
                res_data = shot_data.get(res_type, {})
                class_metrics = {k: v for k, v in res_data.items() if k != "overall"}
                analysis[config][shot][res_type] = {
                    "mean_metrics": _mean_over_classes(class_metrics),
                    "overall": res_data.get("overall", {}),
                }
    return analysis


def _mean_over_classes(class_metrics):
    """Return ``metric -> mean`` over the classes, skipping unusable values.

    Metric names are taken from the first class. A value is skipped when it
    is missing, ``None``, NaN, or the ``-1`` sentinel. A metric with no usable
    value in any class gets ``float('nan')``.
    """
    if not class_metrics:
        return {}

    metric_names = list(next(iter(class_metrics.values())).keys())
    metric_sums = {metric: 0 for metric in metric_names}
    metric_counts = {metric: 0 for metric in metric_names}

    for metrics in class_metrics.values():
        for metric in metric_names:
            # Look the value up with .get: indexing first would raise KeyError
            # for a class that lacks this metric, before any guard could run.
            value = metrics.get(metric)
            if value is None or value == -1 or np.isnan(value):
                continue
            metric_sums[metric] += value
            metric_counts[metric] += 1

    return {
        metric: (metric_sums[metric] / metric_counts[metric]
                 if metric_counts[metric] > 0 else float('nan'))
        for metric in metric_names
    }

# Function to add new evaluation results to existing analysis
def add_new_results(analysis, new_results):
    """Merge extra evaluation results into ``analysis`` in place.

    Parameters
    ----------
    analysis : dict
        Nested mapping ``config -> shot -> split -> {...}``; it is mutated.
    new_results : dict
        Mapping ``config -> shots``, where ``shots`` is either
        ``{shot: {split: {"overall": {...}}}}`` directly or the same mapping
        wrapped as ``{"few_shot": {...}}``. Both layouts are accepted.

    Returns
    -------
    None

    Notes
    -----
    * A config, shot or split that ``analysis`` does not have yet is inserted
      as the same object found in ``new_results`` (no copy is made).
    * For a split that already exists, only metrics already present in its
      ``"overall"`` dict are overwritten; unknown metrics are ignored.
    * A split missing from the new data is left untouched.
    """
    for config, config_data in new_results.items():
        # Normalise the two accepted layouts so that what gets stored always
        # has shots directly under the config, like the rest of ``analysis``.
        shots = config_data.get("few_shot", config_data)

        if config not in analysis:
            analysis[config] = shots
            continue

        known_shots = analysis[config]
        for shot, shot_data in shots.items():
            if shot not in known_shots:
                known_shots[shot] = shot_data
                continue

            for res_type in ("train", "test"):
                if res_type not in shot_data:
                    continue
                if res_type not in known_shots[shot]:
                    known_shots[shot][res_type] = shot_data[res_type]
                    continue

                current_overall = known_shots[shot][res_type].get("overall", {})
                incoming_overall = shot_data[res_type].get("overall", {})
                for metric, value in incoming_overall.items():
                    if metric in current_overall:
                        current_overall[metric] = value


# Function to print analysis results
def print_analysis(analysis):
    """Write the analysis to stdout as an indented text report.

    For every config and shot, each split is printed with its mean metrics
    (when the split has a ``"mean_metrics"`` entry, formatted to four
    decimals) followed by the raw ``"overall"`` dict.

    Parameters
    ----------
    analysis : dict
        Nested mapping ``config -> shot -> split -> {"overall": ..., ...}``.
        Every split must contain ``"overall"``.
    """
    for config in analysis:
        print(f"Config: {config}")
        shots = analysis[config]
        for shot in shots:
            print(f"  Shot: {shot}")
            for res_type, res_data in shots[shot].items():
                # Read "overall" before printing the split header, so a split
                # without it fails before any of its lines are emitted.
                overall = res_data["overall"]
                print(f"    {res_type.capitalize()}:")

                for metric, value in res_data.get("mean_metrics", {}).items():
                    print(f"      Mean {metric}: {value:.4f}")
                print(f"      Overall Metrics: {overall}")

In [2]:
# Path to the JSON results file (relative to the notebook's working directory)
file_path = 'evaluation_results.json'

# Read the results from the JSON file
results = read_results(file_path)

# Reduce the per-class metrics to one mean per metric for every
# config / shot / train-test split
analysis = compute_mean_metrics(results)

# New evaluation results to be added, entered by hand.
# Layout: config -> shot -> "train"/"test" -> "overall" -> metric.
# There are no per-class entries here, only overall metrics.
# The values appear to be percentages; they are rescaled just below.
new_evaluation_results = {
    "faster_rcnn_FCT_DOTA.yaml": {
        "10_shot": {
            "train": {
                "overall": {
                    "AP": 28.3912,
                    "AP50": 49.9011,
                    "AP75": 28.2457,
                    "APs": 22.9618,
                    "APm": 54.6035,
                    "APl": 55.3671
                }
            },
            "test": {
                "overall": {
                    "AP": 17.5446,
                    "AP50": 32.6972,
                    "AP75": 16.6045,
                    "APs": 16.2349,
                    "APm": 19.7232,
                    "APl": 46.2229
                }
            }
        }
    },
    "faster_rcnn_FCT_DIOR.yaml": {
        "10_shot": {
            "train": {
                "overall": {
                    "AP": 51.8869,
                    "AP50": 69.5572,
                    "AP75": 57.7049,
                    "APs": 10.8314,
                    "APm": 51.6611,
                    "APl": 88.5412
                }
            },
            "test": {
                "overall": {
                    "AP": 22.0111,
                    "AP50": 38.5204,
                    "AP75": 23.5506,
                    "APs": 0.8578,
                    "APm": 21.7535,
                    "APl": 66.0012
                }
            }
        }
    }
}

# Divide the new results by 100, in place.
# NOTE(review): presumably this matches the 0-1 scale of the values loaded
# from the JSON file - confirm against the file.
for config, config_data in new_evaluation_results.items():
    for shot, shot_data in config_data.items():
        for res_type in ["train", "test"]:
            for metric, value in shot_data[res_type]["overall"].items():
                shot_data[res_type]["overall"][metric] = value / 100

# Add the new evaluation results to the analysis (mutates `analysis` in place)
add_new_results(analysis, new_evaluation_results)

# Print the full merged analysis as indented text
print_analysis(analysis)

Config: fcos_PVT_V2_B2_LI_FPN_RETINANET_DOTA.yaml
  Shot: 1_shot
    Train:
      Mean AP: 0.2433
      Mean AP50: 0.4811
      Mean AP75: 0.2137
      Mean APs: 0.1265
      Mean APm: 0.2668
      Mean APl: 0.3152
      Overall Metrics: {'AP': 0.2777742317479007, 'AP50': 0.5250487502700667, 'AP75': 0.26175481858740957, 'APs': 0.15205280272615967, 'APm': 0.2797122013537455, 'APl': 0.3602380287288315}
    Test:
      Mean AP: 0.1254
      Mean AP50: 0.2549
      Mean AP75: 0.1136
      Mean APs: 0.1009
      Mean APm: 0.1434
      Mean APl: 0.1786
      Overall Metrics: {'AP': 0.12537204282836728, 'AP50': 0.2548595246998181, 'AP75': 0.11364735185075317, 'APs': 0.09876490196486629, 'APm': 0.14338808150493806, 'APl': 0.17859592894974266}
  Shot: 10_shot
    Train:
      Mean AP: 0.2540
      Mean AP50: 0.5110
      Mean AP75: 0.2189
      Mean APs: 0.1385
      Mean APm: 0.2872
      Mean APl: 0.3153
      Overall Metrics: {'AP': 0.2647621043123206, 'AP50': 0.5256320154794782, 'AP75': 0.2

In [3]:
import pandas as pd

# Function to extract dataset name from config file name and clean the config name
def extract_and_clean_config(config_file):
    """Split a config file name at its last underscore.

    Parameters
    ----------
    config_file : str
        A name such as ``"fcos_R_50_FPN_RETINANET_DOTA.yaml"``.

    Returns
    -------
    tuple of (str, str)
        ``(cleaned_config, dataset_name)``: the text before the last ``_``,
        and the text after it with every ``.yaml`` occurrence removed. When
        the name has no underscore, the first element is an empty string.
    """
    # rpartition yields ('', '', name) when there is no underscore, which
    # gives the same result as splitting on '_' and joining all but the last.
    cleaned_config, _, tail = config_file.rpartition('_')
    return cleaned_config, tail.replace('.yaml', '')

# Function to convert analysis to a pandas DataFrame
def analysis_to_dataframe(analysis):
    """Flatten the nested analysis into one DataFrame row per split.

    Parameters
    ----------
    analysis : dict
        Nested mapping ``config -> shot -> split -> {"overall": ..., ...}``.
        Every split must contain ``"overall"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Config``, ``Dataset``, ``Shot``, ``Type`` followed by one
        column per metric. The metrics come from ``"mean_metrics"`` when the
        split has that key, otherwise from ``"overall"``.
    """
    rows = []
    for config, shots in analysis.items():
        cleaned_config, dataset_name = extract_and_clean_config(config)
        for shot, splits in shots.items():
            for res_type, res_data in splits.items():
                # "overall" is looked up unconditionally, so it is required
                # even when "mean_metrics" is what ends up in the row.
                fallback = res_data['overall']
                metrics = res_data.get("mean_metrics", fallback)
                rows.append({
                    "Config": cleaned_config,
                    "Dataset": dataset_name,
                    "Shot": shot,
                    "Type": res_type,
                    **metrics,
                })

    return pd.DataFrame(rows)


In [4]:
# Flatten the nested analysis into a DataFrame, one row per config / shot / split
df = analysis_to_dataframe(analysis)

# Print a caption, then leave `df` as the cell's last expression so the
# notebook renders it as a table. Nothing is saved to disk here.
print("Original DataFrame:")
df

Original DataFrame:


Unnamed: 0,Config,Dataset,Shot,Type,AP,AP50,AP75,APs,APm,APl
0,fcos_PVT_V2_B2_LI_FPN_RETINANET,DOTA,1_shot,train,0.243311,0.481102,0.213693,0.126457,0.266834,0.31515
1,fcos_PVT_V2_B2_LI_FPN_RETINANET,DOTA,1_shot,test,0.125372,0.25486,0.113647,0.100925,0.143388,0.178596
2,fcos_PVT_V2_B2_LI_FPN_RETINANET,DOTA,10_shot,train,0.254026,0.511001,0.218898,0.138475,0.287152,0.315333
3,fcos_PVT_V2_B2_LI_FPN_RETINANET,DOTA,10_shot,test,0.209991,0.362612,0.221284,0.136209,0.213111,0.279963
4,fcos_R_50_FPN_RETINANET,DOTA,1_shot,train,0.263001,0.512711,0.236285,0.126847,0.288868,0.338817
5,fcos_R_50_FPN_RETINANET,DOTA,1_shot,test,0.08701,0.17862,0.076507,0.082518,0.104053,0.184516
6,fcos_R_50_FPN_RETINANET,DOTA,10_shot,train,0.260135,0.517432,0.22923,0.131261,0.296426,0.330697
7,fcos_R_50_FPN_RETINANET,DOTA,10_shot,test,0.236616,0.404916,0.24975,0.143443,0.26155,0.321179
8,fcos_PVT_V2_B2_LI_FPN_RETINANET,DIOR,1_shot,train,0.39017,0.628022,0.428006,0.084023,0.320995,0.532492
9,fcos_PVT_V2_B2_LI_FPN_RETINANET,DIOR,1_shot,test,0.20602,0.336562,0.218313,0.043977,0.214874,0.282069


In [5]:
# Function to convert single index DataFrame to multi-index DataFrame
def convert_to_multiindex(df):
    """Return a copy of ``df`` indexed by Dataset / Shot / Type / Config.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns ``Dataset``, ``Shot``, ``Type`` and
        ``Config``; all remaining columns are kept as data columns.

    Returns
    -------
    pandas.DataFrame
        New frame with those four columns as a row MultiIndex, sorted by that
        index. The input frame is not modified.
    """
    index_levels = ['Dataset', 'Shot', 'Type', 'Config']
    # Metric columns stay flat; only the rows get a hierarchical index.
    return df.set_index(index_levels).sort_index()

In [11]:
# Re-index the flat frame by Dataset / Shot / Type / Config and sort it
df_multi = convert_to_multiindex(df)

# Print a caption, then leave `df_multi` as the cell's last expression so the
# notebook renders it as a table. Nothing is saved to disk here.
print("Multi-Index DataFrame:")
df_multi

Multi-Index DataFrame:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AP,AP50,AP75,APs,APm,APl
Dataset,Shot,Type,Config,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DIOR,10_shot,test,faster_rcnn_FCT,0.220111,0.385204,0.235506,0.008578,0.217535,0.660012
DIOR,10_shot,test,fcos_PVT_V2_B2_LI_FPN_RETINANET,0.229834,0.392791,0.23591,0.045722,0.220789,0.314814
DIOR,10_shot,test,fcos_R_50_FPN_RETINANET,0.140644,0.267625,0.135869,0.023198,0.140472,0.211243
DIOR,10_shot,train,faster_rcnn_FCT,0.518869,0.695572,0.577049,0.108314,0.516611,0.885412
DIOR,10_shot,train,fcos_PVT_V2_B2_LI_FPN_RETINANET,0.384159,0.634558,0.412625,0.084311,0.33019,0.533163
DIOR,10_shot,train,fcos_R_50_FPN_RETINANET,0.300327,0.500952,0.322059,0.028387,0.193652,0.426037
DIOR,1_shot,test,fcos_PVT_V2_B2_LI_FPN_RETINANET,0.20602,0.336562,0.218313,0.043977,0.214874,0.282069
DIOR,1_shot,test,fcos_R_50_FPN_RETINANET,0.127097,0.225897,0.122866,0.020116,0.140019,0.197389
DIOR,1_shot,train,fcos_PVT_V2_B2_LI_FPN_RETINANET,0.39017,0.628022,0.428006,0.084023,0.320995,0.532492
DIOR,1_shot,train,fcos_R_50_FPN_RETINANET,0.327854,0.525674,0.355347,0.029458,0.217893,0.458035


In [7]:
# Function to highlight the maximum values in bold for each group
def highlight_max(df, group_levels, color='darkorange'):
    '''
    Build a CSS table that marks the per-group maximum of every column.

    Rows are grouped by the index levels in ``group_levels``. Within each
    group, every cell equal to its column's maximum gets a bold, italic,
    coloured font style; all other cells get an empty string.

    Returns a DataFrame of CSS strings with the same index and columns as
    ``df``.
    '''
    attr = f'font-weight: bold; font-style: italic; color: {color}'
    css = pd.DataFrame('', index=df.index, columns=df.columns)

    for _, members in df.groupby(level=group_levels):
        column_maxima = members.max()
        rows = members.index
        for column in df.columns:
            # Ties are all highlighted: every cell matching the maximum wins.
            winners = members[column].eq(column_maxima[column])
            css.loc[rows, column] = np.where(winners, attr, '')

    return css

# Function to alternate the background colors for the "train" and "test" groups on index level
def alternate_background(df):
    """Build a CSS table that shades whole rows by Dataset / Shot / Type group.

    Groups are visited in the order ``groupby`` yields them. The shade flips
    between two greys each time the ``Type`` level differs from that of the
    previous group; consecutive groups with the same ``Type`` share a shade.

    Returns a DataFrame of CSS strings with the same index and columns as
    ``df``. The index must have levels named ``Dataset``, ``Shot`` and
    ``Type``.
    """
    colors = ['#333333', '#555555']
    css = pd.DataFrame('', index=df.index, columns=df.columns)

    flips = 0
    last_type = None
    for (_, _, current_type), members in df.groupby(level=['Dataset', 'Shot', 'Type']):
        if current_type != last_type:
            flips += 1
            last_type = current_type
        bg_color = colors[flips % 2]
        # Every cell of every row in the group receives the same background.
        css.loc[members.index, :] = f'background-color: {bg_color}'

    return css

In [18]:
# Highlight the best value per metric within each (Dataset, Shot, Type) group.
# axis=None hands the whole DataFrame to highlight_max in one call, and
# group_levels is forwarded to it as a keyword argument.
styled_df = df_multi.style.apply(highlight_max, group_levels=['Dataset', 'Shot', 'Type'], axis=None)

# Layer the row background shading on top of the highlight styles
styled_df = styled_df.apply(alternate_background, axis=None)

# Display each value multiplied by 100 with two decimals; as the cell's last
# expression, the formatted Styler is what gets rendered.
styled_df.format(lambda x: f'{x * 100:.2f}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,AP,AP50,AP75,APs,APm,APl
Dataset,Shot,Type,Config,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DIOR,10_shot,test,faster_rcnn_FCT,22.01,38.52,23.55,0.86,21.75,66.0
DIOR,10_shot,test,fcos_PVT_V2_B2_LI_FPN_RETINANET,22.98,39.28,23.59,4.57,22.08,31.48
DIOR,10_shot,test,fcos_R_50_FPN_RETINANET,14.06,26.76,13.59,2.32,14.05,21.12
DIOR,10_shot,train,faster_rcnn_FCT,51.89,69.56,57.7,10.83,51.66,88.54
DIOR,10_shot,train,fcos_PVT_V2_B2_LI_FPN_RETINANET,38.42,63.46,41.26,8.43,33.02,53.32
DIOR,10_shot,train,fcos_R_50_FPN_RETINANET,30.03,50.1,32.21,2.84,19.37,42.6
DIOR,1_shot,test,fcos_PVT_V2_B2_LI_FPN_RETINANET,20.6,33.66,21.83,4.4,21.49,28.21
DIOR,1_shot,test,fcos_R_50_FPN_RETINANET,12.71,22.59,12.29,2.01,14.0,19.74
DIOR,1_shot,train,fcos_PVT_V2_B2_LI_FPN_RETINANET,39.02,62.8,42.8,8.4,32.1,53.25
DIOR,1_shot,train,fcos_R_50_FPN_RETINANET,32.79,52.57,35.53,2.95,21.79,45.8
