# V1 — metrics parsed from `single_svm_metrics.txt`

In [18]:
import os
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model variants to aggregate; each name is joined onto `base_directory` as a folder.
model_types = [
    "plumber_img_prompt", "plumber_img_text_proj_img_prompt_cls_LP",
    "plumber_img_proj", "plumber_img_prompt_cls_LP",
    "plumber_text_proj_img_prompt", "plumber_img_proj_cls_LP",
    "plumber_img_text_proj", "plumber_text_proj_img_prompt_cls_LP",
    "plumber_img_proj_img_prompt", "plumber_img_text_proj_cls_LP",
    "plumber_img_proj_img_prompt_cls_LP", "plumber_img_text_proj_img_prompt"
]

# Candidate evaluation-folder sets. Each one has its own name so that picking a set
# is a single explicit assignment rather than a chain of overwrites of the same
# variable (previously only the last assignment had any effect).
CIFAR10_C_CORRUPTIONS = [
    "brightness", "contrast", "defocus_blur", "elastic_transform", "fog",
    "frost", "gaussian_blur", "gaussian_noise", "glass_blur", "impulse_noise",
    "jpeg_compression", "motion_blur", "pixelate", "saturate", "shot_noise",
    "snow", "spatter", "speckle_noise", "zoom_blur"
]
DOMAINNET_DOMAINS = ["clipart", "infograph", "painting", "quickdraw", "real", "sketch"]

# Active selection: one empty entry, i.e. the metrics file is looked up directly in
# the failure_detector folder without a per-corruption sub-folder.
# Alternatives (use exactly one assignment):
# corruption_folders = [f"domainnet_{folder}_" for folder in DOMAINNET_DOMAINS]
# corruption_folders = [f"cifar10-c_{folder}_4" for folder in CIFAR10_C_CORRUPTIONS]
corruption_folders = [""]

dataset = "cifar10"
save_data_name = "cifar10"  # prefix of the CSV files written by later cells
model_name = "SimpleCNN"

epoch = 29  # interpolated into the run-folder name below

# Directory where these folders are located
base_directory = f"logs/{dataset}/{model_name}"  # Replace with the actual path

# Path fragment appended after each model-type folder to reach the failure-detector output
remaining_dir = f"_clsEpoch_{epoch}_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector"

# Function to extract metrics from a file
def extract_metrics(file_path):
    """Parse per-feature-type metrics out of a plain-text metrics report.

    Only lines beginning with ``proj_features``, ``classifier_features`` or
    ``clip_features`` are read; every other line is ignored. On a matching
    line the whitespace-separated tokens at positions 2, 4, 6 and 9 are
    converted to floats (after trimming ``%`` / ``,``) and stored as
    Accuracy, Precision, Recall and F1 Score respectively.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping the first token of each matching line to a dict with the
        float entries "Accuracy", "Precision", "Recall" and "F1 Score". If two
        lines share the same first token, the later one wins.
    """
    feature_prefixes = ("proj_features", "classifier_features", "clip_features")
    # (metric name, token position on the line, characters trimmed before float())
    token_layout = (
        ("Accuracy", 2, '%,'),
        ("Precision", 4, '%,'),
        ("Recall", 6, '%,'),
        ("F1 Score", 9, '%'),
    )

    results = {}
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            if not raw_line.startswith(feature_prefixes):
                continue
            tokens = raw_line.split()
            results[tokens[0]] = {
                metric_name: float(tokens[position].strip(trim_chars))
                for metric_name, position, trim_chars in token_layout
            }
    return results

def extract_metrics_from_json(file_path, model_type=None):
    """Load failure-detector metrics from one or more JSON result files.

    The result is keyed by the feature type encoded in each file name, i.e. the
    part of the base name before ``_metrics`` (for example
    ``proj_features_metrics_single_svm.json`` -> ``proj_features``).

    Previously the name taken from the file was compared against
    ``"plumber_img_prompt"`` (which a feature-type name never equals) and every
    file's metrics were stored under that one key, so the sibling files were
    never read and would have overwritten each other if they had been.

    Args:
        file_path: Path of the ``proj_features`` JSON metrics file.
        model_type: Optional model-type name. When it equals
            ``"plumber_img_prompt"``, the files obtained by replacing
            ``proj_features`` in ``file_path`` with ``classifier_features`` and
            ``clip_features`` are read as well. Defaults to ``None`` (read only
            ``file_path``), which keeps single-argument calls working.

    Returns:
        dict mapping feature type -> dict with the entries ``task_model_acc``,
        ``estimated_acc``, ``estimation_gap``, ``failure_svm_accuracy`` and
        ``sucess_svm_accuracy``.

    Raises:
        FileNotFoundError: If any of the files to read does not exist.
        KeyError: If a loaded JSON object lacks one of the keys read below.
    """
    if model_type == "plumber_img_prompt":
        file_paths = [
            file_path,
            file_path.replace("proj_features", "classifier_features"),
            file_path.replace("proj_features", "clip_features"),
        ]
    else:
        file_paths = [file_path]

    all_metrics = {}
    # A separate loop name keeps the `file_path` argument intact.
    for metrics_path in file_paths:
        # Feature type = base name up to "_metrics"
        data_type = metrics_path.split("/")[-1].split("_metrics")[0]

        with open(metrics_path, 'r') as file:
            data = json.load(file)

        all_metrics[data_type] = {
            # accuracy of the task predictions against the ground-truth labels
            'task_model_acc': accuracy_score(data["gt_labels"], data["task_pred"]),
            # mean of the stored `correct_svm_pred` entries
            'estimated_acc': sum(data["correct_svm_pred"]) / len(data["correct_svm_pred"]),
            'estimation_gap': data["estimation_gap"],
            'failure_svm_accuracy': data["accu_failure_pred"],
            'sucess_svm_accuracy': data["accu_success_pred"],
        }

    return all_metrics

In [19]:

data_list = []
# Visit every (model type, corruption folder) pair and collect one row per feature type.
# NOTE(review): remaining_dir is joined as a child of the model-type folder even though it
# starts with "_" — confirm this matches the on-disk log layout.
for model_type in model_types:
    model_dir = os.path.join(base_directory, model_type)
    for corruption_folder in corruption_folders:
        detector_dir = os.path.join(model_dir, remaining_dir, corruption_folder)
        metrics_file = os.path.join(detector_dir, "single_svm_metrics.txt")

        # Report missing files and move on instead of aborting the whole sweep
        if not os.path.exists(metrics_file):
            print("Metrics file not found for", metrics_file)
            continue

        metrics_data = extract_metrics(metrics_file)
        print(metrics_data)
        data_list.extend(
            {
                "Model Type": model_type,
                "Corruption": corruption_folder,
                "Feature Type": feature_type,
                **feature_metrics,
            }
            for feature_type, feature_metrics in metrics_data.items()
        )



{'classifier_features': {'Accuracy': 65.18, 'Precision': 48.02, 'Recall': 61.19, 'F1 Score': 53.81}, 'clip_features': {'Accuracy': 66.93, 'Precision': 50.11, 'Recall': 58.81, 'F1 Score': 54.11}, 'proj_features': {'Accuracy': 66.31, 'Precision': 49.32, 'Recall': 59.49, 'F1 Score': 53.93}}
{'classifier_features': {'Accuracy': 76.07, 'Precision': 23.28, 'Recall': 49.86, 'F1 Score': 31.74}, 'clip_features': {'Accuracy': 75.39, 'Precision': 22.57, 'Recall': 49.57, 'F1 Score': 31.01}, 'proj_features': {'Accuracy': 77.0, 'Precision': 24.42, 'Recall': 50.66, 'F1 Score': 32.96}}
{'classifier_features': {'Accuracy': 64.9, 'Precision': 45.52, 'Recall': 62.5, 'F1 Score': 52.68}, 'clip_features': {'Accuracy': 68.53, 'Precision': 49.73, 'Recall': 62.65, 'F1 Score': 55.45}, 'proj_features': {'Accuracy': 69.07, 'Precision': 50.44, 'Recall': 60.73, 'F1 Score': 55.11}}
{'classifier_features': {'Accuracy': 86.05, 'Precision': 9.63, 'Recall': 36.28, 'F1 Score': 15.22}, 'clip_features': {'Accuracy': 88.98,

In [20]:
# Assemble a single table from the collected rows
df = pd.DataFrame(data_list)

# Write it to "<save_data_name>-<model_name>.csv" without the index column, then show it
csv_path = f"{save_data_name}-{model_name}.csv"
df.to_csv(csv_path, index=False)
print(df)

                          Model Type            Corruption  \
0                 plumber_img_prompt    domainnet_clipart_   
1                 plumber_img_prompt    domainnet_clipart_   
2                 plumber_img_prompt    domainnet_clipart_   
3                 plumber_img_prompt  domainnet_infograph_   
4                 plumber_img_prompt  domainnet_infograph_   
..                               ...                   ...   
79  plumber_img_text_proj_img_prompt  domainnet_infograph_   
80  plumber_img_text_proj_img_prompt   domainnet_painting_   
81  plumber_img_text_proj_img_prompt  domainnet_quickdraw_   
82  plumber_img_text_proj_img_prompt       domainnet_real_   
83  plumber_img_text_proj_img_prompt     domainnet_sketch_   

           Feature Type  Accuracy  Precision  Recall  F1 Score  
0   classifier_features     65.18      48.02   61.19     53.81  
1         clip_features     66.93      50.11   58.81     54.11  
2         proj_features     66.31      49.32   59.49     53.

In [23]:
# Split the results into one frame per corruption folder, keyed by the folder name
df_corruptions = {}
for corruption_folder in corruption_folders:
    is_this_corruption = df["Corruption"] == corruption_folder
    df_corruption = df.loc[is_this_corruption]
    df_corruptions[corruption_folder] = df_corruption
    print(df_corruption)

                                 Model Type          Corruption  \
0                        plumber_img_prompt  domainnet_clipart_   
1                        plumber_img_prompt  domainnet_clipart_   
2                        plumber_img_prompt  domainnet_clipart_   
18  plumber_img_text_proj_img_prompt_cls_LP  domainnet_clipart_   
24                         plumber_img_proj  domainnet_clipart_   
30                plumber_img_prompt_cls_LP  domainnet_clipart_   
36             plumber_text_proj_img_prompt  domainnet_clipart_   
42                  plumber_img_proj_cls_LP  domainnet_clipart_   
48                    plumber_img_text_proj  domainnet_clipart_   
54      plumber_text_proj_img_prompt_cls_LP  domainnet_clipart_   
60              plumber_img_proj_img_prompt  domainnet_clipart_   
66             plumber_img_text_proj_cls_LP  domainnet_clipart_   
72       plumber_img_proj_img_prompt_cls_LP  domainnet_clipart_   
78         plumber_img_text_proj_img_prompt  domainnet_clipart

In [21]:
# Average every metric over all rows that share a (Model Type, Feature Type) pair
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
grouped_df = (
    df.groupby(['Model Type', 'Feature Type'])
    .agg({column: 'mean' for column in metric_columns})
    .reset_index()
)

# Show the averaged table and store it as "<save_data_name>-<model_name>-grouped.csv"
print(grouped_df)
grouped_df.to_csv(f"{save_data_name}-{model_name}-grouped.csv", index=False)

                                 Model Type         Feature Type   Accuracy  \
0                          plumber_img_proj        proj_features  71.863333   
1                   plumber_img_proj_cls_LP        proj_features  71.853333   
2               plumber_img_proj_img_prompt        proj_features  65.725000   
3        plumber_img_proj_img_prompt_cls_LP        proj_features  71.756667   
4                        plumber_img_prompt  classifier_features  70.876667   
5                        plumber_img_prompt        clip_features  72.576667   
6                        plumber_img_prompt        proj_features  72.485000   
7                 plumber_img_prompt_cls_LP        proj_features  72.350000   
8                     plumber_img_text_proj        proj_features  71.755000   
9              plumber_img_text_proj_cls_LP        proj_features  72.051667   
10         plumber_img_text_proj_img_prompt        proj_features  73.118333   
11  plumber_img_text_proj_img_prompt_cls_LP        p

In [22]:
# Display label -> folder name; the key order also defines the row order of the final table.
model_mapping = {
    "Image (Prompt)": "plumber_img_prompt",
    "Image (Project)": "plumber_img_proj",
    "Image (Prompt + Project)": "plumber_img_proj_img_prompt",
    "Image (Prompt) + Text (Cls Prompt)": "plumber_img_prompt_cls_LP",
    "Image (Prompt) + Text (Project)": "plumber_text_proj_img_prompt",
    "Image (Prompt) + Text (Prompt + Project)": "plumber_text_proj_img_prompt_cls_LP",
    "Image (Project) + Text (Cls Prompt)": "plumber_img_proj_cls_LP",
    "Image (Project) + Text (Project)": "plumber_img_text_proj",
    "Image (Project) + Text (Prompt + Project)": "plumber_img_text_proj_cls_LP",
    "Image (Prompt + Project) + Text (Cls Prompt)": "plumber_img_proj_img_prompt_cls_LP",
    "Image (Prompt + Project) + Text (Project)": "plumber_img_text_proj_img_prompt",
    "Image (Prompt + Project) + Text (Prompt + Project)": "plumber_img_text_proj_img_prompt_cls_LP"
}
# Reverse the dictionary for replacement (folder name -> display label)
reverse_mapping = {v: k for k, v in model_mapping.items()}

# Relabel 'Model Type' with a lookup that leaves unknown values untouched.
# A plain .map(reverse_mapping) turned every label into NaN when this cell was run a
# second time, because the already-relabelled values are not keys of reverse_mapping.
grouped_df['Model Type'] = grouped_df['Model Type'].map(
    lambda name: reverse_mapping.get(name, name)
)

# Create a custom sort order based on the order of keys in model_mapping
sort_order = {k: i for i, k in enumerate(model_mapping.keys())}

# Sort by a temporary rank column and drop it again. mergesort is stable, so rows that
# share a model type (e.g. its several feature types) keep their previous relative order;
# the default sort algorithm does not guarantee that.
grouped_df = (
    grouped_df
    .assign(**{'Sort Order': grouped_df['Model Type'].map(sort_order)})
    .sort_values(by='Sort Order', kind='mergesort')
    .drop('Sort Order', axis=1)
)

# Display the sorted DataFrame
print(grouped_df)
# grouped_df.to_csv("Waterbirds-resnet18-grouped-sorted.csv", index=False)

                                           Model Type         Feature Type  \
4                                      Image (Prompt)  classifier_features   
5                                      Image (Prompt)        clip_features   
6                                      Image (Prompt)        proj_features   
0                                     Image (Project)        proj_features   
2                            Image (Prompt + Project)        proj_features   
7                  Image (Prompt) + Text (Cls Prompt)        proj_features   
12                    Image (Prompt) + Text (Project)        proj_features   
13           Image (Prompt) + Text (Prompt + Project)        proj_features   
1                 Image (Project) + Text (Cls Prompt)        proj_features   
8                    Image (Project) + Text (Project)        proj_features   
9           Image (Project) + Text (Prompt + Project)        proj_features   
3        Image (Prompt + Project) + Text (Cls Prompt)        pro

In [8]:
# Show one metric column of grouped_df as a plain list (one value per row).
# Only the last expression of the cell is displayed; uncomment a different line
# (and comment out the active one) to inspect another metric.
# list(grouped_df["Accuracy"])
# list(grouped_df["Precision"])
list(grouped_df["Recall"])

[60.64,
 61.64,
 62.14,
 61.99,
 54.99,
 60.96,
 58.0,
 55.4,
 61.83,
 61.18,
 61.53,
 58.19,
 52.88,
 54.51]

In [25]:
# For every corruption folder: average the metrics per (Model Type, Feature Type),
# relabel the model types through reverse_mapping and order the rows like the keys
# of model_mapping.

# Rank of each display label = its position among the keys of model_mapping
sort_order = {label: rank for rank, label in enumerate(model_mapping)}

metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1 Score']

grouped_corruptions = {}
for corruption_folder in corruption_folders:
    df_corruption = df_corruptions[corruption_folder]
    grouped_df_corruption = (
        df_corruption
        .groupby(['Model Type', 'Feature Type'])
        .agg({column: 'mean' for column in metric_columns})
        .reset_index()
        # folder name -> display label
        .assign(**{'Model Type': lambda frame: frame['Model Type'].map(reverse_mapping)})
        # temporary rank column, used only for sorting
        .assign(**{'Sort Order': lambda frame: frame['Model Type'].map(sort_order)})
        .sort_values(by='Sort Order')
        .drop(columns='Sort Order')
    )
    grouped_corruptions[corruption_folder] = grouped_df_corruption
    print(grouped_df_corruption)
    

                                           Model Type         Feature Type  \
4                                      Image (Prompt)  classifier_features   
5                                      Image (Prompt)        clip_features   
6                                      Image (Prompt)        proj_features   
0                                     Image (Project)        proj_features   
2                            Image (Prompt + Project)        proj_features   
7                  Image (Prompt) + Text (Cls Prompt)        proj_features   
12                    Image (Prompt) + Text (Project)        proj_features   
13           Image (Prompt) + Text (Prompt + Project)        proj_features   
1                 Image (Project) + Text (Cls Prompt)        proj_features   
8                    Image (Project) + Text (Project)        proj_features   
9           Image (Project) + Text (Prompt + Project)        proj_features   
3        Image (Prompt + Project) + Text (Cls Prompt)        pro

In [45]:

# a = ["clipart", "infograph", "painting", "quickdraw", "real", "sketch"]
# Inspect the folder at index 5; when fewer than six folders are configured
# (e.g. corruption_folders = [""]) fall back to the last one. The bare
# corruption_folders[5] raised IndexError in that case.
selected_folder = corruption_folders[min(5, len(corruption_folders) - 1)]

# Only the last expression of a cell is displayed, so the other metrics are kept as
# comments; swap the comment markers to inspect a different column.
# list(grouped_corruptions[selected_folder]["Accuracy"])
# list(grouped_corruptions[selected_folder]["Precision"])
list(grouped_corruptions[selected_folder]["Recall"])

[59.61,
 58.99,
 57.96,
 62.93,
 56.29,
 58.94,
 50.99,
 50.54,
 64.74,
 64.74,
 64.34,
 59.7,
 61.18,
 61.49]

# V2 — metrics loaded from `*_metrics_single_svm.json`

In [17]:
import os
import pandas as pd
import json
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Model variants to aggregate; each name is joined onto `base_directory` as a folder.
model_types = [
    "plumber_img_prompt", "plumber_img_text_proj_img_prompt_cls_LP",
    "plumber_img_proj", "plumber_img_prompt_cls_LP",
    "plumber_text_proj_img_prompt", "plumber_img_proj_cls_LP",
    "plumber_img_text_proj", "plumber_text_proj_img_prompt_cls_LP",
    "plumber_img_proj_img_prompt", "plumber_img_text_proj_cls_LP",
    "plumber_img_proj_img_prompt_cls_LP", "plumber_img_text_proj_img_prompt"
]

# Candidate evaluation-folder sets. Each one has its own name so that picking a set
# is a single explicit assignment rather than a chain of overwrites of the same
# variable (previously only the last assignment had any effect).
CIFAR10_C_CORRUPTIONS = [
    "brightness", "contrast", "defocus_blur", "elastic_transform", "fog",
    "frost", "gaussian_blur", "gaussian_noise", "glass_blur", "impulse_noise",
    "jpeg_compression", "motion_blur", "pixelate", "saturate", "shot_noise",
    "snow", "spatter", "speckle_noise", "zoom_blur"
]
DOMAINNET_DOMAINS = ["clipart", "infograph", "painting", "quickdraw", "real", "sketch"]

# Active selection: one empty entry, i.e. the metrics file is looked up directly in
# the failure_detector folder without a per-corruption sub-folder.
# Alternatives (use exactly one assignment):
# corruption_folders = [f"domainnet_{folder}_" for folder in DOMAINNET_DOMAINS]
# corruption_folders = [f"cifar10-c_{folder}_4" for folder in CIFAR10_C_CORRUPTIONS]
corruption_folders = [""]

dataset = "cifar100"
save_data_name = "cifar100"  # prefix for the CSV file names used by later cells
model_name = "resnet50"

epoch = 199  # interpolated into the run-folder name below

# Directory where these folders are located
base_directory = f"logs/{dataset}/{model_name}"  # Replace with the actual path

# Path fragment appended after each model-type folder to reach the failure-detector output
remaining_dir = f"_clsEpoch_{epoch}_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector"


def extract_metrics_from_json(file_path, model_type):
    """Load failure-detector metrics from one or more JSON result files.

    Args:
        file_path: Path of the ``proj_features`` JSON metrics file.
        model_type: Model-type name. For ``"plumber_img_prompt"`` the files
            obtained by replacing ``proj_features`` in ``file_path`` with
            ``classifier_features`` and ``clip_features`` are read as well.

    Returns:
        dict keyed by the part of each file's base name before ``_metrics``;
        each value is a dict with the entries ``task_model_acc``,
        ``estimated_acc``, ``estimation_gap``, ``overall_svm_accuracy``,
        ``failure_svm_accuracy`` and ``sucess_svm_accuracy``.
    """
    file_paths = [file_path]
    if model_type == "plumber_img_prompt":
        # this model type additionally has classifier- and clip-feature result files
        file_paths += [
            file_path.replace("proj_features", sibling)
            for sibling in ("classifier_features", "clip_features")
        ]

    all_metrics = {}
    for metrics_path in file_paths:
        # Feature type = base name up to "_metrics"
        data_type = metrics_path.split("/")[-1].split("_metrics")[0]

        with open(metrics_path, 'r') as file:
            data = json.load(file)

        all_metrics[data_type] = {
            # accuracy of the task predictions against the ground-truth labels
            'task_model_acc': accuracy_score(data["gt_labels"], data["task_pred"]),
            # mean of the stored `correct_svm_pred` entries
            'estimated_acc': sum(data["correct_svm_pred"]) / len(data["correct_svm_pred"]),
            'estimation_gap': data["estimation_gap"],
            'overall_svm_accuracy': data['class_report']['accuracy'],
            'failure_svm_accuracy': data["accu_failure_pred"],
            'sucess_svm_accuracy': data["accu_success_pred"],
        }

    return all_metrics

data_list = []
# Visit every (model type, corruption folder) pair and collect one row per feature type.
# NOTE(review): remaining_dir is joined as a child of the model-type folder even though it
# starts with "_" — confirm this matches the on-disk log layout.
for model_type in model_types:
    model_dir = os.path.join(base_directory, model_type)
    for corruption_folder in corruption_folders:
        detector_dir = os.path.join(model_dir, remaining_dir, corruption_folder)
        metrics_file = os.path.join(detector_dir, "proj_features_metrics_single_svm.json")

        # Report missing files and move on instead of aborting the whole sweep
        if not os.path.exists(metrics_file):
            print("Metrics file not found for", metrics_file)
            continue

        metrics_data = extract_metrics_from_json(metrics_file, model_type)
        print(metrics_data)
        data_list.extend(
            {
                "Model Type": model_type,
                "Corruption": corruption_folder,
                "Feature Type": feature_type,
                **feature_metrics,
            }
            for feature_type, feature_metrics in metrics_data.items()
        )


{'proj_features': {'task_model_acc': 0.728, 'estimated_acc': 0.5613, 'estimation_gap': 0.16669999999999996, 'overall_svm_accuracy': 0.6565, 'failure_svm_accuracy': 0.675, 'sucess_svm_accuracy': 0.6495879120879121}, 'classifier_features': {'task_model_acc': 0.728, 'estimated_acc': 0.5778, 'estimation_gap': 0.1502, 'overall_svm_accuracy': 0.7212, 'failure_svm_accuracy': 0.7636029411764705, 'sucess_svm_accuracy': 0.7053571428571429}, 'clip_features': {'task_model_acc': 0.728, 'estimated_acc': 0.5747, 'estimation_gap': 0.1533, 'overall_svm_accuracy': 0.6287, 'failure_svm_accuracy': 0.5992647058823529, 'sucess_svm_accuracy': 0.6396978021978021}}
{'proj_features': {'task_model_acc': 0.728, 'estimated_acc': 0.5447, 'estimation_gap': 0.18330000000000002, 'overall_svm_accuracy': 0.6481, 'failure_svm_accuracy': 0.6900735294117647, 'sucess_svm_accuracy': 0.6324175824175824}}
{'proj_features': {'task_model_acc': 0.728, 'estimated_acc': 0.5576, 'estimation_gap': 0.1704, 'overall_svm_accuracy': 0.63

In [18]:
# Assemble a single table from the collected rows
df = pd.DataFrame(data_list)

# Save the dataframe
# df.to_csv(f"{save_data_name}-{model_name}.csv", index=False)


# Average every metric over all rows that share a (Model Type, Feature Type) pair
metric_columns = [
    'task_model_acc',
    'estimated_acc',
    'estimation_gap',
    'overall_svm_accuracy',
    'failure_svm_accuracy',
    'sucess_svm_accuracy',
]
grouped_df = (
    df.groupby(['Model Type', 'Feature Type'])
    .agg({column: 'mean' for column in metric_columns})
    .reset_index()
)

# Display label -> folder name; the key order also defines the row order of the final table.
model_mapping = {
    "Image (Prompt)": "plumber_img_prompt",
    "Image (Project)": "plumber_img_proj",
    "Image (Prompt + Project)": "plumber_img_proj_img_prompt",
    "Image (Prompt) + Text (Cls Prompt)": "plumber_img_prompt_cls_LP",
    "Image (Prompt) + Text (Project)": "plumber_text_proj_img_prompt",
    "Image (Prompt) + Text (Prompt + Project)": "plumber_text_proj_img_prompt_cls_LP",
    "Image (Project) + Text (Cls Prompt)": "plumber_img_proj_cls_LP",
    "Image (Project) + Text (Project)": "plumber_img_text_proj",
    "Image (Project) + Text (Prompt + Project)": "plumber_img_text_proj_cls_LP",
    "Image (Prompt + Project) + Text (Cls Prompt)": "plumber_img_proj_img_prompt_cls_LP",
    "Image (Prompt + Project) + Text (Project)": "plumber_img_text_proj_img_prompt",
    "Image (Prompt + Project) + Text (Prompt + Project)": "plumber_img_text_proj_img_prompt_cls_LP"
}
# Folder name -> display label
reverse_mapping = dict(zip(model_mapping.values(), model_mapping.keys()))

# Rank of each display label = its position among the keys of model_mapping
sort_order = {label: rank for rank, label in enumerate(model_mapping)}

# Relabel the model types, sort by a temporary rank column, then drop that column
grouped_df = (
    grouped_df
    .assign(**{'Model Type': lambda frame: frame['Model Type'].map(reverse_mapping)})
    .assign(**{'Sort Order': lambda frame: frame['Model Type'].map(sort_order)})
    .sort_values(by='Sort Order')
    .drop(columns='Sort Order')
)

# Display the sorted DataFrame
print(grouped_df)
# grouped_df.to_csv(f"{save_data_name}-{model_name}-grouped.csv", index=False)

                                           Model Type         Feature Type  \
4                                      Image (Prompt)  classifier_features   
5                                      Image (Prompt)        clip_features   
6                                      Image (Prompt)        proj_features   
0                                     Image (Project)        proj_features   
2                            Image (Prompt + Project)        proj_features   
7                  Image (Prompt) + Text (Cls Prompt)        proj_features   
12                    Image (Prompt) + Text (Project)        proj_features   
13           Image (Prompt) + Text (Prompt + Project)        proj_features   
1                 Image (Project) + Text (Cls Prompt)        proj_features   
8                    Image (Project) + Text (Project)        proj_features   
9           Image (Project) + Text (Prompt + Project)        proj_features   
3        Image (Prompt + Project) + Text (Cls Prompt)        pro

In [None]:
# list(grouped_df["Accuracy"])
# list(grouped_df["Precision"])
# list(grouped_df["Recall"])