In [19]:
import os
import pandas as pd

# Run folders to scan, one entry per model variant. Iteration order
# determines the row order of the collected results.
model_types = [
    "plumber_img_prompt",
    "plumber_img_text_proj_img_prompt_cls_LP",
    "plumber_img_proj",
    "plumber_img_prompt_cls_LP",
    "plumber_text_proj_img_prompt",
    "plumber_img_proj_cls_LP",
    "plumber_img_text_proj",
    "plumber_text_proj_img_prompt_cls_LP",
    "plumber_img_proj_img_prompt",
    "plumber_img_text_proj_cls_LP",
    "plumber_img_proj_img_prompt_cls_LP",
    "plumber_img_text_proj_img_prompt",
]

# Corruption names used to build the per-corruption result folder names.
corruption_folders = [
    "brightness",
    "contrast",
    "defocus_blur",
    "elastic_transform",
    "fog",
    "frost",
    "gaussian_blur",
    "gaussian_noise",
    "glass_blur",
    "impulse_noise",
    "jpeg_compression",
    "motion_blur",
    "pixelate",
    "saturate",
    "shot_noise",
    "snow",
    "spatter",
    "speckle_noise",
    "zoom_blur",
]

# Root directory that contains one sub-folder per model type
# (relative to the notebook's working directory; adjust as needed).
base_directory = "logs/cifar10-limited/resnet18"  # Replace with the actual path

# Path fragment between the model-type folder and the corruption folder.
# The hyper-parameter values are baked into the folder name.
remaining_dir = "_clsEpoch_29_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector"

# Function to extract metrics from a file
def extract_metrics(file_path):
    """Parse per-feature-type metrics out of a text metrics file.

    Only lines that start with ``proj_features``, ``classifier_features``
    or ``clip_features`` are considered; every other line is ignored.
    Each matching line is split on whitespace and the metric values are
    read from fixed token positions (2, 4, 6 and 9), with trailing
    ``%`` / ``,`` characters stripped before conversion to ``float``.

    Args:
        file_path: Path of the metrics file to read.

    Returns:
        dict mapping the first token of each matching line to a dict with
        the keys ``"Accuracy"``, ``"Precision"``, ``"Recall"`` and
        ``"F1 Score"``. If the same first token appears on several lines,
        the last one wins.

    Raises:
        IndexError: if a matching line has fewer than ten tokens.
        ValueError: if a token at a metric position is not numeric.
    """
    wanted_prefixes = ("proj_features", "classifier_features", "clip_features")
    # (metric name, token index, characters to strip from the token)
    field_layout = (
        ("Accuracy", 2, '%,'),
        ("Precision", 4, '%,'),
        ("Recall", 6, '%,'),
        ("F1 Score", 9, '%'),
    )

    results = {}
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            if not raw_line.startswith(wanted_prefixes):
                continue
            tokens = raw_line.split()
            results[tokens[0]] = {
                name: float(tokens[position].strip(junk))
                for name, position, junk in field_layout
            }
    return results


In [20]:

data_list = []
# Visit every (model type, corruption) pair and collect one row per
# feature type found in that run's metrics.txt.
for model_type in model_types:
    # This part of the path is the same for all corruptions of a model.
    detector_dir = os.path.join(base_directory, model_type, remaining_dir)
    for corruption_folder in corruption_folders:
        # The "_3" suffix is presumably the corruption severity level; confirm.
        metrics_file = os.path.join(
            detector_dir, f"cifar10-c_{corruption_folder}_3", "metrics.txt"
        )
        if not os.path.exists(metrics_file):
            # Missing runs are reported and skipped rather than treated as errors.
            print("Metrics file not found for", metrics_file)
            continue

        metrics_data = extract_metrics(metrics_file)
        print(metrics_data)
        data_list.extend(
            {
                "Model Type": model_type,
                "Corruption": corruption_folder,
                "Feature Type": feature_type,
                **feature_metrics,
            }
            for feature_type, feature_metrics in metrics_data.items()
        )



{'proj_features': {'Accuracy': 71.31, 'Precision': 83.6, 'Recall': 72.69, 'F1 Score': 77.76}, 'classifier_features': {'Accuracy': 96.22, 'Precision': 99.32, 'Recall': 95.17, 'F1 Score': 97.2}, 'clip_features': {'Accuracy': 66.51, 'Precision': 81.74, 'Recall': 66.28, 'F1 Score': 73.2}}
Metrics file not found for logs/cifar10-limited/resnet18/plumber_img_prompt/_clsEpoch_29_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector/cifar10-c_contrast_3/metrics.txt
Metrics file not found for logs/cifar10-limited/resnet18/plumber_img_prompt/_clsEpoch_29_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector/cifar10-c_defocus_blur_3/metrics.txt
Metrics file not found for logs/cifar10-limited/resnet18/plumber_img_prompt/_clsEpoch_29_bs_128_lr_0.1_teT_2.0_sT_1.0_imgweight_1.0_txtweight_1.0_is_mlp_False/step_1/failure_detector/cifar10-c_elastic_transform_3/metrics.txt
Metrics file not found for logs/cifar10-limited/resne

In [21]:
# Turn the collected row dicts into a single table
df = pd.DataFrame(data=data_list)

# Write the per-corruption table to disk, then show it
df.to_csv(path_or_buf="cifar10-limited-resnet18.csv", index=False)
print(df)

                          Model Type      Corruption         Feature Type   
0                 plumber_img_prompt      brightness        proj_features  \
1                 plumber_img_prompt      brightness  classifier_features   
2                 plumber_img_prompt      brightness        clip_features   
3                 plumber_img_prompt           frost        proj_features   
4                 plumber_img_prompt           frost  classifier_features   
..                               ...             ...                  ...   
93  plumber_img_text_proj_img_prompt   gaussian_blur        proj_features   
94  plumber_img_text_proj_img_prompt  gaussian_noise        proj_features   
95  plumber_img_text_proj_img_prompt   impulse_noise        proj_features   
96  plumber_img_text_proj_img_prompt     motion_blur        proj_features   
97  plumber_img_text_proj_img_prompt        pixelate        proj_features   

    Accuracy  Precision  Recall  F1 Score  
0      71.31      83.60   72.69

In [30]:
# Average every metric over the corruptions, separately for each
# (Model Type, Feature Type) combination.
metric_columns = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
grouped_df = (
    df.groupby(['Model Type', 'Feature Type'])[metric_columns]
    .mean()
    .reset_index()
)

# Show the averaged table and save it next to the raw one
print(grouped_df)
grouped_df.to_csv("cifar10-limited-resnet18-grouped.csv", index=False)

                                 Model Type         Feature Type   Accuracy   
0                          plumber_img_proj        proj_features  57.947143  \
1                   plumber_img_proj_cls_LP        proj_features  57.604286   
2               plumber_img_proj_img_prompt        proj_features  62.461429   
3        plumber_img_proj_img_prompt_cls_LP        proj_features  62.761429   
4                        plumber_img_prompt  classifier_features  95.718571   
5                        plumber_img_prompt        clip_features  59.722857   
6                        plumber_img_prompt        proj_features  64.442857   
7                 plumber_img_prompt_cls_LP        proj_features  63.111429   
8                     plumber_img_text_proj        proj_features  54.861429   
9              plumber_img_text_proj_cls_LP        proj_features  57.058571   
10         plumber_img_text_proj_img_prompt        proj_features  60.102857   
11  plumber_img_text_proj_img_prompt_cls_LP        p

In [31]:
# Human-readable label -> run folder name. The key order of this dict is
# also the row order wanted in the final table.
model_mapping = {
    "Image (Prompt)": "plumber_img_prompt",
    "Image (Project)": "plumber_img_proj",
    "Image (Prompt + Project)": "plumber_img_proj_img_prompt",
    "Image (Prompt) + Text (Cls Prompt)": "plumber_img_prompt_cls_LP",
    "Image (Prompt) + Text (Project)": "plumber_text_proj_img_prompt",
    "Image (Prompt) + Text (Prompt + Project)": "plumber_text_proj_img_prompt_cls_LP",
    "Image (Project) + Text (Cls Prompt)": "plumber_img_proj_cls_LP",
    "Image (Project) + Text (Project)": "plumber_img_text_proj",
    "Image (Project) + Text (Prompt + Project)": "plumber_img_text_proj_cls_LP",
    "Image (Prompt + Project) + Text (Cls Prompt)": "plumber_img_proj_img_prompt_cls_LP",
    "Image (Prompt + Project) + Text (Project)": "plumber_img_text_proj_img_prompt",
    "Image (Prompt + Project) + Text (Prompt + Project)": "plumber_img_text_proj_img_prompt_cls_LP"
}
# Reverse the dictionary for replacement (folder name -> label)
reverse_mapping = {v: k for k, v in model_mapping.items()}

# Position of each label in model_mapping, used as the custom sort key
sort_order = {k: i for i, k in enumerate(model_mapping.keys())}

# Translate folder names to labels, falling back to the current value when
# there is no entry. A plain `.map(reverse_mapping)` turns every value that
# is not a key into NaN, so running this cell a second time (when the column
# already holds labels) would wipe the whole column. The fallback makes the
# cell safe to re-run and keeps unknown folder names visible.
display_names = grouped_df['Model Type'].map(
    lambda name: reverse_mapping.get(name, name)
)

# Build the relabelled, ordered table without mutating the input frame.
# mergesort is stable, so rows that share a label keep their existing
# relative order; names without a sort position get NaN and are placed last.
grouped_df = (
    grouped_df
    .assign(**{
        'Model Type': display_names,
        'Sort Order': display_names.map(sort_order),
    })
    .sort_values(by='Sort Order', kind='mergesort')
    .drop('Sort Order', axis=1)
)

# Display the sorted DataFrame and save it
print(grouped_df)
grouped_df.to_csv("cifar10-limited-resnet18-grouped-sorted.csv", index=False)

                                           Model Type         Feature Type   
4                                      Image (Prompt)  classifier_features  \
5                                      Image (Prompt)        clip_features   
6                                      Image (Prompt)        proj_features   
0                                     Image (Project)        proj_features   
2                            Image (Prompt + Project)        proj_features   
7                  Image (Prompt) + Text (Cls Prompt)        proj_features   
12                    Image (Prompt) + Text (Project)        proj_features   
13           Image (Prompt) + Text (Prompt + Project)        proj_features   
1                 Image (Project) + Text (Cls Prompt)        proj_features   
8                    Image (Project) + Text (Project)        proj_features   
9           Image (Project) + Text (Prompt + Project)        proj_features   
3        Image (Prompt + Project) + Text (Cls Prompt)        pro

In [37]:
# Recall column as a plain list, in the current row order of grouped_df
list(grouped_df["Recall"])

[93.63,
 46.02285714285715,
 51.43428571428571,
 49.47571428571428,
 57.38142857142857,
 47.021428571428565,
 52.20428571428572,
 58.005714285714284,
 49.85857142857143,
 47.33571428571429,
 47.941428571428574,
 51.81714285714286,
 49.00714285714286,
 45.730000000000004]