In [3]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, roc_auc_score
from pathlib import Path

# Define the CSV folder path and CSV files
folder_path = "..\csv\\"
mscoco2014_csv = os.path.join(folder_path, 'mscoco2014.csv')
dalle3_csv = os.path.join(folder_path, 'dalle-3.csv')

# Load MSCOCO2014 and DALLE-3 datasets
df_mscoco2014 = pd.read_csv(mscoco2014_csv)
df_dalle3 = pd.read_csv(dalle3_csv)

# Update df_mscoco2014 and df_dalle3 to have only 'true_label' column
df_mscoco2014['true_label'] = False
df_dalle3['true_label'] = True

# Define the threshold for considering an image as synthetic
probability_threshold = 0.2

def logits_to_probabilities(logits):
    return 1 / (1 + np.exp(-logits))

def calculate_model_performance(df, threshold):
    performances = {}
    for model_name, column in model_columns.items():
        pred_column = f'{model_name}_pred'
        prob_column = f'{model_name}_prob' if 'exif' not in model_name else column
        df[pred_column] = df[prob_column] > threshold
        performances[model_name] = {
            'accuracy': accuracy_score(df['true_label'], df[pred_column]),
            'auc': roc_auc_score(df['true_label'], df[prob_column]) if 'exif' not in model_name else np.nan
        }

    # Aggregate results
    prediction_columns = [f'{model}_pred' for model in model_columns.keys()]
    df['final_pred'] = df[prediction_columns].any(axis=1)
    performances['aggregate'] = {
        'accuracy': accuracy_score(df['true_label'], df['final_pred']),
        'auc': roc_auc_score(df['true_label'], df[prediction_columns].max(axis=1))
    }

    return performances

# Construct the pattern to match all CSV files in the folder
pattern = os.path.join(folder_path, '*.csv')
print(pattern)

# Use glob to find all files matching the pattern
csv_files = glob.glob(pattern)
print(csv_files)

# List of CSV files for each dataset
performance_across_datasets = {model: {'accuracy': [], 'auc': []} for model in ['logits_Grag2021_progan', 'logits_Grag2021_latent', 'resnet50nodown_progan', 'resnet50nodown_stylegan2', 'exif_model', 'aggregate']}

model_columns = {
    'logits_Grag2021_progan': 'dMDetectorResultsinferenceResults_logits_Grag2021_progan',
    'logits_Grag2021_latent': 'dMDetectorResultsinferenceResults_logits_Grag2021_latent',
    'resnet50nodown_progan': 'gANDetectorResultsinferenceResults_logits_gandetection_resnet50nodown_progan',
    'resnet50nodown_stylegan2': 'gANDetectorResultsinferenceResults_logits_gandetection_resnet50nodown_stylegan2',
    'exif_model': 'exifDetectorResultsinferenceResults_isSyntheticImage'
}

# Loop through each CSV file
csv_files = glob.glob(os.path.join(folder_path, '*.csv'))
for csv_file_path in csv_files:
    csv_file_name = os.path.basename(csv_file_path)
    df = pd.read_csv(csv_file_path)

    # Check dataset name for merging
    if csv_file_name in ['mscoco2014.csv', 'hdr.csv']:
        merged_df = pd.concat([df, df_dalle3], ignore_index=True)
    else:
        merged_df = pd.concat([df, df_mscoco2014], ignore_index=True)

    # Applying sigmoid function to convert logits to probabilities
    for model_name, column in model_columns.items():
        if 'exif' not in model_name:
            merged_df[f'{model_name}_prob'] = logits_to_probabilities(merged_df[column])

    # Calculate model performance
    model_performances = calculate_model_performance(merged_df, probability_threshold)

    # Store accuracies and AUC for each model across datasets
    for model, perf in model_performances.items():
        performance_across_datasets[model]['accuracy'].append(perf['accuracy'])
        performance_across_datasets[model]['auc'].append(perf['auc'])

    print(f"Processed data for {csv_file_name}")

# Convert performance data to DataFrame for plotting
performance_df = pd.DataFrame({
    (model, metric): values
    for model, metrics in performance_across_datasets.items()
    for metric, values in metrics.items()
}, index=csv_files)
display(performance_df)

# Assuming your DataFrame is named df
# Splitting your DataFrame into separate DataFrames for Accuracy and AUC
accuracy_df = performance_df.filter(like='accuracy', axis=1)
auc_df = performance_df.filter(like='auc', axis=1)

# Adding the 'Dataset' column to both DataFrames
accuracy_df['Dataset'] = df['Dataset']
auc_df['Dataset'] = df['Dataset']

# Melting the DataFrames to long format
accuracy_melted = accuracy_df.melt(id_vars='Dataset', var_name='Model', value_name='Accuracy')
auc_melted = auc_df.melt(id_vars='Dataset', var_name='Model', value_name='AUC')

# Removing the '_accuracy' and '_auc' from the 'Model' column
accuracy_melted['Model'] = accuracy_melted['Model'].str.replace('_accuracy', '')
auc_melted['Model'] = auc_melted['Model'].str.replace('_auc', '')

# Plotting Accuracy Across Datasets
plt.figure(figsize=(12, 6))
sns.barplot(x='Dataset', y='Accuracy', hue='Model', data=accuracy_melted)
plt.title('Accuracy of Detectors Across Datasets')
plt.xlabel('Dataset')
plt.ylabel('Accuracy')
plt.xticks(rotation=45)
plt.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# Plotting AUC Across Datasets
plt.figure(figsize=(12, 6))
sns.barplot(x='Dataset', y='AUC', hue='Model', data=auc_melted)
plt.title('AUC of Detectors Across Datasets')
plt.xlabel('Dataset')
plt.ylabel('AUC')
plt.xticks(rotation=45)
plt.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

..\csv\*.csv
['..\\csv\\dalle-2.csv', '..\\csv\\dalle-3.csv', '..\\csv\\hdr.csv', '..\\csv\\midjourney.csv', '..\\csv\\mscoco2014.csv', '..\\csv\\vqgan.csv']
Processed data for dalle-2.csv
Processed data for dalle-3.csv
Processed data for hdr.csv
Processed data for midjourney.csv
Processed data for mscoco2014.csv
Processed data for vqgan.csv


Unnamed: 0_level_0,logits_Grag2021_progan,logits_Grag2021_progan,logits_Grag2021_latent,logits_Grag2021_latent,resnet50nodown_progan,resnet50nodown_progan,resnet50nodown_stylegan2,resnet50nodown_stylegan2,exif_model,exif_model,aggregate,aggregate
Unnamed: 0_level_1,accuracy,auc,accuracy,auc,accuracy,auc,accuracy,auc,accuracy,auc,accuracy,auc
..\csv\dalle-2.csv,0.605,0.959903,0.507,0.658364,0.692,0.955195,0.5035,0.79498,0.5,,0.733,0.733
..\csv\dalle-3.csv,0.5175,0.684124,0.857,0.853372,0.499,0.649602,0.5025,0.647201,0.5,,0.846,0.846
..\csv\hdr.csv,0.5205,0.772063,0.858,0.936342,0.493,0.579278,0.5035,0.736739,0.5,,0.844,0.844
..\csv\midjourney.csv,0.5315,0.835863,0.984,0.994271,0.6105,0.87229,0.505,0.769751,0.5,,0.9755,0.9755
..\csv\mscoco2014.csv,0.5175,0.684124,0.857,0.853372,0.499,0.649602,0.5025,0.647201,0.5,,0.846,0.846
..\csv\vqgan.csv,0.686,0.944237,0.792,0.956349,0.7185,0.935043,0.7065,0.940283,0.5,,0.8875,0.8875
