# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns

# Read the detector outputs and the ground-truth annotations from disk
model_results_df = pd.read_csv('path/to/model_results.csv')
true_labels_df = pd.read_csv('path/to/true_labels.csv')

# Join the two tables on their shared 'Filepath' column. This is pandas'
# default inner join, so only file paths present in both CSVs are kept.
merged_df = model_results_df.merge(true_labels_df, on='Filepath')

# Store the ground-truth labels as a pandas categorical column.
# This assumes the labels live in a column named 'TrueLabel'; adjust if needed.
merged_df = merged_df.assign(
    TrueLabel=merged_df['TrueLabel'].astype('category'),
)

# Define a function to calculate metrics
def calculate_metrics(y_true, y_pred, average='weighted'):
    """Return accuracy, precision, recall and F1 for one set of predictions.

    Args:
        y_true: Ground-truth labels, one per sample.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.
        average: Averaging mode forwarded to scikit-learn's
            ``precision_score``, ``recall_score`` and ``f1_score``.
            Defaults to ``'weighted'``, which matches the previous
            hard-coded behavior.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # Accuracy takes no averaging mode; the other three scorers share one.
    precision, recall, f1 = (
        scorer(y_true, y_pred, average=average)
        for scorer in (precision_score, recall_score, f1_score)
    )
    return accuracy, precision, recall, f1

# Calculate metrics for each model
metrics = {}
# The ground truth is the same for every detector, so look it up once.
y_true = merged_df['TrueLabel']
for model in ['DMDetectorResults', 'GANDetectorResults', 'ExifDetectorResults']:
    y_pred = merged_df[model + 'InferenceResultsIs_diffusion_imag']  # Adjust based on your column names
    metrics[model] = calculate_metrics(y_true, y_pred)

# Convert the metrics dictionary to a DataFrame for easy viewing:
# one column per model, one row per metric.
metrics_df = pd.DataFrame(metrics, index=['Accuracy', 'Precision', 'Recall', 'F1 Score'])

# Display the metrics
print(metrics_df)

# Visualizing the performance
# Reshape to long form (one row per model/metric pair). Passing the wide
# table straight to sns.barplot treats each metric column as a sample and
# draws its mean across the models, which hides the per-model comparison
# this chart is meant to show.
scores_long = (
    metrics_df.T
    .rename_axis('Model')
    .reset_index()
    .melt(id_vars='Model', var_name='Metric', value_name='Score')
)
plt.figure(figsize=(10, 6))
sns.barplot(data=scores_long, x='Metric', y='Score', hue='Model')
plt.title('Model Performance Comparison')
plt.ylabel('Score')
plt.show()
