In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# All paths are derived from the parent of the current working directory.
project_directory = os.path.dirname(os.getcwd())
data_directory = os.path.join(project_directory, "data")
converted_folder_name = "converted"
converted_directory = os.path.join(data_directory, converted_folder_name)

# Read predictions.csv and keep only the rows whose NonZeroPixels value lies
# strictly between 50 and 3500 (both bounds excluded).
data = (
    pd.read_csv(os.path.join(project_directory, "predictions.csv"))
    .loc[lambda frame: frame['NonZeroPixels'].gt(50) & frame['NonZeroPixels'].lt(3500)]
)

Histogram

In [None]:
def plot_class_distribution(data, class_name, ax, accuracy_threshold=0.6,
                            min_nonzero_pixels=80, max_nonzero_pixels=3500):
    """Draw a histogram (with KDE overlay) of one class column onto ``ax``.

    A row is included only when its ``NonZeroPixels`` value lies strictly
    between ``min_nonzero_pixels`` and ``max_nonzero_pixels`` and its value in
    ``class_name`` is at least ``accuracy_threshold``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``NonZeroPixels`` column and the ``class_name`` column.
    class_name : str
        Name of the column whose values are plotted.
    ax : matplotlib axes
        Axes that receives the plot, title, labels and x-limits.
    accuracy_threshold : float, default 0.6
        Minimum value (inclusive) of ``class_name`` for a row to be plotted.
    min_nonzero_pixels : number, default 80
        Exclusive lower bound on ``NonZeroPixels``.
    max_nonzero_pixels : number, default 3500
        Exclusive upper bound on ``NonZeroPixels``.
    """
    pixel_counts = data['NonZeroPixels']
    keep = (
        (pixel_counts > min_nonzero_pixels)
        & (pixel_counts < max_nonzero_pixels)
        & (data[class_name] >= accuracy_threshold)
    )
    filtered_data = data[keep]

    sns.histplot(filtered_data[class_name], bins=20, kde=True, ax=ax)
    ax.set_title(f'Distribution of {class_name} (Accuracy >= {accuracy_threshold})')
    ax.set_xlabel('Accuracy')
    ax.set_ylabel('Count')
    # The x-axis is pinned to the [0, 1] range regardless of the data plotted.
    ax.set_xlim(0, 1)

# Load the data
# NOTE(review): this re-reads predictions.csv and rebinds `data`, so any
# filtering applied to `data` in an earlier cell no longer applies from this
# point on (including in later cells) - confirm that this is intended.
project_directory = os.path.dirname(os.getcwd())
data = pd.read_csv(os.path.join(project_directory, "predictions.csv"))

# Every column except the bookkeeping ones is treated as a class column.
class_columns = [col for col in data.columns if col not in ['Filename', 'NonZeroPixels', 'Loss']]

# Lay the histograms out on a grid with at most 3 subplots per row.
num_plots = len(class_columns)
num_rows = (num_plots + 2) // 3  # ceiling division by 3
# squeeze=False keeps `axes` two-dimensional even for a single row or a
# single subplot, so the [row, col] indexing below is always valid.
fig, axes = plt.subplots(
    nrows=num_rows,
    ncols=min(num_plots, 3),
    figsize=(18, 6*num_rows),
    squeeze=False,
)

for i, class_name in enumerate(class_columns):
    plot_class_distribution(data, class_name, ax=axes[i // 3, i % 3], accuracy_threshold=0.6)

# Hide grid cells left over when the class count is not a multiple of 3.
for unused_ax in axes.ravel()[num_plots:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
def plot_top_images(data, class_name, converted_directory, num_images=25):
    """Show the images with the highest values in ``class_name`` on a grid.

    Rows of ``data`` are sorted by ``class_name`` in descending order and the
    first ``num_images`` are displayed, five per row. Each image is read from
    ``converted_directory`` using the row's ``Filename`` value and titled with
    the filename and the ``class_name`` value formatted to four decimals.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Filename`` column and the ``class_name`` column.
    class_name : str
        Column used for ranking and shown in each title.
    converted_directory : str
        Directory that the ``Filename`` values are joined onto.
    num_images : int, default 25
        Maximum number of images to show. The grid grows or shrinks to fit
        the number of rows actually selected.
    """
    top_images = data.sort_values(by=class_name, ascending=False).head(num_images)

    images_per_row = 5
    # Ceiling division; keep at least one row so that an empty selection
    # still produces a valid (blank) figure instead of an error.
    num_rows = max(1, -(-len(top_images) // images_per_row))
    # 2.4 per row reproduces the (14, 12) figure size for the 5-row default.
    # squeeze=False keeps `axes` two-dimensional even when there is one row.
    fig, axes = plt.subplots(
        nrows=num_rows,
        ncols=images_per_row,
        figsize=(14, 2.4 * num_rows),
        squeeze=False,
    )
    cells = axes.ravel()

    ranked = zip(top_images['Filename'], top_images[class_name])
    for ax, (filename, score) in zip(cells, ranked):
        img = plt.imread(os.path.join(converted_directory, filename))
        ax.imshow(img)
        ax.set_title(f"{filename} - {score:.4f}")

    # Turn the axes off everywhere, including cells that received no image.
    for ax in cells:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Show the 25 rows of `data` with the highest 'Class8.1_accuracy' values.
plot_top_images(
    data=data,
    class_name='Class8.1_accuracy',
    converted_directory=converted_directory,
    num_images=25,
)