# Inspecting the Stool Image Dataset

This notebook counts the number of data points (images) in each class and visualizes the distribution.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# Path to the unzipped dataset
DATA_DIR = "path_to_your_unzipped_dataset/data"  # <-- update this path

# OS-generated metadata files that can sit next to the images but are not data points
SYSTEM_FILES = {"thumbs.db", "desktop.ini"}


def _is_hidden_or_system(name):
    """Return True for dot-prefixed entries and known OS metadata files."""
    return name.startswith(".") or name.lower() in SYSTEM_FILES


# Fail early with an actionable message if the placeholder path was not updated
if not os.path.isdir(DATA_DIR):
    raise FileNotFoundError(
        f"DATA_DIR does not exist or is not a directory: {DATA_DIR!r} -- update the path above"
    )

# List class folders, skipping hidden directories (e.g. .ipynb_checkpoints)
# so they are not reported as classes
class_folders = sorted(
    d for d in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, d)) and not _is_hidden_or_system(d)
)

# Count images per class
class_counts = {}
for class_name in class_folders:
    class_path = os.path.join(DATA_DIR, class_name)
    # Count only regular files, and actually ignore hidden/system files
    # (.DS_Store, Thumbs.db, ...) so they do not inflate the totals
    with os.scandir(class_path) as entries:
        class_counts[class_name] = sum(
            1 for entry in entries
            if entry.is_file() and not _is_hidden_or_system(entry.name)
        )

# Build a two-column table (Class, Count) with one row per entry in class_counts
df_counts = pd.DataFrame(
    list(class_counts.items()),
    columns=["Class", "Count"],
)

# Print a heading, then render the table with the notebook's rich display
print("Number of images per class:")
display(df_counts)

# Bar chart of the per-class counts, drawn through an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(df_counts['Class'], df_counts['Count'], color='skyblue')
ax.set_xlabel("Bristol Stool Type")
ax.set_ylabel("Number of Images")
ax.set_title("Dataset Distribution by Class")
ax.tick_params(axis="x", labelrotation=45)  # rotate the x tick labels by 45 degrees
fig.tight_layout()
plt.show()