In [1]:
import os
import pandas as pd
from collections import defaultdict
import sys

# Make the parent directory importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# Set the path to your images_reshaped directory
base_path = '../images_reshaped'

# Category sub-folders looked up directly under base_path
CATEGORIES = ('deadly', 'edible', 'poisonous', 'conditionally_edible')

# Lower-case file-name suffixes that are counted as images
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def count_images(folder, extensions=IMAGE_EXTENSIONS):
    """Count the entries of `folder` whose name ends with an image extension.

    The comparison is done on the lower-cased entry name, so 'A.JPG' matches
    '.jpg'. Only names are inspected; the entries themselves are not opened.

    Args:
        folder: Path of the directory to list.
        extensions: Tuple of lower-case suffixes to accept.

    Returns:
        Number of matching entries as an int.

    Raises:
        OSError: If `folder` cannot be listed.
    """
    return sum(1 for name in os.listdir(folder) if name.lower().endswith(extensions))


def scan_species(root, categories=CATEGORIES):
    """Walk `root/<category>/<species>/` and collect per-species statistics.

    Every sub-directory of a category folder is treated as one species.
    Non-directory entries inside a category folder are ignored. A species
    folder that exists under several categories gets all of those categories
    recorded and its image counts summed across them.

    A category folder that does not exist is skipped with a warning on stderr
    instead of aborting the whole scan.

    Args:
        root: Directory containing the category folders.
        categories: Iterable of category folder names to visit.

    Returns:
        A tuple `(categories_by_species, images_by_species)` where the first
        is a defaultdict(set) mapping species name -> set of category names,
        and the second is a defaultdict(int) mapping species name -> number
        of image files found.
    """
    categories_by_species = defaultdict(set)
    images_by_species = defaultdict(int)

    for category in categories:
        category_path = os.path.join(root, category)
        if not os.path.isdir(category_path):
            print(f"Warning: category folder not found, skipping: {category_path}",
                  file=sys.stderr)
            continue

        # Sorted so that dict insertion order does not depend on the
        # arbitrary order os.listdir returns.
        for species_folder in sorted(os.listdir(category_path)):
            species_path = os.path.join(category_path, species_folder)
            if os.path.isdir(species_path):
                categories_by_species[species_folder].add(category)
                images_by_species[species_folder] += count_images(species_path)

    return categories_by_species, images_by_species


# species_categories: species -> set of categories it appears in
# species_counts:     species -> total image count across those categories
species_categories, species_counts = scan_species(base_path)

# Create a DataFrame for analysis: one row per species, sorted by species name
# for easier reading.
#
# The two count columns are cast to int64 explicitly. With at least one species
# this is a no-op, but when no species were found the columns would otherwise
# be empty object-dtype columns, which numeric operations such as
# DataFrame.nlargest reject.
df = (
    pd.DataFrame(
        [
            {
                'species': species,
                'categories': ', '.join(sorted(cats)),
                'category_count': len(cats),
                'image_count': species_counts[species],
            }
            for species, cats in species_categories.items()
        ],
        # Fix the column set and order even when there are no rows
        columns=['species', 'categories', 'category_count', 'image_count'],
    )
    .astype({'category_count': 'int64', 'image_count': 'int64'})
    .sort_values('species')
    .reset_index(drop=True)
)

# Select the rows to report up front so each filter is evaluated only once
multi_category = df[df['category_count'] > 1]
top_by_images = df.nlargest(10, 'image_count')

# Headline numbers
print(f"Total unique species found: {len(df)}")
print(f"Species appearing in multiple categories: {len(multi_category)}")

# Species that were filed under more than one category
print("\nSpecies appearing in multiple categories:")
print(multi_category.to_string(index=False))

# The ten species with the most images
print("\nTop 10 species by image count:")
print(top_by_images.to_string(index=False))

# Persist the complete table (all species, not just the excerpts above)
df.to_csv('species_analysis.csv', index=False)
print("\nFull analysis saved to 'species_analysis.csv'")

Total unique species found: 247
Species appearing in multiple categories: 14

Species appearing in multiple categories:
                 species                      categories  category_count  image_count
       Amanita_smithiana               deadly, poisonous               2           53
       Armillaria_mellea               edible, poisonous               2           84
      Clitocybe_dealbata               deadly, poisonous               2           63
Coprinopsis_atramentaria conditionally_edible, poisonous               2           94
       Entoloma_sinuatum               deadly, poisonous               2           62
   Hypholoma_fasciculare               deadly, poisonous               2           65
    Lactarius_torminosus               deadly, poisonous               2           62
     Omphalotus_illudens               deadly, poisonous               2           70
    Omphalotus_japonicus               deadly, poisonous               2           58
       Pholiotina_ru