# In [7]:
import os

# Root directory of the dataset to summarise. Exactly one assignment should be
# active; the commented-out lines are alternative datasets.
#data_dir = 'F:/USFWS_Cameras/Data/Sevilleta/Sevilleta09_20_MD/animal/'
data_dir = r"F:\USFWS_Cameras\Data\BdA\BdA_MD\animal"
#data_dir = "F:\\USFWS_Cameras\\Data\\Red Rock WMA\\Redrock_MD\\animal"
#data_dir = "F:\\USFWS_Cameras\\Data\\Wichita Mnts\\WichitaMtns_MD\\animal"

# The species/code listing is read from classes.txt directly inside data_dir.
classes_file = os.path.join(data_dir, "classes.txt")

# Report destination (relative to the current working directory). The name is
# not derived from data_dir, so update it by hand when switching datasets.
output_file = "BdA_label_statistics.txt"

# Function to read the species mapping from classes.txt
def read_species_mapping(file_path):
    """Parse a classes file into a ``{species name: integer code}`` dict.

    Every non-blank line is split on whitespace; the last token is converted
    to ``int`` and used as the code, and all preceding tokens are re-joined
    with single spaces to form the species name (so multi-word names work).
    If the same name appears more than once, the last occurrence wins.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping species name (str) to its code (int).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the last token of a non-blank line is not an integer.
    """
    species_mapping = {}
    with open(file_path, 'r') as file:
        for line in file:
            values = line.split()
            if not values:
                # Blank or whitespace-only line (e.g. a trailing newline at
                # end of file): nothing to parse. Indexing values[-1] here
                # would raise IndexError.
                continue
            *name_parts, code = values
            species_mapping[' '.join(name_parts)] = int(code)
    return species_mapping

# Function to traverse the directory structure and collect label statistics
def traverse_directories(directory, species_mapping, label_stats):
    """Recursively tally ``.txt`` files per species beneath *directory*.

    Sub-directories whose name consists only of digits are treated as leaf
    folders: the ``.txt`` entries directly inside them are counted and
    credited to the name of the folder that contains the numeric folder.
    Any other sub-directory is descended into. Plain files found at the
    current level are ignored.

    Args:
        directory: Folder to scan.
        species_mapping: Container of known species names; counts are only
            recorded for names found in it.
        label_stats: dict updated in place, mapping species name to the
            running number of label files.

    Returns:
        None. Results are accumulated in *label_stats*.
    """
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        if not os.path.isdir(entry_path):
            continue

        if not entry.isdigit():
            # Not a numeric leaf folder: keep walking down the tree.
            traverse_directories(entry_path, species_mapping, label_stats)
            continue

        label_count = sum(
            1 for name in os.listdir(entry_path) if name.endswith('.txt')
        )
        # The species is named by the folder holding this numeric folder.
        parent_name = os.path.basename(os.path.dirname(entry_path))
        if parent_name in species_mapping:
            label_stats[parent_name] = label_stats.get(parent_name, 0) + label_count

# Function to get the YOLO labels and count statistics
def get_label_statistics(data_dir, species_mapping):
    """Write per-camera and combined label counts to ``output_file``.

    Each sub-directory of *data_dir* is treated as one camera. Its tree is
    scanned with ``traverse_directories`` and a section listing the label
    count and percentage share of every species is written, followed by a
    final summary section aggregated over all cameras.

    Args:
        data_dir: Folder whose immediate sub-directories are the cameras.
        species_mapping: Known species names, forwarded to
            ``traverse_directories``.

    Returns:
        None. The report is written to the module-level ``output_file`` path,
        which is opened in ``'w'`` mode (an existing file is overwritten).
    """
    def write_breakdown(report, counts, total):
        # Shared body of a report section: species count, then one line per
        # species with its share of *total* (0.0 when there are no labels).
        report.write("Number of species represented: {}\n".format(len(counts)))
        report.write("Number of labels per species:\n")
        for name, count in counts.items():
            share = (count / total) * 100 if total > 0 else 0.0
            report.write(f"{name}: {count} ({share:.2f}%)\n")

    entries = os.listdir(data_dir)
    grand_total = 0
    combined = {}

    with open(output_file, 'w') as report:
        for camera in entries:
            camera_path = os.path.join(data_dir, camera)
            if os.path.isdir(camera_path):
                per_camera = {}
                traverse_directories(camera_path, species_mapping, per_camera)
                camera_total = sum(per_camera.values())

                report.write(f"Camera: {camera}\n")
                write_breakdown(report, per_camera, camera_total)
                report.write("Total number of labels: {}\n\n".format(camera_total))

                # Fold this camera's counts into the all-camera totals.
                grand_total += camera_total
                for name, count in per_camera.items():
                    combined[name] = combined.get(name, 0) + count

        report.write("Summary (All Cameras Combined):\n")
        write_breakdown(report, combined, grand_total)
        report.write("Total number of labels: {}\n".format(grand_total))

# Read species mapping from classes.txt
# NOTE: this runs at module level, so the file at classes_file is read as soon
# as this cell/script executes.
species_mapping = read_species_mapping(classes_file)

# Get label statistics
# Scans the camera folders under data_dir and writes the report to the path
# held in output_file; nothing is returned.
get_label_statistics(data_dir, species_mapping)