# In [1]:
# Step 4 
# Count the number of images in the set exported from CVAT 
# Count the number of
#      - red flags
#      - red and yellow flags
#      - blue flags
#  in the resulting dataset after running the SSIM comparison that filters out very similar images

import os
import pandas as pd
import re
from collections import defaultdict  # For counting classes in annotations

def count_images_in_folders(parent_folder, csv_name, imagecountcolname):
    """Count the images in every ``Job_*`` folder and write the counts to a CSV.

    Every entry of ``parent_folder`` whose name starts with ``"Job_"`` and that
    contains an ``obj_train_data`` sub-folder contributes one row: the folder
    name and the number of ``.jpg`` / ``.png`` files (case-insensitive) found
    directly inside ``obj_train_data``.

    Args:
        parent_folder: Directory that holds the ``Job_*`` folders.
        csv_name: Path of the CSV file to write.
        imagecountcolname: Header of the column that receives the counts.

    The CSV always contains the columns ``Job`` and ``imagecountcolname``,
    even when no job folder qualifies, so it can still be read back and merged.
    """
    def job_sort_key(name):
        # Numbered jobs come first in numeric order (Job_2 before Job_10).
        # A "Job_" name without any digit has no number to sort by, so it is
        # placed after the numbered ones instead of crashing the sort.
        match = re.search(r'\d+', name)
        if match is None:
            return (1, 0, name)
        return (0, int(match.group()), name)

    job_folders = sorted(
        (f for f in os.listdir(parent_folder) if f.startswith("Job_")),
        key=job_sort_key,
    )

    data = []
    for job_folder in job_folders:
        obj_train_data_path = os.path.join(parent_folder, job_folder, 'obj_train_data')

        # Entries without an obj_train_data folder are not listed at all
        if not os.path.isdir(obj_train_data_path):
            continue

        image_count = sum(
            1
            for f in os.listdir(obj_train_data_path)
            if f.lower().endswith(('.jpg', '.png'))
        )
        data.append({'Job': job_folder, imagecountcolname: image_count})

    # Explicit columns keep the header row even when ``data`` is empty
    df = pd.DataFrame(data, columns=['Job', imagecountcolname])
    df.to_csv(csv_name, index=False)

    print(f"CSV file created at {csv_name}")

# Column headers used for the two per-job image counts
exportcountCol = "Exported from CVAT"
filtercountCol = "After filtering using SSIM"

# Count the images exported from CVAT
#count_images_in_folders(r'D:\FlagDetectionDatasets\ExportedDatasetsExtracted', 'count_exported_from_cvat.csv', exportcountCol)
# Count the images after running SSIM
#count_images_in_folders(r'D:\FlagDetectionDatasets\ExportedDatasetsExtractedStage2', 'count_after_comparison.csv', filtercountCol)

def merge_csv_files(csv_file1, csv_file2, output_csv, export_col=None, filter_col=None):
    """Merge two per-job count CSVs and add a retention percentage and a total row.

    The two files are inner-joined on their ``Job`` column, so only jobs that
    appear in both files are kept. A ``Retained %`` column gives the filtered
    count as a percentage of the exported count, rounded to two decimals. A
    final ``Total`` row holds the column sums and the overall percentage.

    Args:
        csv_file1: CSV containing ``Job`` and the exported-count column.
        csv_file2: CSV containing ``Job`` and the filtered-count column.
        output_csv: Path of the merged CSV to write.
        export_col: Name of the exported-count column. Defaults to the
            module-level ``exportcountCol``.
        filter_col: Name of the filtered-count column. Defaults to the
            module-level ``filtercountCol``.
    """
    if export_col is None:
        export_col = exportcountCol
    if filter_col is None:
        filter_col = filtercountCol

    df1 = pd.read_csv(csv_file1)
    df2 = pd.read_csv(csv_file2)

    # 'inner' keeps only the jobs present in both files
    merged_df = pd.merge(df1, df2, on='Job', how='inner')

    merged_df['Retained %'] = ((merged_df[filter_col] / merged_df[export_col]) * 100).round(2)

    total_exported = merged_df[export_col].sum()
    total_filtered = merged_df[filter_col].sum()
    # No exported images means there is no meaningful overall percentage
    if total_exported:
        overall_pct = round(total_filtered / total_exported * 100, 2)
    else:
        overall_pct = float('nan')

    # The percentage goes under 'Retained %', the same column as the per-job
    # values, so the Total row does not create an extra, mostly empty column.
    summary_row = {
        'Job': 'Total',
        export_col: total_exported,
        filter_col: total_filtered,
        'Retained %': overall_pct,
    }
    merged_df = pd.concat([merged_df, pd.DataFrame([summary_row])], ignore_index=True)

    merged_df.to_csv(output_csv, index=False)
    print(f"Merged CSV saved as {output_csv}")

# Merge the two CSV files
# merge_csv_files('count_exported_from_cvat.csv', 'count_after_comparison.csv', 'counts_after_export.csv')

# Path to the parent folder
#parent_folder = r'D:\FlagDetectionDatasets\ExportedDatasetsExtracted'
#csv_path = 'exported_from_cvat_image_count.csv'

# Count the number of instances of each class in each folder
# Example annotation file content (one annotation per line; the first number is the class ID):
#1 0.434852 0.648463 0.111932 0.191796
#2 0.476599 0.415176 0.080208 0.271296
#4 0.538021 0.612153 0.039479 0.770676

# Class names; a class ID from an annotation line is used as an index into this list
classes = [
    "red_flag",
    "red_yellow_flag",
    "blue_flag",
    "green_coast",
    "flagpole",
]
# Module-level tally keyed by class name; unseen names read as 0
class_counts = defaultdict(int)

def count_annotations(parent_folder, class_names=None):
    """Count annotated instances of each class, per folder and in total.

    Every entry of ``parent_folder`` that contains an ``obj_train_data``
    sub-folder is scanned. In each ``.txt`` file found there, the first
    whitespace-separated token of a line is read as an integer class ID and
    used as an index into ``class_names``. For each scanned folder the counts
    of that folder alone are printed; the totals over all folders are printed
    at the end.

    Args:
        parent_folder: Directory holding the per-job folders.
        class_names: Class names indexed by class ID. Defaults to the
            module-level ``classes`` list.

    Returns:
        A dict mapping every class name to its total count over all folders.

    Raises:
        ValueError: If the first token of a non-blank line is not an integer.
    """
    if class_names is None:
        class_names = classes

    def folder_sort_key(name):
        # Numbered folders in numeric order (Job_2 before Job_10), others last
        match = re.search(r'\d+', name)
        if match is None:
            return (1, 0, name)
        return (0, int(match.group()), name)

    total_counts = defaultdict(int)

    for job_folder in sorted(os.listdir(parent_folder), key=folder_sort_key):
        job_path = os.path.join(parent_folder, job_folder, 'obj_train_data')

        # Skip entries without annotation data before printing any header
        if not os.path.isdir(job_path):
            continue

        folder_counts = defaultdict(int)
        for filename in sorted(os.listdir(job_path)):
            if not filename.endswith(".txt"):
                continue
            with open(os.path.join(job_path, filename), 'r') as file:
                for line in file:
                    fields = line.split()
                    if not fields:
                        continue  # blank line, e.g. at the end of the file
                    class_id = int(fields[0])
                    # A negative ID would otherwise index from the end of the list
                    if 0 <= class_id < len(class_names):
                        folder_counts[class_names[class_id]] += 1

        # Per-folder figures: only what was found in this folder
        print(f"{job_folder} annotations:")
        for cls in class_names:
            print(f" - {cls}: {folder_counts[cls]}")
            total_counts[cls] += folder_counts[cls]

    print(f"----------------------- ")

    # Totals over all folders
    for cls in class_names:
        print(f"{cls}: {total_counts[cls]}")

    return {cls: total_counts[cls] for cls in class_names}
        
# Print the class counts
#for cls in classes:
#   print(f"{cls}: {class_counts[cls]}")
# Print for folder 
#      for class_id, count in class_counts.items():
#         print(f" - Class {class_id}: {count} annotations")

# Print results
#for class_id, count in class_counts.items():
#   print(f"-------------------------------------")
#  print(f"Class {class_id}: {count} annotations")

# Set the folder to count. The Stage2 folder is the dataset left after the SSIM
# filtering step; switch to the other path to count the raw CVAT export instead.
#parent_folder = r'D:\FlagDetectionDatasets\ExportedDatasetsExtracted'
parent_folder = r'D:\FlagDetectionDatasets\ExportedDatasetsExtractedStage2'

# parent_folder must be assigned before this call; with both assignments
# commented out the cell failed with a NameError.
if os.path.isdir(parent_folder):
    count_annotations(parent_folder)
else:
    print(f"Folder not found, nothing counted: {parent_folder}")
#count_images_in_folders(r'D:\FlagDetectionDatasets\ExportedDatasetsExtractedStage2', 'count_after_comparison.csv', filtercountCol)

# Recorded output of this cell as originally run: NameError: name 'parent_folder' is not defined