In [1]:
import os

data_folder = "Data"

# Dictionary to store image paths
image_paths = {
    "Non Demented": [],
    "Very mild Dementia": [],
    "Mild Dementia": [],
    "Moderate Dementia": []
}

# Traverse through the folder structure
for root, dirs, files in os.walk(data_folder):
    for file in files:
        if file.endswith(".jpg"):
            # Extract dementia level from folder name
            dementia_level = os.path.basename(root)
            # Extract subject ID from file name
            subject_id = file.split("_")[0]
            # Extract image slice number
            slice_number = int(file.split("_")[-1].split(".")[0])
            # Add image path to the corresponding dementia level
            image_paths[dementia_level].append(os.path.join(root, file))

# Print the number of images for each dementia level
for level, paths in image_paths.items():
    print(f"{level}: {len(paths)} images")

# Example of accessing image paths for a specific dementia level
# For example, to access image paths for Non Demented:
# non_demented_image_paths = image_paths["Non Demented"]


Non Demented: 67222 images
Very mild Dementia: 13725 images
Mild Dementia: 5002 images
Moderate Dementia: 488 images


In [2]:
import os
from collections import defaultdict

def count_images_per_category(image_paths):
    """Count the number of images per dementia category."""
    category_counts = {}
    for category, paths in image_paths.items():
        category_counts[category] = len(paths)
    return category_counts

def count_scans_per_patient(image_paths):
    """Count the number of scans per patient."""
    scans_per_patient = defaultdict(int)
    for paths in image_paths.values():
        for path in paths:
            subject_id = os.path.basename(path).split("_")[0]
            scans_per_patient[subject_id] += 1
    return scans_per_patient

def check_image_formats(image_paths):
    """Check if all images have the same format."""
    formats = set()
    for paths in image_paths.values():
        for path in paths:
            format_info = path.split("_")[3]
            formats.add(format_info)
    return len(formats) == 1

def analyze_dataset(image_paths):
    """Analyze the dataset and provide insights."""
    # Count images per category
    category_counts = count_images_per_category(image_paths)
    print("Number of images per dementia category:")
    for category, count in category_counts.items():
        print(f"{category}: {count}")

    # Count scans per patient
    scans_per_patient = count_scans_per_patient(image_paths)
    print("\nNumber of scans per patient:")
    for patient_id, num_scans in scans_per_patient.items():
        print(f"Patient ID: {patient_id}, Number of scans: {num_scans}")

    # Check if all images have the same format
    same_format = check_image_formats(image_paths)
    if same_format:
        print("\nAll images have the same format.")
    else:
        print("\nNot all images have the same format.")

    # Other potentially interesting insights
    num_patients = len(scans_per_patient)
    total_images = sum(category_counts.values())
    average_scans_per_patient = total_images / num_patients
    print(f"\nAverage scans per patient: {average_scans_per_patient:.2f}")
    print(f"Total number of images: {total_images}")
    print(f"Average number of images per patient: {total_images / num_patients:.2f}")

# Assuming image_paths dictionary has been loaded previously
# image_paths = { "Non Demented": [...], "Very mild Dementia": [...], ... }

# Analyze the dataset
analyze_dataset(image_paths)


Number of images per dementia category:
Non Demented: 67222
Very mild Dementia: 13725
Mild Dementia: 5002
Moderate Dementia: 488

Number of scans per patient:
Patient ID: OAS1, Number of scans: 86437

Not all images have the same format.

Average scans per patient: 86437.00
Total number of images: 86437
Average number of images per patient: 86437.00
