# Training a Convolutional Neural Network for Kidney Stones Object Detection

In [1]:
%pip install tabulate

Collecting tabulate
  Using cached tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.9.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\Brian\OneDrive - University of Witwatersrand\Desktop\off_the_grid\projects\ebook_object_detection\chapters\kidney_stones\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [2]:
import os
from tabulate import tabulate

# Rows of the summary table; one entry per dataset split is added by the loop further down
data_summary = list()

# Dataset splits to inspect, in the order they will appear in the table
sets = ['train', 'test', 'valid']

# Root folder of the data, given relative to the current working directory
data_dir = 'data'

# Function to get the total file size and count of files in a directory
def get_data_size(directory):
    """Count the files under ``directory`` and add up their sizes.

    The tree is traversed recursively with ``os.walk``, so files located in
    nested sub-folders are included in both totals.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A ``(total_files, total_size)`` tuple: the number of files found and
        their combined size in bytes. If ``directory`` is not an existing
        directory, ``(0, 0)`` is returned and a ``UserWarning`` is emitted.
    """
    import warnings  # local import keeps the cell's import lines untouched

    # os.walk() ignores scandir errors by default and simply yields nothing
    # for a missing path, which would make a mistyped folder look exactly like
    # an empty dataset. Report it instead of failing silently.
    if not os.path.isdir(directory):
        warnings.warn(
            f"{directory!r} is not an existing directory; reporting 0 files",
            stacklevel=2,
        )
        return 0, 0

    total_files = 0
    total_size = 0
    for dirpath, _dirnames, filenames in os.walk(directory):
        total_files += len(filenames)
        total_size += sum(os.path.getsize(os.path.join(dirpath, name)) for name in filenames)
    return total_files, total_size

# Build one summary row per dataset split
for data_set in sets:
    # Each split is expected to have an 'images' and a 'labels' sub-folder
    images_dir, labels_dir = (
        os.path.join(data_dir, data_set, sub_folder) for sub_folder in ('images', 'labels')
    )

    # File count and cumulative size in bytes: images first, then labels
    num_image_files, image_total_size = get_data_size(images_dir)
    num_label_files, label_total_size = get_data_size(labels_dir)

    # Sizes are shown in MB, i.e. bytes divided by 2**20 (1024 * 1024)
    data_summary += [[
        data_set.capitalize(),
        num_image_files,
        num_label_files,
        "{:.2f} MB".format(image_total_size / 2 ** 20),
        "{:.2f} MB".format(label_total_size / 2 ** 20),
    ]]

# Column titles, listed in the same order as the values of each row
headers = ["Dataset", "Number of Images", "Number of Labels", "Total Image Size", "Total Label Size"]

# Render the collected rows as a text grid
print(tabulate(data_summary, headers=headers, tablefmt="grid"))

+-----------+--------------------+--------------------+--------------------+--------------------+
| Dataset   |   Number of Images |   Number of Labels | Total Image Size   | Total Label Size   |
+===========+====================+====================+====================+====================+
| Train     |               1054 |               1054 | 16.43 MB           | 0.13 MB            |
+-----------+--------------------+--------------------+--------------------+--------------------+
| Test      |                123 |                123 | 2.03 MB            | 0.01 MB            |
+-----------+--------------------+--------------------+--------------------+--------------------+
| Valid     |                123 |                123 | 2.05 MB            | 0.02 MB            |
+-----------+--------------------+--------------------+--------------------+--------------------+
