## Script to Check for Data in Result Files

Drive Connection

In [1]:
# Mount Google Drive at /content/drive so that the HDF5 result files
# referenced below (under /content/drive/MyDrive/Worms) can be opened.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Libraries

In [2]:
import h5py
import os

Files

In [3]:
# Folder on the mounted Drive that holds every result file.
# Change this single constant to point the notebook at another location.
WORMS_DIR = '/content/drive/MyDrive/Worms'

# Recording/sample prefixes and the result-file kinds produced for each one.
SAMPLES = ('L1', 'L2', 'n2')
RESULT_KINDS = ('featuresN', 'intensities', 'skeletons')

# One path per (sample, kind) pair, grouped by sample:
#   <WORMS_DIR>/<sample>_<kind>.hdf5
file_names = [
    f"{WORMS_DIR}/{sample}_{kind}.hdf5"
    for sample in SAMPLES
    for kind in RESULT_KINDS
]

Verifying File Content

In [4]:
def _print_name(name):
    """Print one object path of the HDF5 hierarchy (callback for ``visit``).

    Returns None implicitly, so ``visit`` keeps walking the whole tree.
    """
    print(f"  {name}")


def _describe_dataset(key, dataset):
    """Print shape, dtype, element count and a short preview of one dataset.

    Args:
        key: Name of the dataset inside its parent group.
        dataset: The open ``h5py.Dataset`` object to describe.
    """
    data_shape = dataset.shape
    data_dtype = dataset.dtype
    data_size = dataset.size

    print(f"  Dataset: {key}")
    print(f"    Shape: {data_shape}")
    print(f"    Data Type: {data_dtype}")
    print(f"    Number of Elements: {data_size}")

    if data_shape is None or data_size is None:
        # Empty (null-dataspace) dataset: h5py reports no shape/size, so the
        # size comparison below would raise TypeError.
        print("    (Empty dataset: nothing to print)")
    elif data_shape == ():
        # Scalar datasets cannot be sliced with [:5]; read the single value.
        print("    Value:")
        print(dataset[()])
    elif dataset.ndim <= 2 and data_size < 100:
        print("    First rows:")
        print(dataset[:5])
    else:
        print("    (Dataset too large to print completely)")


def describe_hdf5_file(file_name):
    """Print the structure and the top-level contents of one HDF5 file.

    The full hierarchy is listed first; then every top-level key is reported
    as either a dataset (with details) or a group. Errors are reported on
    stdout instead of being raised, so that a scan over several files can
    continue after a missing or unreadable file.

    Args:
        file_name: Path of the HDF5 file to open read-only.
    """
    print(f"\n{'='*30}")
    print(f"Analyzing file: {file_name}")
    print(f"{'='*30}")
    try:
        with h5py.File(file_name, 'r') as hdfid:
            print(hdfid)
            print("\nHDF5 File Structure:")
            hdfid.visit(_print_name)

            has_datasets = False
            print("\nContents of the datasets:")
            for key in hdfid.keys():
                item = hdfid[key]  # resolve the object once per key
                if isinstance(item, h5py.Dataset):
                    has_datasets = True
                    _describe_dataset(key, item)
                elif isinstance(item, h5py.Group):
                    print(f"  Group: {key}")

            if not has_datasets and not hdfid.keys():
                print("  The file does not contain top-level datasets.")
            elif not has_datasets:
                print("  The file does not contain top-level datasets, only groups (see structure).")

    except FileNotFoundError:
        print(f"Error: File '{file_name}' not found.")
    except Exception as e:
        # Deliberately broad: report the problem and let the caller move on
        # to the next file.
        print(f"An error occurred while opening or reading the file: {e}")


for file_name in file_names:
    describe_hdf5_file(file_name)


Analyzing file: /content/drive/MyDrive/Worms/L1_featuresN.hdf5
<HDF5 file "L1_featuresN.hdf5" (mode r)>

HDF5 File Structure:
  blob_features
  coordinates
  coordinates/dorsal_contours
  coordinates/skeletons
  coordinates/ventral_contours
  coordinates/widths
  features_stats
  provenance_tracking
  provenance_tracking/FEAT_INIT
  provenance_tracking/FEAT_TIERPSY
  timeseries_data
  trajectories_data

Contents of the datasets:
  Dataset: blob_features
    Shape: (21223,)
    Data Type: [('coord_x', '<f8'), ('coord_y', '<f8'), ('area', '<f8'), ('perimeter', '<f8'), ('box_length', '<f8'), ('box_width', '<f8'), ('quirkiness', '<f4'), ('compactness', '<f4'), ('box_orientation', '<f4'), ('solidity', '<f4'), ('intensity_mean', '<f4'), ('intensity_std', '<f4'), ('hu0', '<f4'), ('hu1', '<f4'), ('hu2', '<f4'), ('hu3', '<f4'), ('hu4', '<f4'), ('hu5', '<f4'), ('hu6', '<f4')]
    Number of Elements: 21223
    (Dataset too large to print completely)
  Group: coordinates
  Dataset: features_stats

In [5]:
hdFileName = '/content/drive/MyDrive/Worms/L1_featuresN.hdf5'
dataset_name = 'blob_features'

# Peek at one named dataset: report its shape and dtype, then show its
# leading rows. Problems are reported on stdout rather than raised.
try:
    with h5py.File(hdFileName, 'r') as hdfid:
        if dataset_name not in hdfid:
            # Guard first: nothing to describe when the name is absent.
            print(f"Error: The dataset '{dataset_name}' was not found in the file '{hdFileName}'.")
        else:
            dataset = hdfid[dataset_name]
            print(f"Dataset: {dataset_name}")
            print(f"  Shape: {dataset.shape}")
            print(f"  Data Type (dtype): {dataset.dtype}")
            print("  First 5 rows:")
            print(dataset[:5])

except FileNotFoundError:
    print(f"Error: The file '{hdFileName}' was not found.")
except Exception as err:
    print(f"An error occurred while opening or reading the file: {err}")

Dataset: blob_features
  Shape: (21223,)
  Data Type (dtype): [('coord_x', '<f8'), ('coord_y', '<f8'), ('area', '<f8'), ('perimeter', '<f8'), ('box_length', '<f8'), ('box_width', '<f8'), ('quirkiness', '<f4'), ('compactness', '<f4'), ('box_orientation', '<f4'), ('solidity', '<f4'), ('intensity_mean', '<f4'), ('intensity_std', '<f4'), ('hu0', '<f4'), ('hu1', '<f4'), ('hu2', '<f4'), ('hu3', '<f4'), ('hu4', '<f4'), ('hu5', '<f4'), ('hu6', '<f4')]
  First 5 rows:
[(1054.58703613, 1563.48974609, 410.5, 137.43859863, 49.01330948, 17.63538551, 0.9330263 , 0.27308986, -70.14479 , 0.5864286 , 115.01068, 9.011264, 0.4606221 , 0.1471695 , 0.00379843, 0.00109636, 1.9368704e-06, 0.00016743, -1.1199185e-06)
 (1054.58618164, 1564.46142578, 373.5, 137.78173828, 48.50436401, 17.17677498, 0.9351968 , 0.24723908, -69.14554 , 0.5693598 , 114.46296, 8.518961, 0.494695  , 0.16593784, 0.0041463 , 0.00452903, 1.9620735e-05, 0.00183178,  4.6712040e-07)
 (1054.74133301, 1564.10339355, 396. , 143.6812439 , 48.97