#Analysis of the Results File: L1_intensities Obtained via Tierpsy Tracker

Drive connection

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Libraries

In [2]:
import h5py
import os
import pandas as pd
import numpy as np

Inspecting file content

In [4]:
def inspect_hdf5_datasets(file_path):
    """
    Inspects an HDF5 file and prints the names and sizes of its datasets.

    Args:
        file_path (str): The path to the HDF5 file.
    """
    print(f"\n{'='*30}")
    print(f"Inspecting file: {file_path}")
    print(f"{'='*30}")
    try:
        if not os.path.exists(file_path):
            print(f"Error: The file '{file_path}' was not found.")
            return  # Exit the function if the file doesn't exist

        with h5py.File(file_path, 'r') as hdfid:
            datasets_info = {}
            for name, obj in hdfid.items():
                if isinstance(obj, h5py.Dataset):
                    datasets_info[name] = obj.size

            if datasets_info:
                print("Datasets found and their sizes:")
                for name, size in datasets_info.items():
                    print(f"  Dataset: {name}, Size: {size} elements")
            else:
                print("No top-level datasets were found in this file.")

    except Exception as e:
        print(f"An error occurred while processing the file '{file_path}': {e}")

# Define the file path you want to analyze here.  REPLACE THIS!
file_to_analyze = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5'  # <--- REPLACE THIS LINE

inspect_hdf5_datasets(file_to_analyze)


Inspecting file: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5
Datasets found and their sizes:
  Dataset: straighten_worm_intensity_median, Size: 2245733 elements
  Dataset: trajectories_data_valid, Size: 17143 elements


Exporting the datasets to individuals CSV files

In [6]:
def export_hdf5_datasets_to_csv(file_path, output_dir):
    """
    Exports all datasets from an HDF5 file to individual CSV files.

    Args:
        file_path (str): The path to the HDF5 file.
        output_dir (str): The path to the directory where the CSV files will be saved.
    """
    print(f"\n{'='*30}")
    print(f"Exporting all datasets from: {file_path}")
    print(f"CSV files will be saved in: {output_dir}")
    print(f"{'='*30}")

    try:
        if not os.path.exists(file_path):
            print(f"Error: The file '{file_path}' was not found.")
            return

        # Create the output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        with h5py.File(file_path, 'r') as hdf_file:
            for dataset_name in hdf_file:
                if isinstance(hdf_file[dataset_name], h5py.Dataset):
                    print(f"\nProcessing dataset: {dataset_name}")
                    dataset = hdf_file[dataset_name]
                    data = dataset[:]  # Read all data

                    # Convert to Pandas DataFrame
                    df = pd.DataFrame(data)

                    # Construct the CSV file path
                    csv_file_path = os.path.join(output_dir, f"{dataset_name}.csv")

                    # Export to CSV
                    df.to_csv(csv_file_path, index=False)
                    print(f"Dataset '{dataset_name}' successfully exported to: {csv_file_path}")
                else:
                    print(f"Skipping: '{dataset_name}' is not a dataset.")

    except Exception as e:
        print(f"An error occurred: {e}")

    print("\nData export process complete.")



if __name__ == "__main__":
    # Example usage:  MODIFY THESE PATHS APPROPRIATELY
    hdf5_file_path = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5'  # <--- Replace with your HDF5 file path
    csv_output_directory = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets'  # <--- Replace with the desired output folder

    export_hdf5_datasets_to_csv(hdf5_file_path, csv_output_directory)


Exporting all datasets from: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5
CSV files will be saved in: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets
Skipping: 'provenance_tracking' is not a dataset.

Processing dataset: straighten_worm_intensity_median
Dataset 'straighten_worm_intensity_median' successfully exported to: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets/straighten_worm_intensity_median.csv

Processing dataset: trajectories_data_valid
Dataset 'trajectories_data_valid' successfully exported to: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets/trajectories_data_valid.csv

Data export process complete.


Inspecting the provenance_tracking group

In [7]:
def inspect_provenance_tracking(file_path):
    """
    Inspects the 'provenance_tracking' group in an HDF5 file and prints its contents.

    Args:
        file_path (str): The path to the HDF5 file.
    """
    try:
        with h5py.File(file_path, 'r') as hdf_file:
            if 'provenance_tracking' in hdf_file:
                provenance_group = hdf_file['provenance_tracking']
                print("\nInspecting 'provenance_tracking' group:")

                # Print attributes of the group
                print("\nAttributes:")
                for attr_name, attr_value in provenance_group.attrs.items():
                    print(f"  {attr_name}: {attr_value}")

                # Print items within the group (datasets or subgroups)
                print("\nItems within the group:")
                for item_name, obj in provenance_group.items():
                    if isinstance(obj, h5py.Dataset):
                        print(f"  Dataset: {item_name}, Shape: {obj.shape}, Data Type: {obj.dtype}")
                    elif isinstance(obj, h5py.Group):
                        print(f"  Group: {item_name}")
                    else:
                        print(f"  Other: {item_name}, Type: {type(obj)}")
            else:
                print("\n'provenance_tracking' group not found in the HDF5 file.")

    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    # Replace with the path to your HDF5 file
    hdf5_file_path = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5'
    inspect_provenance_tracking(hdf5_file_path)


Inspecting 'provenance_tracking' group:

Attributes:
  CLASS: b'GROUP'
  TITLE: Empty(dtype=dtype('S1'))
  VERSION: b'1.0'

Items within the group:
  Dataset: INT_PROFILE, Shape: (), Data Type: |S1047


Exporting the INT_PROFILE dataset

In [8]:
def export_int_profile(file_path, output_dir):
    """
    Exports the 'INT_PROFILE' dataset from the 'provenance_tracking' group in an HDF5 file to a CSV file.

    Args:
        file_path (str): The path to the HDF5 file.
        output_dir (str): The directory where the CSV file will be saved.
    """
    print(f"\n{'='*30}")
    print(f"Exporting INT_PROFILE from: {file_path}")
    print(f"CSV file will be saved in: {output_dir}")
    print(f"{'='*30}")

    try:
        if not os.path.exists(file_path):
            print(f"Error: The file '{file_path}' was not found.")
            return

        # Create the output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        with h5py.File(file_path, 'r') as hdf_file:
            if 'provenance_tracking' in hdf_file:
                provenance_group = hdf_file['provenance_tracking']
                if 'INT_PROFILE' in provenance_group:
                    print(f"\nProcessing dataset: INT_PROFILE")
                    int_profile_data = provenance_group['INT_PROFILE'][()]  # Read the scalar value

                    # Convert to Pandas DataFrame
                    df = pd.DataFrame([int_profile_data])

                    # Construct the CSV file path
                    csv_file_path = os.path.join(output_dir, "INT_PROFILE.csv")

                    # Export to CSV
                    df.to_csv(csv_file_path, index=False)
                    print(f"Dataset 'INT_PROFILE' successfully exported to: {csv_file_path}")
                else:
                    print(f"Error: 'INT_PROFILE' dataset not found in 'provenance_tracking' group.")
            else:
                print(f"Error: 'provenance_tracking' group not found in the HDF5 file.")

    except Exception as e:
        print(f"An error occurred: {e}")

    print("\nData export process complete.")


if __name__ == "__main__":
    # Example usage:  MODIFY THESE PATHS APPROPRIATELY
    hdf5_file_path = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5'  # <--- Replace with your HDF5 file path
    csv_output_directory = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets'  # <--- Replace with the desired output folder

    export_int_profile(hdf5_file_path, csv_output_directory)


Exporting INT_PROFILE from: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/L1_intensities.hdf5
CSV file will be saved in: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets

Processing dataset: INT_PROFILE
Dataset 'INT_PROFILE' successfully exported to: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets/INT_PROFILE.csv

Data export process complete.


Visualization of each dataset

In [9]:
csv_output_directory = '/content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets'

print(f"\n{'='*30}")
print(f"Visualizing the first 5 rows of the datasets in: {csv_output_directory}")
print(f"{'='*30}")
try:
    # Check if the output directory exists
    if not os.path.exists(csv_output_directory):
        print(f"Error: The directory '{csv_output_directory}' was not found.")
        exit()

    # Iterate through all files in the specified directory
    for filename in os.listdir(csv_output_directory):
        if filename.endswith(".csv"):  # Check if the file is a CSV file
            file_path = os.path.join(csv_output_directory, filename)
            try:
                # Read the CSV file into a Pandas DataFrame
                df = pd.read_csv(file_path)

                print(f"\n--- File: {filename} ---")
                print("First 5 rows:")
                if not df.empty:
                    print(df.head())
                else:
                    print("The DataFrame is empty.")

            except pd.errors.EmptyDataError:
                print(f"Warning: The file '{filename}' is empty.")
            except Exception as e:
                print(f"Error reading the file '{filename}': {e}")
except Exception as e:
    print(f"An error occurred: {e}")

print("\nDataset visualization process completed.")


Visualizing the first 5 rows of the datasets in: /content/drive/MyDrive/Worms/Resultados/L1/L1_intensities/Datasets

--- File: straighten_worm_intensity_median.csv ---
First 5 rows:
       0       1       2      3       4       5       6       7       8  \
0  128.6  129.00  129.40  129.6  129.10  127.75  125.75  123.25  121.94   
1  129.4  129.90  130.50  130.4  129.80  127.90  126.60  124.00  119.75   
2  128.0  127.56  125.94  126.1  123.40  120.44  121.00  120.60  119.94   
3  128.9  127.70  126.44  126.0  125.00  124.44  123.80  122.60  122.90   
4  129.8  128.90  128.80  128.1  127.44  125.75  125.80  126.30  125.06   

        9  ...     121     122     123     124     125    126     127    128  \
0  121.06  ...  126.56  126.44  126.94  125.94  125.56  126.8  128.80  129.9   
1  123.50  ...  125.20  126.50  126.94  126.56  127.30  127.8  128.60  129.2   
2  120.80  ...  126.56  128.40  129.20  130.10  130.50  130.6  130.10  130.4   
3  122.30  ...  127.75  127.30  126.75  127.06