In [1]:
import h5py
import json

def extract_umap_metadata(h5_path):
    """Extract metadata from a UMAP H5 file.

    Reads the JSON-encoded ``metadata`` attribute of the file and augments
    it with the shape of the ``embeddings`` dataset and the entries of the
    ``class_names`` dataset.

    Args:
        h5_path (str): Path to the H5 file.

    Returns:
        dict: The decoded metadata (including configuration and timestamp)
            plus three added keys: ``n_samples`` (first dimension of
            ``embeddings``), ``n_dimensions`` (second dimension of
            ``embeddings``) and ``class_names`` (list of ``str``).

    Raises:
        KeyError: If the ``metadata`` attribute, or the ``embeddings`` or
            ``class_names`` dataset, is missing from the file.
    """
    with h5py.File(h5_path, 'r') as f:
        # The metadata is stored as a JSON string in the file attributes.
        metadata = json.loads(f.attrs['metadata'])

        # Dataset dimensions come straight from the embeddings shape.
        embeddings_shape = f['embeddings'].shape
        n_samples = embeddings_shape[0]
        n_dimensions = embeddings_shape[1]

        # String datasets may come back as bytes or as str depending on the
        # h5py version and on how the dataset was written; decode only the
        # former so both kinds of file can be read.
        class_names = [
            name.decode('utf-8') if isinstance(name, bytes) else str(name)
            for name in f['class_names']
        ]

    # Add the derived information to the metadata.
    metadata['n_samples'] = n_samples
    metadata['n_dimensions'] = n_dimensions
    metadata['class_names'] = class_names

    return metadata

# Example usage:
# h5_path = "path/to/your/umap_20240312_123456_abcd1234.h5"
# metadata = extract_umap_metadata(h5_path)
# print("Timestamp:", metadata['timestamp'])
# print("\nUMAP Configuration:")
# print(json.dumps(metadata['config'], indent=2))
# print("\nDataset Information:")
# print(f"Number of samples: {metadata['n_samples']}")
# print(f"Number of dimensions: {metadata['n_dimensions']}")
# print("Classes:", metadata['class_names'])

In [2]:
# Read the metadata from one UMAP result file and print the resulting dictionary.
path = (
    "/home/DAVIDSON/dutuller/Workspace/DRI1/MusicGen/results/UMAP/umap_20241110_145427_346bc573.h5"
)
data = extract_umap_metadata(h5_path=path)
print(data)

{'timestamp': '2024-11-10T14:54:27.849347', 'config': {'datasets': [{'dataset': 'acpasXsample_noise', 'method': 'last', 'segment': 15, 'stride': 15}], 'umap': {'n_neighbors': 7, 'min_dist': 0.01, 'n_components': 2, 'random_seed': None, 'metric': 'euclidean'}}, 'n_samples': 4159, 'n_dimensions': 2, 'class_names': ['acpasXsample_noise']}
