In [None]:
# import numpy as np
# import h5py
# import json
# from pathlib import Path
# import zlib
# from typing import Union, Dict, Any, List
# import scipy.sparse as sparse

# class MatrixStorage:
#     """Utility class for efficient matrix storage with different formats."""
    
#     @staticmethod
#     def save_matrices(matrices: Dict[str, Union[np.ndarray, sparse.spmatrix]], 
#                      filepath: str,
#                      format: str = 'npz',
#                      compress: bool = True,
#                      metadata: Dict[str, Any] = None) -> None:
#         """
#         Save multiple matrices in a single file with their associated keys.
        
#         Args:
#             matrices: Dictionary mapping keys to matrices
#             filepath: Path to save the file
#             format: Format to save ('npz', 'hdf5')
#             compress: Whether to use compression
#             metadata: Optional dictionary of metadata to save
#         """
#         filepath = Path(filepath)
#         filepath.parent.mkdir(parents=True, exist_ok=True)
        
#         if format == 'npz':
#             save_dict = {}
#             sparse_info = {}
            
#             for key, matrix in matrices.items():
#                 if sparse.issparse(matrix):
#                     # For sparse matrices, save the format and convert to COO
#                     matrix = matrix.tocoo()
#                     save_dict[f"{key}_data"] = matrix.data
#                     save_dict[f"{key}_row"] = matrix.row
#                     save_dict[f"{key}_col"] = matrix.col
#                     sparse_info[key] = {
#                         'shape': matrix.shape,
#                         'format': 'sparse'
#                     }
#                 else:
#                     save_dict[key] = matrix
            
#             if metadata or sparse_info:
#                 save_dict['__metadata__'] = {
#                     'user_metadata': metadata if metadata else {},
#                     'sparse_info': sparse_info
#                 }
            
#             if compress:
#                 np.savez_compressed(filepath, **save_dict)
#             else:
#                 np.savez(filepath, **save_dict)
                
#         elif format == 'hdf5':
#             with h5py.File(filepath, 'w') as f:
#                 for key, matrix in matrices.items():
#                     if sparse.issparse(matrix):
#                         if not isinstance(matrix, sparse.coo_matrix):
#                             matrix = matrix.tocoo()
                        
#                         g = f.create_group(key)
#                         g.create_dataset('data', data=matrix.data, 
#                                        compression='gzip' if compress else None)
#                         g.create_dataset('row', data=matrix.row, 
#                                        compression='gzip' if compress else None)
#                         g.create_dataset('col', data=matrix.col, 
#                                        compression='gzip' if compress else None)
#                         g.attrs['shape'] = matrix.shape
#                         g.attrs['format'] = 'sparse'
#                     else:
#                         f.create_dataset(key, data=matrix, 
#                                        compression='gzip' if compress else None)
                
#                 if metadata:
#                     # Properly serialize metadata to JSON string
#                     f.create_dataset('__metadata__', data=json.dumps(metadata))
#         else:
#             raise ValueError(f"Unsupported format: {format}")
    
#     @staticmethod
#     def load_matrices(filepath: str) -> tuple[Dict[str, Union[np.ndarray, sparse.spmatrix]], Dict[str, Any]]:
#         """
#         Load multiple matrices from file.
        
#         Returns:
#             Tuple of (dictionary of matrices, metadata)
#         """
#         filepath = Path(filepath)
        
#         if filepath.suffix == '.npz':
#             with np.load(filepath, allow_pickle=True) as data:
#                 matrices = {}
#                 metadata = {}
#                 sparse_info = {}
                
#                 # Load metadata if it exists
#                 if '__metadata__' in data:
#                     meta_dict = data['__metadata__'].item()
#                     metadata = meta_dict.get('user_metadata', {})
#                     sparse_info = meta_dict.get('sparse_info', {})
                
#                 # Process each key
#                 for key in data.files:
#                     if key == '__metadata__':
#                         continue
                        
#                     # Check if this is part of a sparse matrix
#                     base_key = key.split('_')[0]
#                     if base_key in sparse_info:
#                         if key.endswith('_data'):
#                             # Reconstruct sparse matrix
#                             data_arr = data[f"{base_key}_data"]
#                             row_arr = data[f"{base_key}_row"]
#                             col_arr = data[f"{base_key}_col"]
#                             shape = sparse_info[base_key]['shape']
                            
#                             matrices[base_key] = sparse.coo_matrix(
#                                 (data_arr, (row_arr, col_arr)),
#                                 shape=shape
#                             )
#                     elif not key.endswith(('_row', '_col')):  # Skip sparse matrix components
#                         matrices[key] = data[key]
                
#                 return matrices, metadata
                    
#         elif filepath.suffix == '.h5':
#             with h5py.File(filepath, 'r') as f:
#                 matrices = {}
#                 metadata = {}
                
#                 # Load metadata if it exists
#                 if '__metadata__' in f:
#                     try:
#                         metadata = json.loads(f['__metadata__'][()])
#                     except json.JSONDecodeError:
#                         # Handle case where metadata might be stored as string
#                         metadata = {}
                
#                 # Load each matrix
#                 for key in f.keys():
#                     if key == '__metadata__':
#                         continue
                        
#                     if isinstance(f[key], h5py.Group):
#                         # Load sparse matrix
#                         g = f[key]
#                         if 'format' in g.attrs and g.attrs['format'] == 'sparse':
#                             matrices[key] = sparse.coo_matrix(
#                                 (g['data'][:], (g['row'][:], g['col'][:])),
#                                 shape=g.attrs['shape']
#                             )
#                     else:
#                         matrices[key] = f[key][:]
                
#                 return matrices, metadata
        
#         raise ValueError(f"Unsupported file format: {filepath.suffix}")

#     @staticmethod
#     def get_keys(filepath: str) -> List[str]:
#         """
#         Get the keys of stored matrices without loading the data.
        
#         Args:
#             filepath: Path to the file
            
#         Returns:
#             List of matrix keys
#         """
#         filepath = Path(filepath)
        
#         if filepath.suffix == '.npz':
#             with np.load(filepath, allow_pickle=True) as data:
#                 keys = set()
#                 sparse_info = {}
                
#                 # Load sparse info if it exists
#                 if '__metadata__' in data:
#                     meta_dict = data['__metadata__'].item()
#                     sparse_info = meta_dict.get('sparse_info', {})
                
#                 # Get unique keys, handling sparse matrix components
#                 for key in data.files:
#                     if key == '__metadata__':
#                         continue
#                     base_key = key.split('_')[0]
#                     if base_key in sparse_info:
#                         keys.add(base_key)
#                     elif not key.endswith(('_row', '_col', '_data')):
#                         keys.add(key)
                
#                 return sorted(list(keys))
                
#         elif filepath.suffix == '.h5':
#             with h5py.File(filepath, 'r') as f:
#                 return [k for k in f.keys() if k != '__metadata__']
        
#         raise ValueError(f"Unsupported file format: {filepath.suffix}")

In [12]:
# # Create sample matrices
# matrices = {
#     'dense_matrix': [np.ones((500, 300)), np.random.rand(1000, 1000)],
#     'sparse_matrix': sparse.random(1000, 1000, density=0.01),
#     'small_matrix': np.random.rand(10, 10)
# }

# # Save all matrices with metadata
# MatrixStorage.save_matrices(
#     matrices,
#     'multiple_matrices.h5',
#     format='hdf5',
#     compress=True,
#     metadata={'description': 'Collection of test matrices'}
# )

# # Get keys without loading data
# keys = MatrixStorage.get_keys('multiple_matrices.h5')
# print(f"Available matrices: {keys}")

# # Load all matrices
# loaded_matrices, metadata = MatrixStorage.load_matrices('multiple_matrices.h5')

In [13]:
layer_name, param = "Layer.name.W".rsplit('.', 1)
layer, name = layer_name.split('.', 1)

print((layer, name, param))

('Layer', 'name', 'W')


In [14]:
# loaded_matrices["dense_matrix"].shape

In [15]:
import numpy as np
import h5py
import json
from pathlib import Path
from typing import Union, Dict, Any, List
import scipy.sparse as sparse

class TensorStorage:
    @staticmethod
    def save_tensors(tensors: Dict[str, Union[np.ndarray, sparse.spmatrix, List[np.ndarray], List[sparse.spmatrix]]], 
                     filepath: str,
                     format: str = 'hdf5',
                     compress: bool = True,
                     metadata: Dict[str, Any] = None) -> None:
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        
        if format == 'hdf5':
            with h5py.File(filepath, 'w') as f:
                for key, tensor in tensors.items():
                    if isinstance(tensor, list):
                        g = f.create_group(key)
                        g.attrs['is_list'] = True
                        for i, t in enumerate(tensor):
                            if sparse.issparse(t):
                                sg = g.create_group(f'item_{i}')
                                t = t.tocoo()
                                sg.create_dataset('data', data=t.data, compression='gzip' if compress else None)
                                sg.create_dataset('row', data=t.row, compression='gzip' if compress else None)
                                sg.create_dataset('col', data=t.col, compression='gzip' if compress else None)
                                sg.attrs['shape'] = t.shape
                                sg.attrs['format'] = 'sparse'
                            else:
                                g.create_dataset(f'item_{i}', data=t, compression='gzip' if compress else None)
                    else:
                        if sparse.issparse(tensor):
                            g = f.create_group(key)
                            tensor = tensor.tocoo()
                            g.create_dataset('data', data=tensor.data, compression='gzip' if compress else None)
                            g.create_dataset('row', data=tensor.row, compression='gzip' if compress else None)
                            g.create_dataset('col', data=tensor.col, compression='gzip' if compress else None)
                            g.attrs['shape'] = tensor.shape
                            g.attrs['format'] = 'sparse'
                        else:
                            f.create_dataset(key, data=tensor, compression='gzip' if compress else None)
                
                if metadata:
                    f.create_dataset('__metadata__', data=json.dumps(metadata))
        else:
            raise ValueError("Only HDF5 format supported for tensor lists")
    
    @staticmethod
    def load_tensors(filepath: str) -> tuple[Dict[str, Union[np.ndarray, sparse.spmatrix, List[Union[np.ndarray, sparse.spmatrix]]]], Dict[str, Any]]:
        filepath = Path(filepath)
        
        with h5py.File(filepath, 'r') as f:
            tensors = {}
            metadata = {}
            
            if '__metadata__' in f:
                metadata = json.loads(f['__metadata__'][()])
            
            for key in f.keys():
                if key == '__metadata__':
                    continue
                    
                if isinstance(f[key], h5py.Group):
                    if 'is_list' in f[key].attrs:
                        tensor_list = []
                        for i in range(len(f[key].keys())):
                            item = f[key][f'item_{i}']
                            if isinstance(item, h5py.Group):
                                tensor_list.append(sparse.coo_matrix(
                                    (item['data'][:], (item['row'][:], item['col'][:])),
                                    shape=item.attrs['shape']
                                ))
                            else:
                                tensor_list.append(item[:])
                        tensors[key] = tensor_list
                    else:
                        g = f[key]
                        tensors[key] = sparse.coo_matrix(
                            (g['data'][:], (g['row'][:], g['col'][:])),
                            shape=g.attrs['shape']
                        )
                else:
                    tensors[key] = f[key][:]
            
            return tensors, metadata

# Example usage


In [16]:
tensors = {
    'single_tensor': np.random.rand(10, 10),
    'tensor_list': [np.random.rand(5, 5) for _ in range(3)],
    'mixed_list': [np.random.rand(3, 3), sparse.random(10, 10, density=0.1)]
}

storage = TensorStorage()
storage.save_tensors(tensors, './models/tensors.h5')
loaded_tensors, _ = storage.load_tensors('tensors.h5')

In [17]:
with open("./fold/test.json", "w") as f:
    json.dump({"a":"b"}, f, indent=4)

FileNotFoundError: [Errno 2] No such file or directory: './fold/test.json'

In [11]:
loaded_tensors

{'mixed_list': [array([[0.06383093, 0.74544926, 0.48400633],
         [0.36323882, 0.9379993 , 0.41887101],
         [0.13188227, 0.30579926, 0.47064655]]),
  <10x10 sparse matrix of type '<class 'numpy.float64'>'
  	with 10 stored elements in COOrdinate format>],
 'single_tensor': array([[0.9837431 , 0.39758005, 0.80212789, 0.21266189, 0.03932   ,
         0.65569318, 0.80397452, 0.04333962, 0.56686855, 0.71105882],
        [0.2478063 , 0.4807597 , 0.1936631 , 0.97125188, 0.8909481 ,
         0.63782771, 0.2273844 , 0.27743369, 0.71810272, 0.7007975 ],
        [0.21204309, 0.34950067, 0.29982319, 0.63328279, 0.9988186 ,
         0.62824733, 0.77554709, 0.27468339, 0.14878811, 0.17153857],
        [0.33975458, 0.40186554, 0.1133076 , 0.22064442, 0.41271228,
         0.91106548, 0.88642493, 0.43685455, 0.74526915, 0.9271316 ],
        [0.46212798, 0.79869936, 0.92611877, 0.14229006, 0.91762228,
         0.09397049, 0.71861177, 0.18176957, 0.25439358, 0.35716383],
        [0.07607653, 0.