<a href="https://colab.research.google.com/github/AnhVietPham/Machine-Learning/blob/main/Intro_H5File.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **https://colab.research.google.com/github/astg606/py_materials/blob/master/science_data_format/introduction_h5py.ipynb**

In [49]:
import os 
import datetime as dt
import six
import numpy as np
import h5py

In [50]:
hdFileName = 'sample_h5.h5'
modeType = 'w'
hdfid = h5py.File(hdFileName, modeType)

In [51]:
print(hdfid)
comp = 9

<HDF5 file "sample_h5.h5" (mode r+)>


In [52]:
lat = np.arange(-90, 91, 2.0)
dset = hdfid.require_dataset('lat', shape = lat.shape, 
                             dtype = np.float32, 
                             compression = 'gzip', 
                             compression_opts = comp)

dset[...] = lat
dset.attrs['name'] = 'latitude'
dset.attrs['units'] = 'A'

# =================================
lon = np.arange(-180, 180,2.5)
dset = hdfid.require_dataset('lon', shape=lon.shape, 
                             dtype = np.float32, 
                             compression = 'gzip', 
                             compression_opts = comp)

dset[...] = lon
dset.attrs['name'] = 'longtitue'
dset.attrs['units'] = 'B'

# ===================================
lev = np.arange(0, 72, 1)
dset = hdfid.require_dataset('lev', shape= lev.shape, 
                             dtype= np.int, 
                             compression='gzip', 
                             compression_opts =comp)

dset[...] = lev
dset.attrs.update({'name': 'Vertical level',
                   'units': 'hPa'
                   })


# Time (Note the unlimied dimension)
time = np.arange(0,1,1)
dset = hdfid.require_dataset('time',shape = time.shape,
                             maxshape = (None),
                             dtype = np.float32, 
                             compression = comp)

dset[...] = time
dset.attrs['name'] = 'time'
dset.attrs['units'] = 'hours since 2021-30-4'
dset.attrs['calendar'] = 'gregorian'


In [53]:
nrecs = 5
arr = np.zeros((nrecs, lev.size, lat.size, lon.size))
arr[0:nrecs,:,:,:] = 300*np.random.uniform(size = (nrecs, lev.size, lat.size, lon.size))
dset = hdfid.require_dataset('temp', shape = arr.shape, dtype = np.float32, compression = comp)


dset[...] = arr
dset.attrs['name'] = 'temperature'
dset.attrs['units'] = 'K'


In [54]:
arr2 = np.zeros((lat.size, lon.size))
arr2[:,:] = np.random.random(size=(lat.size, lon.size))
landfrac = hdfid.create_dataset('landfac', data = arr2, dtype = np.float32)
landfrac.attrs['name'] = 'Fraction of land'
landfrac.attrs['units'] = '1'

In [55]:
hdfid.attrs['Description'] = 'Anh Viet Pham Supper'
hdfid.attrs['History']     = 'AVP Create for sample HD5 File'
hdfid.attrs['Source']      = 'NASA AVP'
hdfid.attrs['HDF5_Version'] = six.u(h5py.version.hdf5_version)
hdfid.attrs['h5py_version'] = six.u(h5py.version.version)

In [56]:
glob_attr = {'Date': dt.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"), 
            'User': 'AVP',}
hdfid.attrs.update(glob_attr)

In [57]:
import json

metadata = {'Note-AVP': 'AVP LOAD METADATA', 
            'OS-AVP': os.name,}
m = hdfid.create_dataset('meta-data-avp', data=json.dumps(metadata))

In [58]:
for k in hdfid.attrs.keys():
    print('{} => {}'.format(k, hdfid.attrs[k]))

Date => 05/01/2021, 00:06:38
Description => Anh Viet Pham Supper
HDF5_Version => 1.10.4
History => AVP Create for sample HD5 File
Source => NASA AVP
User => AVP
h5py_version => 2.10.0


In [59]:
metadata_read = json.loads(hdfid['meta-data-avp'][()])
for k in metadata_read:
  print('{} => {}'.format(k, metadata_read[k]))

Note-AVP => AVP LOAD METADATA
OS-AVP => posix


In [60]:
hdfid.close()

In [61]:
from google.colab import files
uploaded = files.upload()


Saving DogBreedModel.h5 to DogBreedModel.h5


In [62]:
print(uploaded)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [76]:
with h5py.File('/content/DogBreedModel.h5','r') as f:
  f.visit(print)

model_weights
model_weights/dense
model_weights/dense/dense
model_weights/dense/dense/bias:0
model_weights/dense/dense/kernel:0
model_weights/dropout
optimizer_weights
optimizer_weights/Adam
optimizer_weights/Adam/dense
optimizer_weights/Adam/dense/bias
optimizer_weights/Adam/dense/bias/m:0
optimizer_weights/Adam/dense/bias/v:0
optimizer_weights/Adam/dense/kernel
optimizer_weights/Adam/dense/kernel/m:0
optimizer_weights/Adam/dense/kernel/v:0
optimizer_weights/Adam/iter:0


In [77]:
def my_func(name):
    print(name, hf[name])

with h5py.File('/content/DogBreedModel.h5', 'r') as hf:
     hf.visit(my_func)

model_weights <HDF5 group "/model_weights" (2 members)>
model_weights/dense <HDF5 group "/model_weights/dense" (1 members)>
model_weights/dense/dense <HDF5 group "/model_weights/dense/dense" (2 members)>
model_weights/dense/dense/bias:0 <HDF5 dataset "bias:0": shape (120,), type "<f4">
model_weights/dense/dense/kernel:0 <HDF5 dataset "kernel:0": shape (6592, 120), type "<f4">
model_weights/dropout <HDF5 group "/model_weights/dropout" (0 members)>
optimizer_weights <HDF5 group "/optimizer_weights" (1 members)>
optimizer_weights/Adam <HDF5 group "/optimizer_weights/Adam" (2 members)>
optimizer_weights/Adam/dense <HDF5 group "/optimizer_weights/Adam/dense" (2 members)>
optimizer_weights/Adam/dense/bias <HDF5 group "/optimizer_weights/Adam/dense/bias" (2 members)>
optimizer_weights/Adam/dense/bias/m:0 <HDF5 dataset "m:0": shape (120,), type "<f4">
optimizer_weights/Adam/dense/bias/v:0 <HDF5 dataset "v:0": shape (120,), type "<f4">
optimizer_weights/Adam/dense/kernel <HDF5 group "/optimizer

In [78]:
def printall(name, obj):
    print(name, dict(obj.attrs))

with h5py.File('/content/DogBreedModel.h5', 'r') as hf:
     hf.visititems(printall)

model_weights {'backend': b'tensorflow', 'keras_version': b'2.4.0', 'layer_names': array([b'dropout', b'dense'], dtype='|S7')}
model_weights/dense {'weight_names': array([b'dense/kernel:0', b'dense/bias:0'], dtype='|S14')}
model_weights/dense/dense {}
model_weights/dense/dense/bias:0 {}
model_weights/dense/dense/kernel:0 {}
model_weights/dropout {'weight_names': array([], dtype=float64)}
optimizer_weights {'weight_names': array([b'Adam/iter:0', b'Adam/dense/kernel/m:0', b'Adam/dense/bias/m:0',
       b'Adam/dense/kernel/v:0', b'Adam/dense/bias/v:0'], dtype='|S21')}
optimizer_weights/Adam {}
optimizer_weights/Adam/dense {}
optimizer_weights/Adam/dense/bias {}
optimizer_weights/Adam/dense/bias/m:0 {}
optimizer_weights/Adam/dense/bias/v:0 {}
optimizer_weights/Adam/dense/kernel {}
optimizer_weights/Adam/dense/kernel/m:0 {}
optimizer_weights/Adam/dense/kernel/v:0 {}
optimizer_weights/Adam/iter:0 {}


In [80]:
def printall(name, obj):
    if isinstance(obj, h5py.Group):
        print(name, " is a Group")
    elif isinstance(obj, h5py.Dataset):
        print(name, " is a Dataset")
    else:
        print(name, " is of an unknown type")

with h5py.File('/content/DogBreedModel.h5', 'r') as hf:
     hf.visititems(printall)

model_weights  is a Group
model_weights/dense  is a Group
model_weights/dense/dense  is a Group
model_weights/dense/dense/bias:0  is a Dataset
model_weights/dense/dense/kernel:0  is a Dataset
model_weights/dropout  is a Group
optimizer_weights  is a Group
optimizer_weights/Adam  is a Group
optimizer_weights/Adam/dense  is a Group
optimizer_weights/Adam/dense/bias  is a Group
optimizer_weights/Adam/dense/bias/m:0  is a Dataset
optimizer_weights/Adam/dense/bias/v:0  is a Dataset
optimizer_weights/Adam/dense/kernel  is a Group
optimizer_weights/Adam/dense/kernel/m:0  is a Dataset
optimizer_weights/Adam/dense/kernel/v:0  is a Dataset
optimizer_weights/Adam/iter:0  is a Dataset
