# Reading SEM images in TIFF format and creating an HDF5 file

In this notebook we define a standard function to read TIFF images and then try to build a suitable HDF5 file.

In [1]:
pip install tifffile


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [97]:
import numpy as np
import h5py
import tifffile
import os
# Folder that contains this notebook; the empty string means the current working directory.
path_file='' # insert the path of this notebook
# os.path.join adds the separator when path_file does not end with one.
# The trailing '' keeps a separator at the end, so path_tiff+<file name> still works.
path_tiff=os.path.join(path_file, 'img_sem', '')
# Sample image used by the first cells of the notebook.
name_img='8_00.tif'

We define a function to read the metadata of an SEM image in TIFF format.

In [78]:
def extract_tiff_metadata(file_path):
    """Collect the tags of a TIFF file into a single flat dictionary.

    The file is opened with ``tifffile.TiffFile`` and every page is visited in
    order. All pages write into the same dictionary, so when two pages carry a
    tag with the same name the value of the later page is the one that is kept.

    Parameters
    ----------
    file_path :
        Location of the TIFF file, passed unchanged to ``tifffile.TiffFile``.

    Returns
    -------
    dict
        Mapping ``tag name -> tag value``.
    """
    with tifffile.TiffFile(file_path) as tiff_file:
        collected = {
            entry.name: entry.value
            for tiff_page in tiff_file.pages
            for entry in tiff_page.tags.values()
        }
    return collected

 



Let's take a look at the top-level keys and at those contained in the value of the key 'CZ_SEM'.

In [79]:
# Read the tags of the sample image and show the tag names that were found.
metadata = extract_tiff_metadata(f'{path_tiff}{name_img}')
print(metadata.keys())
# Uncomment the two lines below to also list the keys stored under the 'CZ_SEM' tag.
#print('CZ_SEM')
#print(metadata['CZ_SEM'].keys())

dict_keys(['NewSubfileType', 'ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'StripOffsets', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'XResolution', 'YResolution', 'ResolutionUnit', 'ColorMap', 'CZ_SEM'])


You may extract the image data as an array with:

In [5]:
# Load the pixel data of the sample image with tifffile.imread.
image_array = tifffile.imread(f'{path_tiff}{name_img}')


### Exercise 

Create an HDF5 file that contains all the images in the folder `img_sem/`, arranging data and metadata in the way you consider most suitable.

*Suggestion: check whether some metadata are common to all the images. Think about how you would describe a SEM measurement. If you are not a SEM expert, focus on image metadata such as pixels and dimensions, or on general information such as the username.*

### Solution



In [101]:
def extract_tiff_metadata_new(file_path):
    """Collect the tags of a TIFF file, keeping one dictionary per page.

    The file is opened with ``tifffile.TiffFile``. Pages are numbered from 1 in
    the order in which they are iterated, and the tags of page ``n`` are stored
    under the key ``'page<n>'``.

    Parameters
    ----------
    file_path :
        Location of the TIFF file, passed unchanged to ``tifffile.TiffFile``.

    Returns
    -------
    dict
        Mapping ``'page<n>' -> {tag name: tag value}``.
    """
    with tifffile.TiffFile(file_path) as tiff_file:
        per_page = {
            f'page{number}': {
                entry.name: entry.value for entry in tiff_page.tags.values()
            }
            for number, tiff_page in enumerate(tiff_file.pages, start=1)
        }
    return per_page

In [109]:
# Names of the TIFF tags that are copied into the HDF5 file for every page.
key_selected = [
    'ImageWidth',
    'ImageLength',
    'BitsPerSample',
    'Compression',
    'XResolution',
    'YResolution',
]


In [94]:
# Check that the function works on every TIFF image of the folder.
# The file list is built here so the cell also runs on a fresh kernel;
# sorting makes the processing order independent of the file system.
list_f = sorted(os.listdir(path_tiff))
for img in list_f:
    # splitext only looks at the last extension: names without a dot no longer
    # raise IndexError and names containing several dots are still recognised.
    if os.path.splitext(img)[1]=='.tif':
        metadata = extract_tiff_metadata_new(path_tiff+img)
        print(metadata['page1'].keys())

dict_keys(['NewSubfileType', 'ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'StripOffsets', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'XResolution', 'YResolution', 'ResolutionUnit', 'ColorMap', 'CZ_SEM'])
dict_keys(['NewSubfileType', 'ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'StripOffsets', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'XResolution', 'YResolution', 'ResolutionUnit', 'ColorMap', 'CZ_SEM'])
dict_keys(['NewSubfileType', 'ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'StripOffsets', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'XResolution', 'YResolution', 'ResolutionUnit', 'ColorMap', 'CZ_SEM'])


In [110]:
# Create (or overwrite) the output file inside path_file, which is the folder
# from which the file is read back later in this notebook.
f = h5py.File(os.path.join(path_file, 'img_sem_sol1.hdf5'), "w")

In [111]:
# Build the list of TIFF files here so the cell does not rely on a variable
# defined in another cell; sorting gives a reproducible image numbering.
list_f = sorted(os.listdir(path_tiff))
# splitext only looks at the last extension, so names without a dot do not
# raise IndexError and names containing several dots are not skipped.
tif_names = [name for name in list_f if os.path.splitext(name)[1]=='.tif']
try:
    # run over the TIFF files; images are numbered from 1 in processing order
    for i, img in enumerate(tif_names, start=1):
        # extract metadata and data
        metadata = extract_tiff_metadata_new(path_tiff+img)
        image_array = tifffile.imread(path_tiff+img)
        # create a group image_i and put the data in a dataset named data
        group_name = '/image'+str(i)
        f[group_name+'/data'] = image_array

        # one sub-group per TIFF page, holding only the selected tags
        for k_p in metadata.keys():
            f[group_name].create_group(k_p)

            for k in key_selected:
                f[group_name+'/'+k_p+'/'+k] = metadata[k_p][k]
finally:
    # Close the file even when an image cannot be processed, so the handle is
    # not left open in the kernel.
    f.close()
    

In [65]:
pip install nexusformat


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [112]:
from nexusformat.nexus import *

In [114]:
# Load the HDF5 file created earlier in this notebook and print its structure.
# os.path.join adds the separator when path_file does not end with one.
test=nxload(os.path.join(path_file, 'img_sem_sol1.hdf5'))
print(test.tree)

root:NXroot
  image1:NXgroup
    data = uint8(768x1024)
    page1:NXgroup
      BitsPerSample = 8
      Compression = 1
      ImageLength = 768
      ImageWidth = 1024
      XResolution = [1 1]
      YResolution = [1 1]
  image2:NXgroup
    data = uint8(768x1024)
    page1:NXgroup
      BitsPerSample = 8
      Compression = 1
      ImageLength = 768
      ImageWidth = 1024
      XResolution = [1 1]
      YResolution = [1 1]
  image3:NXgroup
    data = uint8(768x1024)
    page1:NXgroup
      BitsPerSample = 8
      Compression = 1
      ImageLength = 768
      ImageWidth = 1024
      XResolution = [1 1]
      YResolution = [1 1]
