# Extract_metadata
In this notebook, we will look at how to extract image metadata from the Z=0 plane of raw light sheet microscope data.

In [1]:
import tifffile
from xml.etree import ElementTree as ET 

In [2]:
# Path to an example raw file for the Z=0 plane.
# The literal is split into directory / acquisition folder / file name;
# adjacent string literals are concatenated into a single path.
raw_file = (
    '/jukebox/LightSheetTransfer/brody/'
    '191031_e111_1_1x_488_008na_1hfds_z10um_100msec_60povlp_16-57-24/'
    '16-57-24_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif'
)

In [3]:
# Open the TIFF, collect the metadata tags of its first page, then list the tag names
with tifffile.TiffFile(raw_file) as tiff_handle:
    first_page = tiff_handle.pages[0]
    tags = first_page.tags
tags.keys()

dict_keys(['ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'DocumentName', 'ImageDescription', 'Make', 'Model', 'StripOffsets', 'Orientation', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'PlanarConfiguration', 'DateTime', 'ImageDepth'])

In [4]:
# Try to access one tag by name. Notice that the values stored in `tags` are TiffTag()
# objects, so the cell output is the object's repr rather than the image width itself.
tags['ImageWidth']

<tifffile.tifffile.TiffTag at 0x7f5d999a82c8>

In [5]:
# The stored value is read from the TiffTag's `value` attribute
width_tag = tags['ImageWidth']
print(width_tag.value)

2160


## Example: get the pixel size in x,y,z dimensions


In [6]:
# The pixel sizes live in a very long XML string stored in the 'ImageDescription' tag
description_tag = tags['ImageDescription']
xml_description = description_tag.value
# Show only the first 1000 characters
xml_description[:1000]

'<?xml version="1.0" encoding="UTF-8"?><OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2008-02" xmlns:ca="http://www.openmicroscopy.org/Schemas/CA/2008-02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2008-02 http://www.openmicroscopy.org/Schemas/OME/2008-02/ome.xsd" UUID="urn:uuid:9486A33E-5005-4614-B0DB-89C1E338D0A5">\r\n<Experimenter ID="Experimenter:not_specified_not_specified">\r\n<FirstName>not_specified</FirstName>\r\n<LastName>not_specified</LastName>\r\n<Email>not_specified</Email>\r\n<Institution>not_specified</Institution>\r\n</Experimenter>\r\n<Group ID="urn:lsid:imspector.group.id:Group:not_specified" Name="not_specified"/>\r\n<Image ID="Image:A4C8152E-3001-4001-AA32-4914EAE1EE30" Name="16-57-24_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif" DefaultPixels="Pixels:A4C8152E-3001-4001-AA32-4914EAE1EE30">\r\n<CreationDate>2019-10-31T17:34:42</CreationDate>\r\n<Descri

In [7]:
# Parse the XML string using ElementTree. fromstring() returns the root element
# of the parsed tree; printing it shows the namespace-qualified tag name.
root = ET.fromstring(xml_description)
print(root)

<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}OME' at 0x7f5d5f9f1138>


In [8]:
# Iterating over an element yields its direct children, so loop over the
# root node to see what sits one level further down the tree
for top_level_tag in root:
    print(top_level_tag.tag, top_level_tag.attrib)

{http://www.openmicroscopy.org/Schemas/OME/2008-02}Experimenter {'ID': 'Experimenter:not_specified_not_specified'}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Group {'ID': 'urn:lsid:imspector.group.id:Group:not_specified', 'Name': 'not_specified'}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Image {'ID': 'Image:A4C8152E-3001-4001-AA32-4914EAE1EE30', 'Name': '16-57-24_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif', 'DefaultPixels': 'Pixels:A4C8152E-3001-4001-AA32-4914EAE1EE30'}
{http://www.openmicroscopy.org/Schemas/CA/2008-02}CustomAttributes {}


In [9]:
# root[2] is the "Image" tag in the listing of root's children; expand it one level further
third_child = root[2]
for sub_tag in third_child:
    print(sub_tag.tag, sub_tag.attrib)

{http://www.openmicroscopy.org/Schemas/OME/2008-02}CreationDate {}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Description {}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Pixels {'ID': 'Pixels:A4C8152E-3001-4001-AA32-4914EAE1EE30', 'DimensionOrder': 'XYCZT', 'PixelType': 'uint16', 'BigEndian': 'false', 'SizeX': '2160', 'SizeY': '2560', 'SizeZ': '920', 'SizeT': '1', 'SizeC': '2', 'PhysicalSizeX': '5.909091', 'PhysicalSizeY': '5.909091', 'PhysicalSizeZ': '10.000000'}
{http://www.openmicroscopy.org/Schemas/CA/2008-02}CustomAttributes {}


In [10]:
# The pixel size is in the PhysicalSizeX, PhysicalSizeY, PhysicalSizeZ attributes, which are in the "Pixels" tag.
# Look the tags up by name instead of by position (root[2][2]): positional indexing
# depends on the order of the elements, so it can pick the wrong tag in a file whose
# elements are ordered differently.
# Tag names are namespace-qualified ('{uri}Name'), so reuse the '{uri}' prefix of the root tag.
ns_prefix = root.tag[:root.tag.index('}') + 1] if root.tag.startswith('{') else ''
image_tag = root.find(ns_prefix + 'Image')
pixel_tag = image_tag.find(ns_prefix + 'Pixels')
pixel_dict = pixel_tag.attrib
print(pixel_dict['PhysicalSizeX'],pixel_dict['PhysicalSizeY'],pixel_dict['PhysicalSizeZ'])

5.909091 5.909091 10.000000


In [11]:
# To get a better sense of what is stored in the XML description, you can either save the XML
# to a file with a .xml extension and open it in a browser, or pretty-print it here:
import xml.dom.minidom
serialized_root = ET.tostring(root)
dom = xml.dom.minidom.parseString(serialized_root)
xmlstr = dom.toprettyxml()
print(xmlstr)

<?xml version="1.0" ?>
<ns0:OME UUID="urn:uuid:9486A33E-5005-4614-B0DB-89C1E338D0A5" xmlns:ns0="http://www.openmicroscopy.org/Schemas/OME/2008-02" xmlns:ns2="http://www.openmicroscopy.org/Schemas/CA/2008-02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2008-02 http://www.openmicroscopy.org/Schemas/OME/2008-02/ome.xsd">
	

	<ns0:Experimenter ID="Experimenter:not_specified_not_specified">
		

		<ns0:FirstName>not_specified</ns0:FirstName>
		

		<ns0:LastName>not_specified</ns0:LastName>
		

		<ns0:Email>not_specified</ns0:Email>
		

		<ns0:Institution>not_specified</ns0:Institution>
		

	</ns0:Experimenter>
	

	<ns0:Group ID="urn:lsid:imspector.group.id:Group:not_specified" Name="not_specified"/>
	

	<ns0:Image DefaultPixels="Pixels:A4C8152E-3001-4001-AA32-4914EAE1EE30" ID="Image:A4C8152E-3001-4001-AA32-4914EAE1EE30" Name="16-57-24_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif">
		



## Example: get experimenter information (netid)

In [12]:
# The experimenter info sits in the Experimenter tag, the first child of the root node
exp_tag = root[0]
for field in exp_tag:
    print(field, field.attrib)

<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}FirstName' at 0x7f5d5f9f1278> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}LastName' at 0x7f5d5f9f12c8> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}Email' at 0x7f5d5f9f1318> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}Institution' at 0x7f5d5f9f1368> {}


In [13]:
# Here we see the attrib dict is empty, so how do we actually extract the email?
# Email is the third child of the Experimenter tag; list the attributes and
# methods the element offers to find where its content is stored.
email_tag = exp_tag[2]
print(dir(email_tag))

['__class__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'attrib', 'clear', 'extend', 'find', 'findall', 'findtext', 'get', 'getchildren', 'getiterator', 'insert', 'items', 'iter', 'iterfind', 'itertext', 'keys', 'makeelement', 'remove', 'set', 'tag', 'tail', 'text']


In [14]:
# The content between the opening and closing tags is held in the element's `text` attribute
email_tag.text

'not_specified'

## Save the metadata xml file and view it in a browser (best option for viewing)

In [16]:
# Just need to write the xml string to a file.
# An existing file is left untouched, and the message reports what actually happened
# (previously "wrote ..." was printed even when nothing was written).
import os
output_filename = 'example_lightsheet_metadata.xml'
if os.path.exists(output_filename):
    print("%s already exists, not overwriting" % output_filename)
else:
    # The XML declaration says encoding="UTF-8", so write UTF-8 regardless of the locale;
    # newline='' writes the string's own line endings without translating them.
    with open(output_filename, 'w', encoding='utf-8', newline='') as outfile:
        outfile.write(xml_description)
    print("wrote %s" % output_filename)

wrote example_lightsheet_metadata.xml
