# Extract_metadata
In this notebook, we will look at how to extract image metadata from the Z=0 plane of raw light sheet microscope data.

In [1]:
import tifffile
from xml.etree import ElementTree as ET 

In [2]:
# Path to an example raw light sheet file (the Z=0 plane of the stack).
# The literal is split into directory / acquisition folder / file name for readability.
raw_file = (
    '/jukebox/LightSheetTransfer/kelly/201908_cfos/'
    '190820_m61468_observ_20190628_1d3x_647_008na_1hfds_z5um_250msec_14-50-40/'
    '14-50-40_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif'
)

In [3]:
# Open the TIFF and pull the tag collection off its first page
with tifffile.TiffFile(raw_file) as tiff_handle:
    first_page = tiff_handle.pages[0]
    tags = first_page.tags
# Show the names of the tags that are available
tags.keys()

dict_keys(['ImageWidth', 'ImageLength', 'BitsPerSample', 'Compression', 'PhotometricInterpretation', 'DocumentName', 'ImageDescription', 'Make', 'Model', 'StripOffsets', 'Orientation', 'SamplesPerPixel', 'RowsPerStrip', 'StripByteCounts', 'PlanarConfiguration', 'DateTime', 'ImageDepth'])

In [4]:
# Look up a single tag by name. Note that the lookup returns a TiffTag()
# object wrapping the entry, not the stored value itself.
tags['ImageWidth']

<tifffile.tifffile.TiffTag at 0x7f0570073818>

In [5]:
# The stored value of a tag is exposed through the TiffTag's .value attribute
tag = tags['ImageWidth']
image_width = tag.value
print(image_width)

2160


## Example: get the pixel size in x,y,z dimensions


In [6]:
# The pixel sizes are buried in a very long xml string held by the 'ImageDescription' tag
description_tag = tags['ImageDescription']
xml_description = description_tag.value
# Only display the first 1000 characters
xml_description[:1000]

'<?xml version="1.0" encoding="UTF-8"?><OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2008-02" xmlns:ca="http://www.openmicroscopy.org/Schemas/CA/2008-02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2008-02 http://www.openmicroscopy.org/Schemas/OME/2008-02/ome.xsd" UUID="urn:uuid:C3F42E27-6499-429C-BDFF-115AC56D6CDD">\r\n<Experimenter ID="Experimenter:Zahra_Dhanerawala">\r\n<FirstName>Zahra</FirstName>\r\n<LastName>Dhanerawala</LastName>\r\n<Email>zmd@princeton.edu</Email>\r\n<Institution>PNI</Institution>\r\n</Experimenter>\r\n<Group ID="urn:lsid:imspector.group.id:Group:Wang_Lab" Name="Wang Lab"/>\r\n<Image ID="Image:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F" Name="14-50-40_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif" DefaultPixels="Pixels:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F">\r\n<CreationDate>2019-08-20T15:11:12</CreationDate>\r\n<Description>not_specified</Description>\r\

In [7]:
# Parse the xml string using ElementTree. fromstring() returns the root
# element of the document (here the namespace-qualified OME element).
root = ET.fromstring(xml_description)
print(root)

<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}OME' at 0x7f0535520098>


In [8]:
# Iterating over an element yields its direct children, so list the
# tag name and attribute dict of every child of the root node
for element in root:
    print(element.tag, element.attrib)

{http://www.openmicroscopy.org/Schemas/OME/2008-02}Experimenter {'ID': 'Experimenter:Zahra_Dhanerawala'}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Group {'ID': 'urn:lsid:imspector.group.id:Group:Wang_Lab', 'Name': 'Wang Lab'}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Image {'ID': 'Image:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F', 'Name': '14-50-40_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif', 'DefaultPixels': 'Pixels:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F'}
{http://www.openmicroscopy.org/Schemas/CA/2008-02}CustomAttributes {}


In [9]:
# The third child of the root (index 2) is the "Image" element; expand it one level
image_children = list(root[2])
for element in image_children:
    print(element.tag, element.attrib)

{http://www.openmicroscopy.org/Schemas/OME/2008-02}CreationDate {}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Description {}
{http://www.openmicroscopy.org/Schemas/OME/2008-02}Pixels {'ID': 'Pixels:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F', 'DimensionOrder': 'XYCZT', 'PixelType': 'uint16', 'BigEndian': 'false', 'SizeX': '2160', 'SizeY': '2560', 'SizeZ': '1343', 'SizeT': '1', 'SizeC': '2', 'PhysicalSizeX': '5.000000', 'PhysicalSizeY': '5.000000', 'PhysicalSizeZ': '5.000000'}
{http://www.openmicroscopy.org/Schemas/CA/2008-02}CustomAttributes {}


In [10]:
# The pixel size is in the PhysicalSizeX, PhysicalSizeY, PhysicalSizeZ attributes, which are in the "Pixels" tag.
# Look the elements up by name rather than by position (root[2][2]) so the lookup does not
# silently pick the wrong element when the children appear in a different order or number.
# Every tag is namespace-qualified ('{uri}Name'), so recover the namespace uri from the root tag.
ome_ns = {'ome': root.tag[1:root.tag.index('}')]}
image_tag = root.find('ome:Image', ome_ns)
if image_tag is None:
    raise ValueError("No Image element found in the xml description")
pixel_tag = image_tag.find('ome:Pixels', ome_ns)
if pixel_tag is None:
    raise ValueError("No Pixels element found inside the Image element")
# The attribute values are strings (e.g. '5.000000'); convert with float() if numbers are needed
pixel_dict = pixel_tag.attrib
print(pixel_dict['PhysicalSizeX'],pixel_dict['PhysicalSizeY'],pixel_dict['PhysicalSizeZ'])

5.000000 5.000000 5.000000


In [11]:
# To get a better sense of what is stored in the xml description file, you can either save the xml as a file with
# extension .xml and then load it in a browser, or pretty-print it here:
import xml.dom.minidom
pretty_xml = xml.dom.minidom.parseString(ET.tostring(root)).toprettyxml()
# The source xml already has line breaks between elements, which toprettyxml() keeps as
# whitespace-only text nodes; drop the resulting blank lines so the printout stays readable.
xmlstr = '\n'.join(line for line in pretty_xml.splitlines() if line.strip())
print(xmlstr)

<?xml version="1.0" ?>
<ns0:OME UUID="urn:uuid:C3F42E27-6499-429C-BDFF-115AC56D6CDD" xmlns:ns0="http://www.openmicroscopy.org/Schemas/OME/2008-02" xmlns:ns2="http://www.openmicroscopy.org/Schemas/CA/2008-02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2008-02 http://www.openmicroscopy.org/Schemas/OME/2008-02/ome.xsd">
	

	<ns0:Experimenter ID="Experimenter:Zahra_Dhanerawala">
		

		<ns0:FirstName>Zahra</ns0:FirstName>
		

		<ns0:LastName>Dhanerawala</ns0:LastName>
		

		<ns0:Email>zmd@princeton.edu</ns0:Email>
		

		<ns0:Institution>PNI</ns0:Institution>
		

	</ns0:Experimenter>
	

	<ns0:Group ID="urn:lsid:imspector.group.id:Group:Wang_Lab" Name="Wang Lab"/>
	

	<ns0:Image DefaultPixels="Pixels:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F" ID="Image:28212BB8-BEDE-4CD6-A6A9-8DAEA484C34F" Name="14-50-40_UltraII_raw_RawDataStack[00 x 00]_C00_xyz-Table Z0000_UltraII Filter0000.ome.tif">
		

		<ns0:CreationDate>2019-08-20T15:11

## Example: get experimenter information (netid)

In [13]:
# Experimenter details sit in the first child of the root, the Experimenter element
exp_tag = root[0]
for field in exp_tag:
    print(field, field.attrib)

<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}FirstName' at 0x7f0535520188> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}LastName' at 0x7f05355201d8> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}Email' at 0x7f0535520228> {}
<Element '{http://www.openmicroscopy.org/Schemas/OME/2008-02}Institution' at 0x7f0535520278> {}


In [14]:
# Every child has an empty attrib dict, so the email is not stored as an attribute.
# Take the Email element (third child, index 2) and list the names it exposes.
email_tag = exp_tag[2]
available_names = dir(email_tag)
print(available_names)

['__class__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'attrib', 'clear', 'extend', 'find', 'findall', 'findtext', 'get', 'getchildren', 'getiterator', 'insert', 'items', 'iter', 'iterfind', 'itertext', 'keys', 'makeelement', 'remove', 'set', 'tag', 'tail', 'text']


In [16]:
# The content between the opening and closing Email tags is available as .text
email_tag.text

'zmd@princeton.edu'

## Save the metadata xml file and view it in a browser (best option for viewing)

In [23]:
# Just need to write the xml string to a file
import os
output_filename = 'example_lightsheet_metadata.xml'
if os.path.exists(output_filename):
    # Never overwrite an existing file, and do not claim that anything was written
    print("%s already exists, not overwriting" % output_filename)
else:
    # The xml prolog declares encoding="UTF-8", so write the file as UTF-8 rather than the
    # platform default; newline='' writes the string's own \r\n line endings unchanged.
    with open(output_filename, 'w', encoding='utf-8', newline='') as outfile:
        outfile.write(xml_description)
    print("wrote %s" % output_filename)

wrote example_lightsheet_metadata.xml
