In [1]:
from lxml import etree as ET
import pandas as pd
import os
from xml_utils import xml_pretty_print
import numpy as np

# Import DLC database as a template

In [2]:
# Existing DLC annotation file used as the template for the converted output.
dlc_fname = "CollectedData_Charlie.h5"

# Load the annotations; the trailing bare expression displays the frame.
df_original = pd.read_hdf(path_or_buf=dlc_fname)
df_original

scorer,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie,Charlie
bodyparts,neuron0,neuron0,neuron0,neuron1,neuron1,neuron1,neuron2,neuron2,neuron2,neuron3,...,neuron30,neuron31,neuron31,neuron31,neuron32,neuron32,neuron32,neuron33,neuron33,neuron33
coords,x,y,z,x,y,z,x,y,z,x,...,z,x,y,z,x,y,z,x,y,z
labeled-data/test_100frames.ome/img0.tif,216.387579,340.884543,11.0,245.041642,324.087333,11.0,259.862709,315.194693,11.0,299.385555,...,10.0,468.465601,271.216927,23.0,455.189248,253.515123,21.0,465.304565,254.14733,21.0
labeled-data/test_100frames.ome/img1.tif,351.753325,464.393435,11.0,358.669823,427.834803,11.0,360.645965,412.025665,11.0,385.347744,...,10.0,497.547136,249.721879,23.0,482.374162,241.503184,21.0,489.960649,235.813319,21.0
labeled-data/test_100frames.ome/img2.tif,411.037593,519.725419,11.0,421.906376,486.131,11.0,426.846732,471.309933,11.0,444.632012,...,10.0,484.270783,232.652282,23.0,467.833394,241.503184,21.0,467.833394,230.755661,21.0
labeled-data/test_100frames.ome/img3.tif,354.717539,500.952067,11.0,380.407388,476.250289,11.0,391.276171,464.393435,11.0,416.96602,...,10.0,496.282722,241.503184,20.0,484.902991,246.560843,21.0,483.638576,234.548904,21.0
labeled-data/test_100frames.ome/img4.tif,272.707634,416.96602,11.0,307.290124,410.049522,11.0,324.087333,406.097238,11.0,353.729467,...,10.0,515.881148,239.606563,20.0,501.34038,240.870977,21.0,505.765831,229.491246,21.0


In [3]:
# The scorer name is the first value of the outermost column level.
scorer_level = df_original.columns.levels[0]
scorer = scorer_level[0]
scorer

'Charlie'

In [4]:
# Order the rows by image path (ascending) and keep the sorted
# row labels around as the list of annotated files.
df_original = df_original.sort_index(axis=0, ascending=True)
all_files = df_original.index
print(all_files)

Index(['labeled-data/test_100frames.ome/img0.tif',
       'labeled-data/test_100frames.ome/img1.tif',
       'labeled-data/test_100frames.ome/img2.tif',
       'labeled-data/test_100frames.ome/img3.tif',
       'labeled-data/test_100frames.ome/img4.tif'],
      dtype='object')


# Import XML from Icy

In [5]:
# Location of the Icy tracker export, relative to the working directory.
icy_dir = 'few_frame_videos'
icy_file = 'icy_40neurons_tracked.xml'
fname = os.path.join(icy_dir, icy_file)

# Parse the XML into an element tree.
et_icy = ET.parse(fname)

In [6]:
# Root element of the Icy XML; its direct children are indexed as et2[k].
et2 = et_icy.getroot()

# Two root children are excluded from the count
# (presumably non-track bookkeeping entries -- TODO confirm against the Icy format).
n_root_children = len(et2)
num_neurons = n_root_children - 2
print("Found {} neurons".format(num_neurons))

Found 35 neurons


In [7]:
# Peek at the first child of root entry `i`: its type, how many
# children it has, and the 'x' attribute of its first two children.
i = 2
first_child = et2[i][0]
print(type(first_child))
print(len(first_child))
first_child[1].get('x')  # value is discarded: only a cell's final expression is displayed
first_child[0].get('x')

<class 'lxml.etree._Element'>
5


'216.3875792280494'

In [8]:
# Pretty-print the third root entry with the project helper from xml_utils.
# NOTE(review): the second argument (1) is presumably a depth limit -- confirm in xml_utils.
xml_pretty_print(et2[2], 1)

track
{'id': '-744387789'}


In [9]:
# Number of children under the second root entry; the conversion loop
# further down skips any entry whose length is 0.
len(et2[1])

0

# Write dataframe in DLC format

In [11]:

# Convert the Icy tracks held in `et2` into a DLC-style annotation table and
# write it as CSV and HDF5.
#   rows    : image paths taken from the template DLC frame (`df_original`)
#   columns : MultiIndex (scorer, bodyparts, coords) with x/y/z per neuron
# Reads `df_original`, `et2`, `num_neurons` and `scorer` from earlier cells.
relativeimagenames = df_original.index
print(relativeimagenames)
print("Assumes filenames in the DLC annotation are same as the Icy tracker, after alphabetizing")

# NOTE(review): with output_path '.', the files written below carry the same
# name as the template read from `dlc_fname` and overwrite it -- confirm intended.
output_path = '.'

num_images = len(relativeimagenames)
neuron_frames = []  # one (num_images x 3) frame per entry that holds a track
i_neuron_name = 0   # running bodypart number; advances only for non-empty entries

# Build correctly DLC-formatted dataframe
for i_neuron in range(num_neurons):
    bodypart = 'neuron{}'.format(i_neuron_name)

    i_xml = i_neuron + 1 # The first entry in the xml file is the 'trackfile' class
    this_detections = et2[i_xml]
    if len(this_detections) == 0:
        print("No tracks found for entry {}; continuing".format(i_neuron))
        continue
    if len(this_detections) > 1:
        # The original called an undefined `error(...)`, which surfaced as a
        # NameError; raise a real exception carrying the intended message.
        raise ValueError("More than one track found in this group; formatting not supported; aborting")
    this_detections = this_detections[0]
    i_neuron_name = i_neuron_name + 1

    # Detections are matched to images purely by position, so the track must
    # provide at least one detection per image.
    if len(this_detections) < num_images:
        raise ValueError(
            "Track for {} has {} detections but {} images are expected".format(
                bodypart, len(this_detections), num_images))

    # Get xyz coordinates for one neuron, for all files.
    # A fresh array per neuron keeps each frame independent of later iterations.
    coords = np.empty((num_images, 3))
    for i2 in range(num_images):
        this_track = this_detections[i2]
        # NOTE(review): int() truncates toward zero, so fractional positions in
        # the XML are dropped -- confirm this is wanted.
        coords[i2, :] = [int(float(this_track.get(axis))) for axis in ('x', 'y', 'z')]

    index = pd.MultiIndex.from_product([[scorer], [bodypart], ['x', 'y', 'z']],
                                       names=['scorer', 'bodyparts', 'coords'])
    neuron_frames.append(pd.DataFrame(coords, columns=index, index=relativeimagenames))

if not neuron_frames:
    raise ValueError("No tracks found in the Icy XML; nothing to write")

# Join all per-neuron frames side by side in a single concat.
dataFrame = pd.concat(neuron_frames, axis=1)

output_stem = os.path.join(output_path, "CollectedData_" + scorer)
dataFrame.to_csv(output_stem + ".csv")
# `key` is passed by keyword: positional use is deprecated in recent pandas.
dataFrame.to_hdf(output_stem + '.h5', key='df_with_missing', format='table', mode='w')

print("Finished")

Index(['labeled-data/test_100frames.ome/img0.tif',
       'labeled-data/test_100frames.ome/img1.tif',
       'labeled-data/test_100frames.ome/img2.tif',
       'labeled-data/test_100frames.ome/img3.tif',
       'labeled-data/test_100frames.ome/img4.tif'],
      dtype='object')
Assumes filenames in the DLC annotation are same as the Icy tracker, after alphabetizing
No tracks found for entry 0; continuing
Finished
