In [2]:
import xml.etree.ElementTree as ET
import numpy as np
import os

#---------------------------------------------------------------------------------------------#

# Options: 'h_bulk'=HUVECs, 'mdck'=MDCKs, 'mda_bulk'=MDA-MB-231s
cell_type = 'mdck_kevin'

# Options: 0 or 1
do_subsample = 0
subsample_step = 2

# Options: 0 or 1
do_timerange = 0
num_hours = 6*8*2
start_frame = 6*8
end_frame = start_frame + num_hours

#---------------------------------------------------------------------------------------------#

if(cell_type=='h_bulk'):
    file_path_xml = './XMLs/HUVEC_Bulk'
    file_path_npy = './NPYs/HUVEC_Bulk'
    frame_range = 140
    frames_per_hour = 6.0
    blob_size = 12.0
    body_size = 25.0
    
if(cell_type=='h_bulk_cut'):
    file_path_xml = './XMLs/HUVEC_Bulk'
    file_path_npy = './NPYs/HUVEC_Bulk_46'
    frame_range = 140
    frames_per_hour = 6.0
    blob_size = 12.0
    body_size = 25.0
    
if(cell_type=='mdck_bulk'):
    file_path_xml = './XMLs/MDCK_Bulk'
    file_path_npy = './NPYs/MDCK_Bulk'
    frame_range = 49
    frames_per_hour = 6.0
    blob_size = 15.0
    body_size = 35.0
    
if(cell_type=='mdck_edge'):
    file_path_xml = './XMLs/MDCK_Edge'
    file_path_npy = './NPYs/MDCK_Edge'
    frame_range = 49
    frames_per_hour = 6.0
    blob_size = 15.0
    body_size = 35.0
    
if(cell_type=='mda_bulk'):
    file_path_xml = './XMLs/MDA_MB_231_Bulk'
    file_path_npy = './NPYs/MDA_MB_231_Bulk'
    frame_range = 97
    frames_per_hour = 6.0
    blob_size = 15.0
    body_size = 35.0    
    
if(cell_type=='mdck_kevin'):
    file_path_xml = './XMLs/Kevin_After'
    file_path_npy = './NPYs/Kevin_After'
    frame_range = 60
    frames_per_hour = 3.0
    blob_size = 6.0
    body_size = 15.0
    
if(do_subsample==1):
    file_path_npy = file_path_npy + '_Subsample_' + str(subsample_step) 
if(do_timerange==1):
    file_path_npy = file_path_npy + '_TimeRange_' + str(start_frame) + '_' + str(end_frame) 
if not os.path.exists(file_path_npy):
    os.makedirs(file_path_npy)

#---------------------------------------------------------------------------------------------#

# Get all XML files in that path: 
file_list = [f for f in os.listdir(file_path_xml) if os.path.isfile(os.path.join(file_path_xml, f)) and f.endswith('.xml')]
file_list.sort()
print(file_list)

# Loop over XML files (each representing one tissue):
for i in range(len(file_list)):
    
    print(i)
        
    # Get the XML filename: 
    filename = file_path_xml + '/' + file_list[i]
    
    # Use library to parse XML file: 
    root = ET.parse(filename).getroot()
            
    # Empty initial array for storing trajectories: 
    trajectories = np.empty([len(root.findall('particle')), frame_range, 2])
        
    # Loop over all tracks: 
    counter = 0
    for type_tag in root.findall('particle'):
        #if(type_tag.get('nSpots')!=str(frame_range)):
        #    print(type_tag.get('nSpots'))
        #    print(i)
        #    print('Error')
        #    break
            
        # Get x, y trajectory values for each individual track: 
        x_store = []
        y_store = []
        for detection in type_tag:
            x_store.append(detection.get('x'))
            y_store.append(detection.get('y'))
            
        #print(len(x_store))
        #print(len(y_store))
        
        #print(counter)
            
        # Add trajectories to numpy array containing ALL tracks: 
        xtraj_temp = np.asarray(x_store) #, dtype=np.float32)
        ytraj_temp = np.asarray(y_store)
        traj_temp = np.empty([frame_range, 2])
        
        if(len(xtraj_temp) == frame_range):
            traj_temp[:,0] = xtraj_temp
            traj_temp[:,1] = ytraj_temp
        
            trajectories[counter,:,:] = traj_temp
            counter += 1
        
    traj_store = trajectories[:(counter-1), :, :]
    
    traj_store_real = np.transpose(traj_store, (1, 0, 2))
    print(traj_store_real.shape)
    
    if(do_timerange==1):
        traj_store_real = traj_store_real[start_frame:end_frame, :, :]
        #traj_store_real = traj_store_real[start_frame:, :, :]

        print(traj_store_real.shape)
    if(do_subsample==1):
        traj_store_real = traj_store_real[::subsample_step, :, :]
        print(traj_store_real.shape)
    
                
    # ----------------------------------------------------------------    
    # Save data for EACH video now: 
    cells =	{
      'frames_per_second': frames_per_hour,
      'video_path': "All",
      'trajectories': traj_store_real,
        'git_commit': 'None\n', 
        'body_lenght': body_size, 
        'body_lentgh': body_size, 
        'body_length': body_size
    }

    save_traj_name = file_path_npy + '/npy_' + str(i).zfill(3) + '.npy'
    print(save_traj_name)
    np.save(save_traj_name, cells)

['Post_Contact_inhibition_1_Tracks.xml', 'Post_Contact_inhibition_2_Tracks.xml', 'Post_Contact_inhibition_3_Tracks.xml', 'Post_Contact_inhibition_4_Tracks.xml', 'Post_Contact_inhibition_5_Tracks.xml']
0
(60, 11307, 2)
./NPYs/Kevin_After/npy_000.npy
1
(60, 12155, 2)
./NPYs/Kevin_After/npy_001.npy
2
(60, 11733, 2)
./NPYs/Kevin_After/npy_002.npy
3
(60, 12660, 2)
./NPYs/Kevin_After/npy_003.npy
4
(60, 12596, 2)
./NPYs/Kevin_After/npy_004.npy


In [None]:
import shutil
import random

# If you wanna do the test/train split here:

num_in_training_set = 10

file_list = [f for f in os.listdir(file_path_npy) if os.path.isfile(os.path.join(file_path_npy, f)) and f.endswith('.npy')]
file_list.sort()
print(file_list)

train_dir = file_path_npy + '/train'
test_dir = file_path_npy + '/test'

if not os.path.exists(train_dir):
    os.makedirs(train_dir)
if not os.path.exists(test_dir):
    os.makedirs(test_dir)
    
num_files = len(file_list)

random.seed(42)
random_inds = random.sample((np.arange(len(file_list)).tolist()), num_in_training_set)

for i in range(len(file_list)):
    
    # Get the NPY filename: 
    old_filename = file_path_npy + '/' + file_list[i]
    
    # Training or test set? 
    if(i in random_inds):
        new_filename = train_dir + '/' + file_list[i]
    else:
        new_filename = test_dir + '/' + file_list[i]
    shutil.move(old_filename, new_filename)
    
    
