Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
245 changes: 245 additions & 0 deletions pyMCDS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
import xml.etree.ElementTree as TE
import numpy as np
import pandas as pd
import scipy.io as sio
from pathlib import Path

class pyMCDS:
'''
This class contains a dictionary of dictionaries that contains all of the
output from a single time step of a PhysiCell Model. This class assumes that
all output files are stored in the same directory. Data is loaded by reading
the .xml file for a particular timestep.

Parameters
----------
xml_name : string
String containing the name of the xml file without the path
output_path : string (default: '.')
String containing the path (relative or absolute) to the directory
where PhysiCell output files are stored

Attributes
----------
data : dictionary
Hierarchical container for all of the data retrieved by parsing the xml
file and the files referenced therein.
'''
def __init__(self, xml_file, output_path='.'):
self.data = self._read_xml(xml_file, output_path)

def get_cells_df(self):
'''
Builds DataFrame from data['discrete_cells']

Returns
-------
cells_df : pd.Dataframe [n_cells, n_variables]
Dataframe containing the cell data for all cells at this time step
'''
cells_df = pd.DataFrame(self.data['discrete_cells'])
return cells_df

def get_menv_species_list(self):
'''
Returns list of chemical species in microenvironment

Returns
-------
species_list : array-like (str) [n_species,]
Contains names of chemical species in microenvironment
'''
species_list = []
for name in self.data['continuum_variables']:
species_list.append(name)

return species_list

def get_concentrations(self, species):
'''
Returns the concentration arrays for the specified chemical species
in the microenvironment.

Returns
-------
conc_arr : array-like (np.float) [x_voxels, y_voxels, z_voxels]
Contains the concentration of the specified chemical in each voxel.
The array spatially maps to a meshgrid of the voxel centers.
'''
conc_arr = self.data['continuum_variables'][species]['data']
return conc_arr

def get_time(self):
return self.data['metadata']['current_time']


def _read_xml(self, xml_file, output_path='.'):
'''
Does the actual work of initializing MultiCellDS by parsing the xml
'''

output_path = Path(output_path)
xml_file = output_path / xml_file
try:
tree = TE.parse(xml_file)
except:
print('Data File:', xml_file, 'not found!')
exit(0)

root = tree.getroot()
MCDS = {}

# Get current simulated time
metadata_node = root.find('metadata')
time_node = metadata_node.find('current_time')
MCDS['metadata'] = {}
MCDS['metadata']['current_time'] = float(time_node.text)
MCDS['metadata']['time_units'] = time_node.get('units')

# Get current runtime
time_node = metadata_node.find('current_runtime')
MCDS['metadata']['current_runtime'] = float(time_node.text)
MCDS['metadata']['runtime_units'] = time_node.get('units')

# find the microenvironment node
me_node = root.find('microenvironment')
me_node = me_node.find('domain')

# find the mesh node
mesh_node = me_node.find('mesh')
MCDS['metadata']['spatial_units'] = mesh_node.get('units')
MCDS['mesh'] = {}

# check for cartesian mesh
cartesian = False
mesh_type = mesh_node.get('type')
if mesh_type == 'Cartesian':
cartesian = True

# while we're at it, find the mesh
coord_str = mesh_node.find('x_coordinates').text
delimiter = mesh_node.find('x_coordinates').get('delimiter')
x_coords = np.array(coord_str.split(delimiter), dtype=np.float)

coord_str = mesh_node.find('y_coordinates').text
delimiter = mesh_node.find('y_coordinates').get('delimiter')
y_coords = np.array(coord_str.split(delimiter), dtype=np.float)

coord_str = mesh_node.find('z_coordinates').text
delimiter = mesh_node.find('z_coordinates').get('delimiter')
z_coords = np.array(coord_str.split(delimiter), dtype=np.float)

# reshape into a mesh grid
xx, yy, zz = np.meshgrid(x_coords, y_coords, z_coords)

MCDS['mesh']['x_coordinates'] = xx
MCDS['mesh']['y_coordinates'] = yy
MCDS['mesh']['z_coordinates'] = zz

# Voxel data must be loaded from .mat file
voxel_file = mesh_node.find('voxels').find('filename').text
voxel_path = output_path / voxel_file
try:
initial_mesh = sio.loadmat(voxel_path)['mesh']
except:
print('Data file', voxel_path, 'missing!')
print('Referenced in', xml_file)
exit(0)

# center of voxel specified by first three rows [ x, y, z ]
# volume specified by fourth row
MCDS['mesh']['voxels'] = {}
MCDS['mesh']['voxels']['centers'] = initial_mesh[:3, :]
MCDS['mesh']['voxels']['volumes'] = initial_mesh[3, :]

# Continuum_variables, unlike in the matlab version the individual chemical
# species will be primarily accessed through their names e.g.
# MCDS['continuum_variables']['oxygen']['units']
# MCDS['continuum_variables']['glucose']['data']
MCDS['continuum_variables'] = {}
variables_node = me_node.find('variables')
file_node = me_node.find('data').find('filename')

# micro environment data is shape [4+n, len(voxels)] where n is the number
# of species being tracked. the first 3 rows represent (x, y, z) of voxel
# centers. The fourth row contains the voxel volume. The 5th row and up will
# contain values for that species in that voxel.
me_file = file_node.text
me_path = output_path / me_file
try:
me_data = sio.loadmat(me_path)['multiscale_microenvironment']
except:
print('Data file', me_path, 'missing!')
print('Referenced in', xml_file)
exit(0)

var_children = variables_node.findall('variable')

for i, species in enumerate(var_children):
species_name = species.get('name')
MCDS['continuum_variables'][species_name] = {}
MCDS['continuum_variables'][species_name]['units'] = species.get(
'units')

# travel down one level on tree
species = species.find('physical_parameter_set')

# diffusion data for each species
MCDS['continuum_variables'][species_name]['diffusion_coefficient'] = {}
MCDS['continuum_variables'][species_name]['diffusion_coefficient']['value'] \
= float(species.find('diffusion_coefficient').text)
MCDS['continuum_variables'][species_name]['diffusion_coefficient']['units'] \
= species.find('diffusion_coefficient').get('units')

# decay data for each species
MCDS['continuum_variables'][species_name]['decay_rate'] = {}
MCDS['continuum_variables'][species_name]['decay_rate']['value'] \
= float(species.find('decay_rate').text)
MCDS['continuum_variables'][species_name]['decay_rate']['units'] \
= species.find('decay_rate').get('units')

# store data from microenvironment file as numpy array
MCDS['continuum_variables'][species_name]['data'] \
= me_data[4+i, :].reshape(xx.shape)

# in order to get to the good stuff we have to pass through a few different
# hierarchal levels
cell_node = root.find('cellular_information')
cell_node = cell_node.find('cell_populations')
cell_node = cell_node.find('cell_population')
cell_node = cell_node.find('custom')
# we want the PhysiCell data, there is more of it
for child in cell_node.findall('simplified_data'):
if child.get('source') == 'PhysiCell':
cell_node = child
break

MCDS['discrete_cells'] = {}
data_labels = []
# iterate over 'label's which are children of 'labels' these will be used to
# label data arrays
for label in cell_node.find('labels').findall('label'):
# I don't like spaces in my dictionary keys
fixed_label = label.text.replace(' ', '_')
if int(label.get('size')) > 1:
# tags to differentiate repeated labels (usually space related)
dir_label = ['_x', '_y', '_z']
for i in range(int(label.get('size'))):
data_labels.append(fixed_label + dir_label[i])
else:
data_labels.append(fixed_label)

# load the file
cell_file = cell_node.find('filename').text
cell_path = output_path / cell_file
try:
cell_data = sio.loadmat(cell_path)['cells']
except:
print('Data file', cell_path, 'missing!')
print('Referenced in', xml_file)
exit(0)

for col in range(len(data_labels)):
MCDS['discrete_cells'][data_labels[col]] = cell_data[col, :]

return MCDS
97 changes: 97 additions & 0 deletions pyMCDS_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from pyMCDS import pyMCDS

class pyMCDS_timeseries:
'''
This class contains a np.array of pyMCDS objects as well as functions for
extracting information from that list.

Parameters
----------
output_path : string
String containing the path (relative or absolute) to the directory
containing the PhysiCell output files

Attributes
----------
timeseries : array-like (pyMCDS) [n_timesteps,]
Numpy array of pyMCDS objects sorted by time.
'''
def __init__(self, output_path='.'):
# get a generator of output xml files sorted alphanumerically
sorted_files = sorted(Path(output_path).glob('output*.xml'))

# make a big array to keep them all, create a pyMCDS object with each
# self.timeseries = np.empty(len(sorted_files), dtype=pyMCDS)
# first we'll do a list, if its slow we'll figure out the array
ts_list = []
for f in sorted_files:
ts_list.append(pyMCDS(f.name, output_path))

self.timeseries = np.array(ts_list)

def get_times(self):
'''
Helper function to easily return the cumulative time at each step.

Returns
-------
time_array : ndaray (np.float) [n_timesteps,]
Contains the 'current time' at each recorded point.
'''
time_array = np.zeros(self.timeseries.shape[0])
for i in range(len(self.timeseries)):
time_array[i] = self.timeseries[i].get_time()

return time_array

def plot_menv_total(self, species):
'''
Plot total concentration of chemicals in the system over time.

Parameters
----------
species : array-like (string) or string
Name os names of chemical species to plot
'''
# set up arrays, quicker than lists
times = self.get_times()
total_conc = np.zeros((times.shape[0], len(species)))
# iterate over the times to fill array
for time in range(times.shape[0]):
# iterate over the chemical species to fill array
for i in range(len(species)):
conc_arr = self.timeseries[time].get_concentrations(species[i])
total_conc[time, i] = np.sum(conc_arr)
# establish plots
fig, ax = plt.subplots(figsize=(10, 8))
# add plot elements and show plot
for i, chem in enumerate(species):
ax.plot(times, total_conc[:, i], label=chem)
ax.set_xlabel('Time (min)')
ax.set_ylabel('Total concentration')
ax.legend()
plt.show()

def plot_cell_type_counts(self):
'''
Plot the absolute counts of different types of cells over time.
'''
times = self.get_times()
unique_types = np.unique(self.timeseries[0].data['discrete_cells']['cell_type'])
cell_counts = np.zeros((times.shape[0], unique_types.shape[0]))
for time_i in range(times.shape[0]):
df = self.timeseries[time_i].get_cells_df()
counts = df['cell_type'].value_counts()
cell_counts[time_i, :] = counts
fig, ax = plt.subplots(figsize=(10, 8))
for i in range(cell_counts.shape[1]):
ax.scatter(times, cell_counts[:, i], label=i, s=10)
ax.set_xlabel('Time (min)')
ax.set_ylabel('# of cells in system')
ax.legend()
plt.show()


Loading