diff --git a/matlab_example/citation and license.txt b/matlab_example/citation and license.txt new file mode 100755 index 0000000..a609d83 --- /dev/null +++ b/matlab_example/citation and license.txt @@ -0,0 +1,14 @@ +This dataset is posted online with the Creative Commons Attribution 4.0 International (CC BY 4.0) license. + +It requires that you cite the source of this dataset for *any* reuse: + +A. Ghaffarizadeh, S.H. Friedman, S.M. Mumenthaler, and P. Macklin. +PhysiCell: an Open Source Physics-Based Cell Simulator for 3-D Multicellular Systems. +PLoS Comput. Biol., 2017 (preprint, in review). DOI: 10.1101/088773. + +Please check http://PhysiCell.MathCancer.org for the most recent citation information as +this article undergoes peer review. + +For the full text and legal conditions of the CC BY 4.0 license, see: + +https://creativecommons.org/licenses/by/4.0/ diff --git a/single_set/initial_mesh0.mat b/matlab_example/initial_mesh0.mat similarity index 100% rename from single_set/initial_mesh0.mat rename to matlab_example/initial_mesh0.mat diff --git a/single_set/output00000001.xml b/matlab_example/output00003696.xml similarity index 93% rename from single_set/output00000001.xml rename to matlab_example/output00003696.xml index 37bd92f..d7ada9b 100644 --- a/single_set/output00000001.xml +++ b/matlab_example/output00003696.xml @@ -3,17 +3,17 @@ BioFVM - 1.1.7 + 1.1.6 http://BioFVM.MathCancer.org - 60.000000 - 343.111859 - 2019-09-04T14:51:13Z - 2019-09-04T14:51:13Z + 30239.999998 + 191255.188684 + 2017-08-27T04:28:34Z + 2017-08-27T04:28:34Z @@ -43,7 +43,7 @@ - output00000001_microenvironment0.mat + output00003696_microenvironment0.mat @@ -52,7 +52,7 @@ - output00000001_cells.mat + output00003696_cells.mat @@ -81,7 +81,7 @@ - output00000001_cells_physicell.mat + output00003696_cells_physicell.mat diff --git a/matlab_example/output00003696_cells.mat b/matlab_example/output00003696_cells.mat new file mode 100644 index 0000000..8a184a3 Binary files /dev/null and b/matlab_example/output00003696_cells.mat differ diff --git a/matlab_example/output00003696_cells_physicell.mat b/matlab_example/output00003696_cells_physicell.mat new file mode 100644 index 0000000..a6df7f0 Binary files /dev/null and b/matlab_example/output00003696_cells_physicell.mat differ diff --git a/single_set/output00000001_microenvironment0.mat b/matlab_example/output00003696_microenvironment0.mat similarity index 67% rename from single_set/output00000001_microenvironment0.mat rename to matlab_example/output00003696_microenvironment0.mat index e8342ba..1387054 100644 Binary files a/single_set/output00000001_microenvironment0.mat and b/matlab_example/output00003696_microenvironment0.mat differ diff --git a/matlab_example/snapshot00003696.svg b/matlab_example/snapshot00003696.svg new file mode 100644 index 0000000..5a360c3 --- /dev/null +++ b/matlab_example/snapshot00003696.svg @@ -0,0 +1,3711 @@ + + + + + + + + Current time: 21 days, 0 hours, and 0.00 minutes, z = 0.00 μm + + + 66978 agents + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 200 μm + + + 2 days, 5 hours, 7 minutes, and 35.5525 seconds + + + diff --git a/pyMCDS.py b/pyMCDS.py index b9e25d3..a34245b 100644 --- a/pyMCDS.py +++ b/pyMCDS.py @@ -1,90 +1,331 @@ -import xml.etree.ElementTree as TE +import xml.etree.ElementTree as ET import numpy as np import pandas as pd import scipy.io as sio +import sys +import warnings from pathlib import Path class pyMCDS: - ''' + """ This class contains a dictionary of dictionaries that contains all of the output from a single time step of a PhysiCell Model. This class assumes that all output files are stored in the same directory. Data is loaded by reading the .xml file for a particular timestep. - + Parameters ---------- - xml_name : string + xml_name: str String containing the name of the xml file without the path - output_path : string (default: '.') + output_path: str, optional String containing the path (relative or absolute) to the directory - where PhysiCell output files are stored - + where PhysiCell output files are stored (default= ".") + Attributes ---------- - data : dictionary + data : dict Hierarchical container for all of the data retrieved by parsing the xml file and the files referenced therein. - ''' + """ def __init__(self, xml_file, output_path='.'): self.data = self._read_xml(xml_file, output_path) - def get_cells_df(self): - ''' - Builds DataFrame from data['discrete_cells'] + ## METADATA RELATED FUNCTIONS + + def get_time(self): + return self.data['metadata']['current_time'] + + ## MESH RELATED FUNCTIONS + + def get_mesh(self, flat=False): + """ + Return a meshgrid of the computational domain. Can return either full + 3D or a 2D plane for contour plots. + + Parameters + ---------- + flat : bool + If flat is set to true, we return only the x and y meshgrid. + Otherwise we return x, y, and z Returns ------- - cells_df : pd.Dataframe [n_cells, n_variables] - Dataframe containing the cell data for all cells at this time step - ''' - cells_df = pd.DataFrame(self.data['discrete_cells']) - return cells_df + splitting : list length=2 if flat=True, else length=3 + Contains arrays of voxel center coordinates as meshgrid with shape + [nx_voxel, ny_voxel, nz_voxel] or [nx_voxel, ny_voxel] if flat=True. + """ + if flat == True: + xx = self.data['mesh']['x_coordinates'][:, :, 0] + yy = self.data['mesh']['y_coordinates'][:, :, 0] + + return [xx, yy] + + # if we dont want a plane just return appropriate values + else: + xx = self.data['mesh']['x_coordinates'] + yy = self.data['mesh']['y_coordinates'] + zz = self.data['mesh']['z_coordinates'] + + return [xx, yy, zz] + + def get_2D_mesh(self): + """ + This function returns the x, y meshgrid as two numpy arrays. It is + identical to get_mesh with the option flat=True + + Returns + ------- + splitting : list length=2 + Contains arrays of voxel center coordinates in x and y dimensions + as meshgrid with shape [nx_voxel, ny_voxel] + """ + xx = self.data['mesh']['x_coordinates'][:, :, 0] + yy = self.data['mesh']['y_coordinates'][:, :, 0] + + return [xx, yy] + + def get_linear_voxels(self): + """ + Helper function to quickly grab voxel centers array stored linearly as + opposed to meshgrid-style. + """ + return self.data['mesh']['voxels']['centers'] + + def get_mesh_spacing(self): + """ + Returns the space in between voxel centers for the mesh in terms of the + mesh's spatial units. Assumes that voxel centers fall on integer values. + + Returns + ------- + dx : float + Distance between voxel centers in the same units as the other + spatial measurements + """ + centers = self.get_linear_voxels() + X = np.unique(centers[0, :]) + Y = np.unique(centers[1, :]) + Z = np.unique(centers[2, :]) + + dx = (X.max() - X.min()) / X.shape[0] + dy = (Y.max() - Y.min()) / Y.shape[0] + dz = (Z.max() - Z.min()) / Z.shape[0] + + if np.abs(dx - dy) > 1e-10 or np.abs(dy - dz) > 1e-10 \ + or np.abs(dx - dz) > 1e-10: + print('Warning: grid spacing may be axis dependent.') + + return round(dx) + + def get_containing_voxel_ijk(self, x, y, z): + """ + Internal function to get the meshgrid indices for the center of a voxel + that contains the given position. + + Note that pyMCDS stores meshgrids as 'cartesian' + (indexing='xy' in np.meshgrid) which means that we will have + to use these indices as [j, i, k] on the actual meshgrid objects + + Parameters + ---------- + x : float + x-coordinate for the position + y : float + y-coordinate for the position + z : float + z-coordinate for the position + + Returns + ------- + ijk : list length=3 + contains the i, j, and k indices for the containing voxel's center + """ + xx, yy, zz = self.get_mesh() + ds = self.get_mesh_spacing() + + if x > xx.max(): + warnings.warn('Position out of bounds: x out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting x = x_max!'.format(x, y, z)) + x = xx.max() + elif x < xx.min(): + warnings.warn('Position out of bounds: x out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting x = x_min!'.format(x, y, z)) + x = xx.min() + elif y > yy.max(): + warnings.warn('Position out of bounds: y out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting y = y_max!'.format(x, y, z)) + y = yy.max() + elif y < yy.min(): + warnings.warn('Position out of bounds: y out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting y = y_min!'.format(x, y, z)) + y = yy.min() + elif z > zz.max(): + warnings.warn('Position out of bounds: z out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting z = z_max!'.format(x, y, z)) + z = zz.max() + elif z < zz.min(): + warnings.warn('Position out of bounds: z out of bounds in pyMCDS._get_voxel_idx({0}, {1}, {2}). Setting z = z_min!'.format(x, y, z)) + z = zz.min() - def get_menv_species_list(self): - ''' + i = np.round((x - xx.min()) / ds) + j = np.round((y - yy.min()) / ds) + k = np.round((z - zz.min()) / ds) + + ii, jj, kk = int(i), int(j), int(k) + + return [ii, jj, kk] + + ## MICROENVIRONMENT RELATED FUNCTIONS + + def get_substrate_names(self): + """ Returns list of chemical species in microenvironment Returns ------- - species_list : array-like (str) [n_species,] + species_list : array (str), shape=[n_species,] Contains names of chemical species in microenvironment - ''' + """ species_list = [] for name in self.data['continuum_variables']: species_list.append(name) - - return species_list - - def get_concentrations(self, species): - ''' - Returns the concentration arrays for the specified chemical species - in the microenvironment. + return species_list + + def get_concentrations(self, species_name, z_slice=None): + """ + Returns the concentration array for the specified chemical species + in the microenvironment. Can return either the whole 3D picture, or + a 2D plane of concentrations. + + Parameters + ---------- + species_name : str + Name of the chemical species for which to get concentrations + + z_slice : float + z-axis position to use as plane for 2D output. This value must match + a plane of voxel centers in the z-axis. Returns ------- - conc_arr : array-like (np.float) [x_voxels, y_voxels, z_voxels] + conc_arr : array (np.float) shape=[nx_voxels, ny_voxels, nz_voxels] Contains the concentration of the specified chemical in each voxel. The array spatially maps to a meshgrid of the voxel centers. - ''' - conc_arr = self.data['continuum_variables'][species]['data'] - return conc_arr + """ + if z_slice is not None: + # check to see that z_slice is a valid plane + zz = self.data['mesh']['z_coordinates'] + assert z_slice in zz, 'Specified z_slice {} not in z_coordinates'.format(z_slice) + + # do the processing if its ok + mask = zz == z_slice + full_conc = self.data['continuum_variables'][species_name]['data'] + conc_arr = full_conc[mask].reshape((zz.shape[0], zz.shape[1])) + else: + conc_arr = self.data['continuum_variables'][species_name]['data'] - def get_time(self): - return self.data['metadata']['current_time'] + return conc_arr + def get_concentrations_at(self, x, y, z): + """ + Return concentrations of each chemical species inside a particular voxel + that contains the point described in the arguments. + Parameters + ---------- + x : float + x-position for the point of interest + y : float + y_position for the point of interest + z : float + z_position for the point of interest + + Returns + ------- + concs : array, shape=[n_substrates,] + array of concentrations in the order given by get_substrate_names() + """ + i, j, k = self.get_containing_voxel_ijk(x, y, z) + sub_name_list = self.get_substrate_names() + concs = np.zeros(len(sub_name_list)) + + for ix in range(len(sub_name_list)): + concs[ix] = self.get_concentrations(sub_name_list[ix])[j, i, k] + + return concs + + + ## CELL RELATED FUNCTIONS + + def get_cell_df(self): + """ + Builds DataFrame from data['discrete_cells'] + + Returns + ------- + cells_df : pd.Dataframe, shape=[n_cells, n_variables] + Dataframe containing the cell data for all cells at this time step + """ + cells_df = pd.DataFrame(self.data['discrete_cells']) + return cells_df + + def get_cell_variables(self): + """ + Returns the names of all of the cell variables tracked in ['discrete cells'] + dictionary + + Returns + ------- + var_list : list, shape=[n_variables] + Contains the names of the cell variables + """ + var_list = [] + for name in self.data['discrete_cells']: + var_list.append(name) + return var_list + + def get_cell_df_at(self, x, y, z): + """ + Returns a dataframe for cells in the same voxel as the position given by + x, y, and z. + + Parameters + ---------- + x : float + x-position for the point of interest + y : float + y_position for the point of interest + z : float + z_position for the point of interest + + Returns + ------- + vox_df : pd.DataFrame, shape=[n_cell_in_voxel, n_variables] + cell dataframe containing only cells in the same voxel as the point + specified by x, y, and z. + """ + ds = self.get_mesh_spacing() + xx, yy, zz = self.get_mesh() + i, j, k = self.get_containing_voxel_ijk(x, y, z) + x_vox = xx[j, i, k] + y_vox = yy[j, i, k] + z_vox = zz[j, i, k] + + cell_df = self.get_cell_df() + inside_voxel = ( (cell_df['position_x'] < x_vox + ds/2.) & + (cell_df['position_x'] > x_vox - ds/2.) & + (cell_df['position_y'] < y_vox + ds/2.) & + (cell_df['position_y'] > y_vox - ds/2.) & + (cell_df['position_z'] < z_vox + ds/2.) & + (cell_df['position_z'] > z_vox - ds/2.) ) + vox_df = cell_df[inside_voxel] + return vox_df + def _read_xml(self, xml_file, output_path='.'): - ''' + """ Does the actual work of initializing MultiCellDS by parsing the xml - ''' + """ output_path = Path(output_path) xml_file = output_path / xml_file - try: - tree = TE.parse(xml_file) - except: - print('Data File:', xml_file, 'not found!') - exit(0) + tree = ET.parse(xml_file) + + print('Reading {}'.format(xml_file)) root = tree.getroot() MCDS = {} @@ -110,12 +351,6 @@ def _read_xml(self, xml_file, output_path='.'): MCDS['metadata']['spatial_units'] = mesh_node.get('units') MCDS['mesh'] = {} - # check for cartesian mesh - cartesian = False - mesh_type = mesh_node.get('type') - if mesh_type == 'Cartesian': - cartesian = True - # while we're at it, find the mesh coord_str = mesh_node.find('x_coordinates').text delimiter = mesh_node.find('x_coordinates').get('delimiter') @@ -142,9 +377,11 @@ def _read_xml(self, xml_file, output_path='.'): try: initial_mesh = sio.loadmat(voxel_path)['mesh'] except: - print('Data file', voxel_path, 'missing!') - print('Referenced in', xml_file) - exit(0) + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(voxel_path, xml_file)) + sys.exit(1) + + print('Reading {}'.format(voxel_path)) # center of voxel specified by first three rows [ x, y, z ] # volume specified by fourth row @@ -166,21 +403,33 @@ def _read_xml(self, xml_file, output_path='.'): # contain values for that species in that voxel. me_file = file_node.text me_path = output_path / me_file + # Changes here try: me_data = sio.loadmat(me_path)['multiscale_microenvironment'] except: - print('Data file', me_path, 'missing!') - print('Referenced in', xml_file) - exit(0) + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(me_path, xml_file)) + sys.exit(1) + + print('Reading {}'.format(me_path)) var_children = variables_node.findall('variable') - for i, species in enumerate(var_children): + # we're going to need the linear x, y, and z coordinates later + # but we dont need to get them in the loop + X, Y, Z = np.unique(xx), np.unique(yy), np.unique(zz) + + for si, species in enumerate(var_children): species_name = species.get('name') MCDS['continuum_variables'][species_name] = {} MCDS['continuum_variables'][species_name]['units'] = species.get( 'units') + print('Parsing {:s} data'.format(species_name)) + + # initialize array for concentration data + MCDS['continuum_variables'][species_name]['data'] = np.zeros(xx.shape) + # travel down one level on tree species = species.find('physical_parameter_set') @@ -198,9 +447,18 @@ def _read_xml(self, xml_file, output_path='.'): MCDS['continuum_variables'][species_name]['decay_rate']['units'] \ = species.find('decay_rate').get('units') - # store data from microenvironment file as numpy array - MCDS['continuum_variables'][species_name]['data'] \ - = me_data[4+i, :].reshape(xx.shape) + # store data from microenvironment file as numpy array + # iterate over each voxel + for vox_idx in range(MCDS['mesh']['voxels']['centers'].shape[1]): + # find the center + center = MCDS['mesh']['voxels']['centers'][:, vox_idx] + + i = np.where(np.abs(center[0] - X) < 1e-10)[0][0] + j = np.where(np.abs(center[1] - Y) < 1e-10)[0][0] + k = np.where(np.abs(center[2] - Z) < 1e-10)[0][0] + + MCDS['continuum_variables'][species_name]['data'][j, i, k] \ + = me_data[4+si, vox_idx] # in order to get to the good stuff we have to pass through a few different # hierarchal levels @@ -235,9 +493,11 @@ def _read_xml(self, xml_file, output_path='.'): try: cell_data = sio.loadmat(cell_path)['cells'] except: - print('Data file', cell_path, 'missing!') - print('Referenced in', xml_file) - exit(0) + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(cell_path, xml_file)) + sys.exit(1) + + print('Reading {}'.format(cell_path)) for col in range(len(data_labels)): MCDS['discrete_cells'][data_labels[col]] = cell_data[col, :] diff --git a/read_MultiCellDS_xml.py b/read_MultiCellDS_xml.py index 540d676..b1a1690 100644 --- a/read_MultiCellDS_xml.py +++ b/read_MultiCellDS_xml.py @@ -1,18 +1,17 @@ -import xml.etree.ElementTree as TE +import xml.etree.ElementTree as ET import numpy as np import pandas as pd import scipy.io as sio from pathlib import Path -def read_MultiCellDS_xml(file_name, output_dir = '.'): - - output_path = Path(output_dir) - xml_file = output_path / 'output00000001.xml' - try: - tree = TE.parse(xml_file) - except: - print('Data File:', xml_file, 'not found!') - exit(0) + +def read_MultiCellDS_xml(xml_file, output_path='.'): + """ + Parses xml and returns a dictionary + """ + output_path = Path(output_path) + xml_file = output_path / xml_file + tree = ET.parse(xml_file) root = tree.getroot() MCDS = {} @@ -38,12 +37,6 @@ def read_MultiCellDS_xml(file_name, output_dir = '.'): MCDS['metadata']['spatial_units'] = mesh_node.get('units') MCDS['mesh'] = {} - # check for cartesian mesh - cartesian = False - mesh_type = mesh_node.get('type') - if mesh_type == 'Cartesian': - cartesian = True - # while we're at it, find the mesh coord_str = mesh_node.find('x_coordinates').text delimiter = mesh_node.find('x_coordinates').get('delimiter') @@ -70,9 +63,9 @@ def read_MultiCellDS_xml(file_name, output_dir = '.'): try: initial_mesh = sio.loadmat(voxel_path)['mesh'] except: - print('Data file', voxel_path, 'missing!') - print('Referenced in', xml_file) - exit(0) + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(voxel_path, xml_file)) + sys.exit(1) # center of voxel specified by first three rows [ x, y, z ] # volume specified by fourth row @@ -89,25 +82,35 @@ def read_MultiCellDS_xml(file_name, output_dir = '.'): file_node = me_node.find('data').find('filename') # micro environment data is shape [4+n, len(voxels)] where n is the number - # of species being tracked. the first 3 rows represent (x, y, z) of voxel + # of species being tracked. the first 3 rows represent (x, y, z) of voxel # centers. The fourth row contains the voxel volume. The 5th row and up will # contain values for that species in that voxel. me_file = file_node.text me_path = output_path / me_file + # Changes here try: me_data = sio.loadmat(me_path)['multiscale_microenvironment'] except: - print('Data file', me_path, 'missing!') - print('Referenced in', xml_file) - exit(0) + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(me_path, xml_file)) + sys.exit(1) var_children = variables_node.findall('variable') - for i, species in enumerate(var_children): + # we're going to need the linear x, y, and z coordinates later + # but we dont need to get them in the loop + X, Y, Z = np.unique(xx), np.unique(yy), np.unique(zz) + + for si, species in enumerate(var_children): species_name = species.get('name') MCDS['continuum_variables'][species_name] = {} - MCDS['continuum_variables'][species_name]['units'] = species.get('units') - + MCDS['continuum_variables'][species_name]['units'] = species.get( + 'units') + + # initialize array for concentration data + MCDS['continuum_variables'][species_name]['data'] = np.zeros( + xx.shape) + # travel down one level on tree species = species.find('physical_parameter_set') @@ -124,10 +127,19 @@ def read_MultiCellDS_xml(file_name, output_dir = '.'): = float(species.find('decay_rate').text) MCDS['continuum_variables'][species_name]['decay_rate']['units'] \ = species.find('decay_rate').get('units') - + # store data from microenvironment file as numpy array - MCDS['continuum_variables'][species_name]['data'] \ - = me_data[4+i, :].reshape(xx.shape) + # iterate over each voxel + for vox_idx in range(MCDS['mesh']['voxels']['centers'].shape[1]): + # find the center + center = MCDS['mesh']['voxels']['centers'][:, vox_idx] + + i = np.where(np.abs(center[0] - X) < 1e-10)[0][0] + j = np.where(np.abs(center[1] - Y) < 1e-10)[0][0] + k = np.where(np.abs(center[2] - Z) < 1e-10)[0][0] + + MCDS['continuum_variables'][species_name]['data'][j, i, k] \ + = me_data[4+si, vox_idx] # in order to get to the good stuff we have to pass through a few different # hierarchal levels @@ -142,32 +154,31 @@ def read_MultiCellDS_xml(file_name, output_dir = '.'): break MCDS['discrete_cells'] = {} - df_labels = [] + data_labels = [] # iterate over 'label's which are children of 'labels' these will be used to - # label dataframe columns + # label data arrays for label in cell_node.find('labels').findall('label'): + # I don't like spaces in my dictionary keys + fixed_label = label.text.replace(' ', '_') if int(label.get('size')) > 1: + # tags to differentiate repeated labels (usually space related) dir_label = ['_x', '_y', '_z'] for i in range(int(label.get('size'))): - df_labels.append(label.text + dir_label[i]) + data_labels.append(fixed_label + dir_label[i]) else: - df_labels.append(label.text) - + data_labels.append(fixed_label) + # load the file cell_file = cell_node.find('filename').text cell_path = output_path / cell_file try: cell_data = sio.loadmat(cell_path)['cells'] except: - print('Data file', cell_path, 'missing!') - print('Referenced in', xml_file) - exit(0) - - for col in range(len(df_labels)): - MCDS['discrete_cells'][df_labels[col]] = cell_data[col, :] + raise FileNotFoundError( + "No such file or directory:\n'{}' referenced in '{}'".format(cell_path, xml_file)) + sys.exit(1) - # we will save the cell information as a dataframe in the MCDS object - # cell_df = pd.DataFrame(cell_data, columns=df_labels) - # MCDS['discrete_cells']['data_df'] = cell_df + for col in range(len(data_labels)): + MCDS['discrete_cells'][data_labels[col]] = cell_data[col, :] return MCDS diff --git a/single_set/output00000001_cells.mat b/single_set/output00000001_cells.mat deleted file mode 100644 index e10fc91..0000000 Binary files a/single_set/output00000001_cells.mat and /dev/null differ diff --git a/single_set/output00000001_cells_physicell.mat b/single_set/output00000001_cells_physicell.mat deleted file mode 100644 index 92cbf7e..0000000 Binary files a/single_set/output00000001_cells_physicell.mat and /dev/null differ diff --git a/test_timeseries.py b/test_timeseries.py index 60270c6..62e7724 100644 --- a/test_timeseries.py +++ b/test_timeseries.py @@ -5,4 +5,4 @@ chem_list = mcds.timeseries[0].get_menv_species_list() mcds.plot_menv_total(chem_list) -mcds.plot_cell_type_counts() \ No newline at end of file +mcds.plot_cell_type_counts()