In [1]:
import pandas as pd
# Load the tab-separated tracking table and keep only rows whose time index
# "t" does not exceed the last time point used in this analysis.
LAST_TIME_POINT = 242
ts_df = pd.read_csv("./data/tracks.txt", sep="\t")
ts_df = ts_df.loc[ts_df["t"] <= LAST_TIME_POINT]
cell_names = ts_df["name"].unique()

# Keep every cell name except those whose single remaining observation is at
# the last time point.
valid_cell_names = []
for name in cell_names:
    time_points = ts_df.loc[ts_df["name"] == name, "t"].values
    # Compare the *length* to 1. `len(time_points == 1)` would be the length of
    # an element-wise boolean mask, which is truthy for any non-empty array and
    # therefore never checks the number of observations.
    if len(time_points) == 1 and time_points[0] == LAST_TIME_POINT:
        continue
    valid_cell_names.append(name)

In [2]:
# name mapping
def map_names(did):
    """Translate a lineage 'did' into the 'name' used by the tracker.

    Only a handful of cells are tracked under a 'name' that differs from
    their 'did'; those are renamed here. Every other value is returned
    unchanged.
    """
    renamed_cells = (("P4a", "Z3"), ("P4p", "Z2"), ("P0a", "AB"))
    for lineage_id, tracker_name in renamed_cells:
        if did == lineage_id:
            return tracker_name
    return did

In [3]:
# Number of cell names that were appended to valid_cell_names by the filter loop.
len(valid_cell_names)

944

In [4]:
import json

def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and parse it.

    :param file_path: Location of the JSON file to read.
    :return: The decoded JSON document as Python objects.
    """
    with open(file_path, encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
    
# Parse the lineage file; the resulting object is later handed to dfs() as the root node.
lineage_data = load_json('./data/cell_lineage.json')


In [5]:
from collections import defaultdict
# Accumulators filled by dfs(). The two lists are index-aligned:
# terminal_parents[i] is the parent of terminal_nodes[i].
terminal_nodes = []
terminal_parents = []
# Mapped parent name -> list of mapped names of its recorded leaf children.
terminal_graph = defaultdict(list)


def dfs(node, parent):
    """Depth-first walk of the lineage tree that records tracked leaves.

    A node is a leaf when it has no (or an empty / null) "children" entry.
    For every leaf whose mapped name is in ``valid_cell_names``, the leaf and
    its parent (both passed through ``map_names``) are appended to
    ``terminal_nodes`` / ``terminal_parents`` and linked in ``terminal_graph``.

    :param node: Lineage node, a dict with a "did" key and optional "children".
    :param parent: The node's parent dict, or None for the root.
    :return: None; results are written to the module-level accumulators.
    """
    # `or []` also treats an explicit `"children": null` as "no children"
    # instead of failing on len(None).
    children = node.get("children") or []
    if children:
        for child in children:
            dfs(child, node)
        return

    if parent is None:
        # A childless root has no parent to pair with; previously this case
        # raised TypeError when subscripting None.
        return

    lookup_name = map_names(node["did"])
    p_lookup_name = map_names(parent["did"])
    if lookup_name in valid_cell_names:
        terminal_nodes.append(lookup_name)
        terminal_parents.append(p_lookup_name)
        terminal_graph[p_lookup_name].append(lookup_name)

# Start the traversal at the top-level lineage object; it is passed with no parent.
dfs(lineage_data, None)

In [6]:
# Number of leaf cells that dfs() recorded in terminal_nodes.
len(terminal_nodes)

248

In [7]:
# For each terminal node and each terminal parent, take the last row that
# ts_df holds for that name, slice the values at column positions 1..3 and
# scale them by 0.1625.
# NOTE(review): positions 1..3 are presumably the x/y/z columns and 0.1625 a
# unit conversion factor -- confirm against the tracks file.
terminal_node_cords = [
    ts_df.loc[ts_df['name'] == leaf_name].values[-1][1:4] * 0.1625
    for leaf_name in terminal_nodes
]
terminal_parent_cords = [
    ts_df.loc[ts_df['name'] == parent_name].values[-1][1:4] * 0.1625
    for parent_name in terminal_parents
]

In [8]:
import numpy as np
from scipy.optimize import linear_sum_assignment

In [9]:
# Square, zero-initialised matrix with one row and one column per entry of
# terminal_parent_cords.
n_pairs = len(terminal_parent_cords)
cost_mat = np.zeros((n_pairs, n_pairs))

In [10]:
# cost_mat[i, j] = Euclidean distance between the coordinates of parent i and
# the coordinates of terminal node j.
n_parents = len(terminal_parent_cords)
for i, parent_xyz in enumerate(terminal_parent_cords):
    for j in range(n_parents):
        cost_mat[i, j] = np.linalg.norm(parent_xyz - terminal_node_cords[j])

In [11]:
# Solve the linear sum assignment problem on cost_mat: row_ind[k] is paired
# with col_ind[k] so that the summed cost of all pairs is minimal.
row_ind, col_ind = linear_sum_assignment(cost_mat)

In [26]:
# Count the optimal (parent row, node column) pairs in which the assigned node
# is one of the leaf children recorded for that parent in terminal_graph.
count = sum(
    1
    for parent_idx, child_idx in zip(row_ind, col_ind)
    if terminal_nodes[child_idx] in terminal_graph[terminal_parents[parent_idx]]
)

In [30]:
# Fraction of optimal assignments that agree with the lineage. The denominator
# is taken from the data instead of the hard-coded 248 so it cannot go stale.
count / len(terminal_nodes)

0.625

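Optimal assignment cost: 612.08
Lineage (actual parent–child) assignment cost: 696.64
Optimal assignments that agree with the lineage: 62.5% (155 of 248)
TODO: run a random-assignment (Monte Carlo) test for comparison.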

In [12]:
# Total cost of the pairing returned by linear_sum_assignment.
cost_mat[row_ind, col_ind].sum() 

np.float64(612.0831926576882)

In [13]:
# Total cost of the identity pairing (row i with column i), i.e. each terminal
# parent paired with the terminal node stored at the same index.
cost_mat.diagonal().sum()

np.float64(696.6409294449925)

In [20]:
# Total cost of randomly permuted assignments, one sample per trial.
N_TRIALS = 10_000_000
# Seeded generator so the sampled costs (and their min/max) are reproducible.
rng = np.random.default_rng(0)
# Permutation length comes from the matrix itself rather than a hard-coded 248.
n_columns = cost_mat.shape[1]
monte_carlo_list = [
    cost_mat[row_ind, rng.permutation(n_columns)].sum()
    for _ in range(N_TRIALS)
]
 

In [21]:
# Cost of the identity (row i with column i) pairing, and the number of random
# samples whose total cost is strictly below it.
lineage_cost = cost_mat.diagonal().sum()
count = sum(1 for sampled_cost in monte_carlo_list if sampled_cost < lineage_cost)

In [23]:
# Smallest total cost among the sampled random assignments.
min(monte_carlo_list)

np.float64(3700.6407127242264)

In [24]:
# Largest total cost among the sampled random assignments.
max(monte_carlo_list)

np.float64(4690.391465256001)