# Compute distance between markup points

Import modules and define functions

In [19]:
import json
import os
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Print numpy arrays with fixed-point notation (no exponents) and 4 decimals,
# which makes coordinates easier to eyeball while debugging.
np.set_printoptions(precision=4, suppress=True)

# make prototype for storing CT, T1 and electrodes filenames
class SubjectFiles:
    """File names that belong to one subject folder.

    Attributes:
        subject_root: subject folder, as passed by the caller.
        ct_file: file name of the CT volume.
        t1_file: file name of the T1 volume.
        electrodes_file: file name of the electrode markup file.
    """

    def __init__(self, subject_root, ct_file, t1_file, electrodes_file):
        self.subject_root = subject_root
        self.ct_file = ct_file
        self.t1_file = t1_file
        self.electrodes_file = electrodes_file

    def ct_date(self):
        """Return the 8-digit date token of the CT file name, or None."""
        return self.get_date(self.ct_file)

    def t1_date(self):
        """Return the 8-digit date token of the T1 file name, or None."""
        return self.get_date(self.t1_file)

    @staticmethod
    def get_date(filename):
        """Extract the first run of exactly eight digits enclosed by underscores.

        Args:
            filename: file name to search, e.g. ``"sub_CT_20200131_x.nii.gz"``.

        Returns:
            The eight digits as a string, or None when the name has no
            ``_<8 digits>_`` token.
        """
        # Requires the `re` module, which must be imported at the top of the notebook.
        match = re.search(r"_(\d{8})_", filename)
        if match:
            return match.group(1)
        return None
    
def load_json(path: str) -> pd.DataFrame:
    """Load the control points of the first markup in a ``.mrk.json`` file.

    Args:
        path: path to the markups JSON file.

    Returns:
        DataFrame with columns ``label`` and ``position``; each position is a
        3-element list converted from LPS to RAS (x and y negated).

    Raises:
        AssertionError: if the first markup is not in millimetres / LPS.
    """
    # Context manager so the file handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as f:
        markups_prediction_json = json.load(f)

    first_markup = markups_prediction_json["markups"][0]
    assert first_markup["coordinateUnits"] == "mm", f"{path}: expected coordinateUnits 'mm'"
    assert first_markup["coordinateSystem"] == "LPS", f"{path}: expected coordinateSystem 'LPS'"

    df = pd.DataFrame(first_markup["controlPoints"], columns=["label", "position"])
    df["position"] = df["position"].apply(lambda x: [-x[0], -x[1], x[2]])  # LPS -> RAS
    return df

def load_fcsv(path: str) -> pd.DataFrame:
    """Load a fiducial ``.fcsv`` file into a label/position table.

    The column names are taken from the ``# columns = ...`` comment line and
    the data rows are read from the line that follows it.

    Args:
        path: path to the ``.fcsv`` file.

    Returns:
        DataFrame with columns ``label`` and ``position``; each position is the
        ``[x, y, z]`` list of that row. No coordinate-system conversion is done.

    Raises:
        ValueError: if the file has no ``# columns = `` header line.
    """
    with open(path, "r") as f:
        # Doubled commas are collapsed before parsing, presumably to drop an
        # empty field that would otherwise shift columns (TODO confirm).
        electrodes_str = f.read().replace(",,", ",")

    # Locate the header line; the for/else raises when it is absent instead of
    # failing later with an unbound `columns` variable.
    for header_index, line in enumerate(electrodes_str.split("\n")):
        if line.startswith("# columns = "):
            columns = line.strip().replace("# columns = ", "").split(",")
            break
    else:
        raise ValueError(f"{path}: no '# columns = ' header line found")

    # Data starts right after the header line (3 lines in for the usual layout).
    electrodes_df = pd.read_csv(
        StringIO(electrodes_str), sep=",", skiprows=header_index + 1, names=columns
    )
    # Build the per-row [x, y, z] lists in one go; also works for an empty table.
    positions = electrodes_df[["x", "y", "z"]].to_numpy().tolist()
    electrodes_df["position"] = pd.Series(positions, index=electrodes_df.index, dtype=object)
    return electrodes_df[["label", "position"]]


Load data

In [20]:
input_dir = "../../Data"
subjects: list[SubjectFiles] = []

# Walk every folder below input_dir and keep those that contain a CT volume,
# a T1 volume, the ground-truth electrodes.fcsv and the ContactDetector output.
for root, _dirs, files in os.walk(input_dir):
    ct_file = None
    t1_file = None
    detection_file = None

    for file in files:
        is_volume = file.endswith((".nii", ".nii.gz"))
        if "CT" in file and is_volume:
            ct_file = file
        elif "T1" in file and not file.startswith("rand_affine_") and is_volume:
            t1_file = file
        elif file == "ContactDetector.mrk.json":
            detection_file = file

    # The distance computation reads electrodes.fcsv from the same folder, so
    # folders without it are skipped here rather than failing later.
    has_ground_truth = "electrodes.fcsv" in files

    if ct_file and t1_file and detection_file and has_ground_truth:
        # relpath (instead of string replacement) also handles root == input_dir,
        # yielding "." so that os.path.join(input_dir, subject_root) stays valid.
        subject_root = os.path.relpath(root, input_dir)
        # NOTE: the fourth argument is the detector output file name, as before.
        subjects.append(SubjectFiles(subject_root, ct_file, t1_file, detection_file))

Compute distance between markup points:

In [21]:
markups_all = []
for subject in subjects:
    subject_dir = os.path.join(input_dir, subject.subject_root)
    gt_df = load_fcsv(os.path.join(subject_dir, "electrodes.fcsv"))
    contact_detector_df = load_json(os.path.join(subject_dir, "ContactDetector.mrk.json"))

    # Outer merge on label keeps contacts that exist in only one source;
    # the missing side ends up as NaN and produces a NaN distance below.
    subject_markups = pd.merge(
        gt_df, contact_detector_df, on="label", how="outer", suffixes=("_gt", "_contact_detector")
    )

    # Split "<prefix><digits>" labels once; labels without trailing digits give NaN.
    label_parts = subject_markups["label"].str.extract(r"(.*?)(\d+)$")
    subject_markups["prefix"] = label_parts[0]
    subject_markups["contact"] = label_parts[1]
    subject_markups["subject_root"] = subject.subject_root

    # Euclidean distance between the two positions of every label, computed in
    # one pass instead of row-by-row .loc writes on a sliced frame.
    subject_markups["norm(gt,contact detector)"] = [
        np.linalg.norm(np.asarray(gt, dtype=float) - np.asarray(detected, dtype=float))
        for gt, detected in zip(
            subject_markups["position_gt"], subject_markups["position_contact_detector"]
        )
    ]

    # fix the column order
    markups_all.append(
        subject_markups[
            [
                "subject_root",
                "label",
                "prefix",
                "contact",
                "position_gt",
                "position_contact_detector",
                "norm(gt,contact detector)",
            ]
        ]
    )

if not markups_all:
    raise ValueError(f"No subjects found under {input_dir!r}; nothing to compare")

markups = pd.concat(markups_all)

# save to csv
markups.to_csv("results.csv", index=False)

markups

Unnamed: 0,subject_root,label,prefix,contact,position_gt,position_contact_detector,"norm(gt,contact detector)"
0,1673284,A1,A,1,"[0.05, 29.23, 53.48]","[0.20397943333425417, 29.20470322404134, 53.49...",0.156366
1,1673284,A10,A,10,"[25.37, 40.43, 68.49]","[25.47087947652311, 40.45460149394766, 68.5250...",0.109580
2,1673284,A11,A,11,"[28.13, 41.62, 70.28]","[28.272760663193438, 41.67479450128934, 70.356...",0.171034
3,1673284,A12,A,12,"[30.89, 42.77, 72.12]","[30.96981314782937, 42.81192349296771, 72.1716...",0.103886
4,1673284,A2,A,2,"[2.93, 30.4, 55.11]","[3.0039121505803195, 30.447632106782805, 55.15...",0.098063
...,...,...,...,...,...,...,...
192,890775,Xs5,Xs,5,"[-31.73, 15.22, 24.4]","[-31.70789205742649, 15.307505739111576, 24.39...",0.090460
193,890775,Xs6,Xs,6,"[-31.76, 14.41, 27.93]","[-31.730885604066586, 14.509194923468442, 27.8...",0.117314
194,890775,Xs7,Xs,7,"[-31.78, 13.6, 31.46]","[-31.755527454312258, 13.734198455449913, 31.2...",0.248028
195,890775,Xs8,Xs,8,"[-31.8, 12.79, 34.98]","[-31.781480020386326, 12.956694362078807, 34.6...",0.387815


Table of contacts that are missing from either the ground truth or the contact detector output:

In [22]:
# rows for which at least one of the two sources has no position
markups[markups[["position_contact_detector", "position_gt"]].isna().any(axis=1)]

Unnamed: 0,subject_root,label,prefix,contact,position_gt,position_contact_detector,"norm(gt,contact detector)"


Group by subject_root and prefix, get max norm and mean norm for each group:

In [23]:
# One row per (subject, electrode prefix): worst and average distance,
# ordered so the electrodes with the largest single-contact error come first.
electrodes = (
    markups
    .groupby(["subject_root", "prefix"])
    .agg({"subject_root": "first", "norm(gt,contact detector)": ["max", "mean"]})
    .sort_values(by=("norm(gt,contact detector)", "max"), ascending=False)
)
electrodes

Unnamed: 0_level_0,Unnamed: 1_level_0,subject_root,"norm(gt,contact detector)","norm(gt,contact detector)"
Unnamed: 0_level_1,Unnamed: 1_level_1,first,max,mean
subject_root,prefix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1670379,O,1670379,1.781619,0.508867
1812938,Pd,1812938,1.575940,0.388619
1883612/i1,Q,1883612/i1,1.409729,0.359786
2001979,E,2001979,1.187715,0.337377
1923351,P,1923351,1.169610,0.668312
...,...,...,...,...
2132086,K,2132086,0.054074,0.029403
52187,K,52187,0.053954,0.034217
2102982,CP,2102982,0.050617,0.032970
2307417,E,2307417,0.050599,0.023886


Consider a mean norm > 1 mm as a failed detection for that electrode:

In [24]:
# for subject_root count norm.mean > 1
# Electrodes whose mean distance exceeds this value count as failed detections
# (same unit as the positions; the detector markups are asserted to be in mm).
FAILED_MEAN_NORM_THRESHOLD = 1

failed = electrodes[electrodes[("norm(gt,contact detector)", "mean")] > FAILED_MEAN_NORM_THRESHOLD]

# Save to csv. The electrode prefix lives only in the index, so it is moved into
# a column first; otherwise index=False would drop it and the rows could not be
# attributed to an electrode.
failed.reset_index(level="prefix").to_csv("failed.csv", index=False)
failed

Unnamed: 0_level_0,Unnamed: 1_level_0,subject_root,"norm(gt,contact detector)","norm(gt,contact detector)"
Unnamed: 0_level_1,Unnamed: 1_level_1,first,max,mean
subject_root,prefix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
