# Compute distance between markup points

Import modules and define functions

In [1]:
import json
import os
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Print NumPy arrays in fixed-point form with 4 decimals (no scientific
# notation) so values are easier to read while debugging.
np.set_printoptions(precision=4, suppress=True)

class SubjectFiles:
    """Record of the CT, T1 and electrode-markup filenames found for one subject.

    Attributes:
        subject_root: Subject folder, as passed in by the caller.
        ct_file: Filename of the CT volume.
        t1_file: Filename of the T1 volume.
        electrodes_file: Filename of the electrode markup file.
    """

    # An 8-digit token enclosed in underscores, e.g. "_20210304_"
    # (presumably a YYYYMMDD acquisition date -- TODO confirm naming scheme).
    _DATE_PATTERN = re.compile(r"_(\d{8})_")

    def __init__(self, subject_root, ct_file, t1_file, electrodes_file):
        self.subject_root = subject_root
        self.ct_file = ct_file
        self.t1_file = t1_file
        self.electrodes_file = electrodes_file

    def ct_date(self):
        """Return the 8-digit token embedded in the CT filename, or None."""
        return self.get_date(self.ct_file)

    def t1_date(self):
        """Return the 8-digit token embedded in the T1 filename, or None."""
        return self.get_date(self.t1_file)

    @staticmethod
    def get_date(filename):
        """Extract the first ``_DDDDDDDD_`` token from ``filename``.

        Args:
            filename: Name to search.

        Returns:
            The eight digits as a string, or None when the name contains no
            underscore-delimited 8-digit token.
        """
        match = SubjectFiles._DATE_PATTERN.search(filename)
        return match.group(1) if match else None
    
def load_json(path: str) -> pd.DataFrame:
    """Load the control points of the first markup in a markups JSON file.

    Only ``markups[0]`` is read. The markup must declare millimetre units and
    the LPS coordinate system; positions are converted to RAS by negating the
    first two components.

    Args:
        path: Path to the markups JSON file.

    Returns:
        DataFrame with columns ``label`` and ``position``, where ``position``
        is a 3-element list in RAS.

    Raises:
        AssertionError: If the markup is not in mm or not in LPS.
    """
    # Context manager so the handle is closed even if parsing fails;
    # JSON is UTF-8 by specification, so do not rely on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        markups_prediction_json = json.load(f)

    markup = markups_prediction_json["markups"][0]
    assert markup["coordinateUnits"] == "mm"
    assert markup["coordinateSystem"] == "LPS"

    df = pd.DataFrame(markup["controlPoints"], columns=["label", "position"])
    df["position"] = df["position"].apply(lambda p: [-p[0], -p[1], p[2]])  # LPS -> RAS
    return df

def load_fcsv(path: str) -> pd.DataFrame:
    """Load labelled points from an ``.fcsv`` markup file.

    The column names are taken from the ``# columns = ...`` header line, and
    the first three lines of the file are skipped as header when parsing.
    Every ``,,`` in the file is collapsed to ``,`` before parsing.

    Args:
        path: Path to the ``.fcsv`` file.

    Returns:
        DataFrame with columns ``label`` and ``position``, where ``position``
        is the ``[x, y, z]`` list of the row.

    Raises:
        ValueError: If the file has no ``# columns = `` header line.
    """
    # NOTE(review): coordinates are returned exactly as stored; no coordinate
    # system check or LPS/RAS conversion happens here -- confirm the files
    # are already in the system the caller expects.
    with open(path, "r") as f:
        electrodes_str = f.read().replace(",,", ",")

    columns = None
    for line in electrodes_str.split("\n"):
        if line.startswith("# columns = "):
            columns = line.strip().replace("# columns = ", "").split(",")
            break
    if columns is None:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError(f"No '# columns = ' header line found in {path!r}")

    electrodes_df = pd.read_csv(StringIO(electrodes_str), sep=",", skiprows=3, names=columns)
    # to_numpy().tolist() yields plain Python numbers (not NumPy scalars) and
    # also works when the file contains no data rows.
    electrodes_df["position"] = electrodes_df[["x", "y", "z"]].to_numpy().tolist()
    return electrodes_df[["label", "position"]]


Load data

In [2]:
input_dir = "../../Data"
subjects: list[SubjectFiles] = []

# Walk every folder below input_dir and keep those that contain a CT volume,
# a T1 volume and the contact detector output (ContactDetector.mrk.json).
for root, dirs, files in os.walk(input_dir):
    ct_file = None
    t1_file = None
    detection_file = None

    # If several files match the same role, the last one listed wins.
    for file in files:
        is_nifti = file.endswith((".nii", ".nii.gz"))
        if "CT" in file and is_nifti:
            ct_file = file
        elif "T1" in file and not file.startswith("rand_affine_") and is_nifti:
            t1_file = file
        elif file == "ContactDetector.mrk.json":
            detection_file = file

    if ct_file and t1_file and detection_file:
        # relpath (rather than stripping a string prefix) also handles files
        # located directly in input_dir and an input_dir with a trailing
        # separator, so os.path.join(input_dir, subject_root, ...) stays valid.
        subject_root = os.path.relpath(root, input_dir)
        subjects.append(SubjectFiles(subject_root, ct_file, t1_file, detection_file))

Compute distance between markup points:

In [3]:
markups_all = []
for subject in subjects:
    subject_dir = os.path.join(input_dir, subject.subject_root)
    gt_df = load_fcsv(os.path.join(subject_dir, "electrodes.fcsv"))
    contact_detector_df = load_json(os.path.join(subject_dir, "ContactDetector.mrk.json"))

    # Outer merge on label keeps contacts present on only one side; their
    # missing position is NaN and so is the resulting norm.
    subject_markups = pd.merge(
        gt_df,
        contact_detector_df,
        on="label",
        how="outer",
        suffixes=("_gt", "_contact_detector"),
    )

    # Split e.g. "Xs12" into electrode prefix "Xs" and contact number "12".
    label_parts = subject_markups["label"].str.extract(r"(.*?)(\d+)$")
    subject_markups["prefix"] = label_parts[0]
    subject_markups["contact"] = label_parts[1]
    subject_markups["subject_root"] = subject.subject_root

    # Change column order; .copy() so adding the norm column below writes to
    # an independent frame rather than to a selection of another one.
    subject_markups = subject_markups[
        ["subject_root", "label", "prefix", "contact", "position_gt", "position_contact_detector"]
    ].copy()

    # Euclidean distance between ground-truth and detected position per row,
    # computed in one pass so the column exists even for an empty frame.
    subject_markups["norm(gt,contact detector)"] = np.array(
        [
            np.linalg.norm(np.array(position_gt) - np.array(position_detected))
            for position_gt, position_detected in zip(
                subject_markups["position_gt"],
                subject_markups["position_contact_detector"],
            )
        ],
        dtype=float,
    )

    markups_all.append(subject_markups)

if not markups_all:
    # pd.concat([]) would raise a ValueError that does not say what is missing.
    raise ValueError(f"No subjects found under {input_dir!r}; nothing to compare")

markups = pd.concat(markups_all)

# save to csv
markups.to_csv("results.csv", index=False)

markups

Unnamed: 0,subject_root,label,prefix,contact,position_gt,position_contact_detector,"norm(gt,contact detector)"
0,1673284,A1,A,1,"[0.05, 29.23, 53.48]","[0.20259764656171342, 29.202499229310675, 53.4...",0.155341
1,1673284,A10,A,10,"[25.37, 40.43, 68.49]","[25.4611807514229, 40.452824995441745, 68.5184...",0.098201
2,1673284,A11,A,11,"[28.13, 41.62, 70.28]","[28.262181608377162, 41.673584951614025, 70.34...",0.158529
3,1673284,A12,A,12,"[30.89, 42.77, 72.12]","[30.95846748739138, 42.81083054561752, 72.1635...",0.090823
4,1673284,A2,A,2,"[2.93, 30.4, 55.11]","[3.0011904019190183, 30.448221071122504, 55.15...",0.095504
...,...,...,...,...,...,...,...
192,890775,Xs5,Xs,5,"[-31.73, 15.22, 24.4]","[-31.707361599482127, 15.327032000437313, 24.3...",0.142524
193,890775,Xs6,Xs,6,"[-31.76, 14.41, 27.93]","[-31.729642956805023, 14.543537168158394, 27.7...",0.249286
194,890775,Xs7,Xs,7,"[-31.78, 13.6, 31.46]","[-31.754394327262546, 13.760270830540435, 31.1...",0.363328
195,890775,Xs8,Xs,8,"[-31.8, 12.79, 34.98]","[-31.7805969587571, 12.974862439257933, 34.547...",0.470768


Table of contacts missing from either the contact detector output or the ground truth:

In [4]:
# Rows where the ground-truth position, the detected position, or both are absent.
markups.loc[lambda df: df[["position_gt", "position_contact_detector"]].isna().any(axis=1)]

Unnamed: 0,subject_root,label,prefix,contact,position_gt,position_contact_detector,"norm(gt,contact detector)"
193,1637126,M10,M,10,"[45.54, -29.39, 8.82]",,
194,1637126,M11,M,11,"[48.82, -30.62, 8.92]",,
195,1637126,M12,M,12,"[52.1, -31.85, 9.02]",,
196,1637126,M13,M,13,"[55.38, -33.07, 9.1]",,
197,1637126,M14,M,14,"[58.66, -34.29, 9.17]",,
198,1637126,M15,M,15,"[61.94, -35.5, 9.21]",,
199,1637126,M16,M,16,"[65.22, -36.7, 9.24]",,
200,1637126,M17,M,17,"[68.51, -37.88, 9.24]",,
201,1637126,M18,M,18,"[71.79, -39.06, 9.2]",,
205,1637126,M5,M,5,"[29.14, -23.3, 8.29]",,


Group by subject_root and prefix, get max norm and mean norm for each group:

In [5]:
# One row per (subject_root, prefix) electrode with the worst and the average
# contact distance.
electrodes = markups.groupby(["subject_root", "prefix"]).agg(
    {
        "subject_root": "first",
        "norm(gt,contact detector)": ["max", "mean"],
    }
)
# Largest maximum distance first.
electrodes = electrodes.sort_values(by=("norm(gt,contact detector)", "max"), ascending=False)
electrodes

Unnamed: 0_level_0,Unnamed: 1_level_0,subject_root,"norm(gt,contact detector)","norm(gt,contact detector)"
Unnamed: 0_level_1,Unnamed: 1_level_1,first,max,mean
subject_root,prefix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1637126,M,1637126,60.414070,60.348085
1923351,C,1923351,52.167086,44.330486
1923351,G,1923351,21.506365,21.126667
2394232,I,2394232,3.597391,3.534609
1396542,Xd,1396542,3.521016,3.450123
...,...,...,...,...
1239007,Wd,1239007,0.059851,0.046283
2394232,C,2394232,0.058636,0.041319
2185798/i2,U,2185798/i2,0.058002,0.035635
2179801,V,2179801,0.057047,0.038045


Consider an electrode's detection failed when its mean norm exceeds 1 mm:

In [6]:
# An electrode counts as failed when the mean distance over its contacts
# exceeds this threshold (same units as the positions; load_json checks for mm).
FAILED_MEAN_NORM_THRESHOLD = 1
failed = electrodes[electrodes[("norm(gt,contact detector)", "mean")] > FAILED_MEAN_NORM_THRESHOLD]

# Save to csv. The (subject_root, prefix) index is written as well: it is the
# only place the electrode prefix is stored, so dropping it would leave the
# file unable to say which electrode of a subject failed.
failed.to_csv("failed.csv")
failed

Unnamed: 0_level_0,Unnamed: 1_level_0,subject_root,"norm(gt,contact detector)","norm(gt,contact detector)"
Unnamed: 0_level_1,Unnamed: 1_level_1,first,max,mean
subject_root,prefix,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1637126,M,1637126,60.41407,60.348085
1923351,C,1923351,52.167086,44.330486
1923351,G,1923351,21.506365,21.126667
2394232,I,2394232,3.597391,3.534609
1396542,Xd,1396542,3.521016,3.450123
1637126,C,1637126,3.498052,3.445239
2394232,G,2394232,3.433468,3.380412
