In [None]:

import json
import os
from math import sqrt
from tqdm import tqdm
import pandas as pd


In [None]:
# Image extent used by restore_coord to mirror coordinates when a series is
# flagged as flipped: index 0 is subtracted from for x, index 1 for y.
image_size = (512, 512)

In [None]:
# Load the per-series metadata table; combine_result looks rows up by
# "series_id" and reads their "origins", "spacings" and "is_flip" columns.
mhd_info_csv_path = "data/mhd_info.csv"
mhd_info_df = pd.read_csv(mhd_info_csv_path)
# Preview the first rows as the cell output.
mhd_info_df.head()

In [None]:
def read_detection_result(test_subset_id, threshold=0.1, output_root=".."):
    """Load the per-series detection files of one test subset.

    Every file found in
    ``<output_root>/output-ss<test_subset_id>/valresults/fp-reduce`` is parsed
    as a space-delimited, header-less table with the columns ``z-index``,
    ``top_left_x``, ``top_left_y``, ``width``, ``height`` and ``probability``.
    Rows whose probability is not strictly greater than ``threshold`` are
    dropped, and the remaining rows are sorted by descending probability.

    Args:
        test_subset_id: Value substituted into the ``output-ss{}`` folder name.
        threshold: Exclusive lower bound applied to the ``probability`` column.
        output_root: Directory that contains the ``output-ss*`` folders.
            Defaults to ``".."``, the location that was previously hard-coded.

    Returns:
        dict mapping each series id (the file name minus its last four
        characters) to its filtered, sorted DataFrame. A file that cannot be
        read or parsed maps to an empty DataFrame with the same columns.

    Raises:
        OSError: If the result folder itself cannot be listed.
    """
    columns = ["z-index", "top_left_x", "top_left_y", "width", "height", "probability"]
    current_folder_path = os.path.join(
        output_root, "output-ss{}".format(test_subset_id), "valresults", "fp-reduce"
    )

    detection_dfs = {}

    # Visit every file (the loop used to stop after the first one) in sorted
    # order so the resulting dict order does not depend on the file system.
    for filename in sorted(os.listdir(current_folder_path)):
        # Strip the last four characters, i.e. a three-letter extension and its dot.
        series_id = filename[0:-4]
        file = os.path.join(current_folder_path, filename)
        try:
            result_df = pd.read_csv(file, delimiter=" ", header=None)
            result_df.columns = columns
            result_df = result_df[result_df["probability"] > threshold].sort_values(
                by="probability", ascending=False
            )
        except Exception:
            # Deliberately best-effort: an empty or malformed file still gets an
            # entry for its series, just without any detections.
            result_df = pd.DataFrame(columns=columns)
        # setdefault keeps the first table if two files share a series id.
        detection_dfs.setdefault(series_id, result_df)

    return detection_dfs

# Ad-hoc check of the loader on subset 0 with threshold 0.2; the result is bound to `a`.
a = read_detection_result(0, 0.2)

In [None]:
def restore_coord(x, y, z, w, h, spacings, origins, is_flip):
    """Map a top-left anchored box to a scaled, offset box centre and size.

    The box centre is ``(x + w / 2, y + h / 2)``. When ``is_flip`` is truthy
    the centre is mirrored against the module-level ``image_size``. Each
    coordinate is then multiplied by its spacing and shifted by its origin,
    while width and height are only multiplied by the x / y spacing.
    Presumably this converts voxel indices to world coordinates -- confirm
    against how ``spacings`` and ``origins`` are produced.

    Args:
        x, y: Top-left corner of the box.
        z: Slice coordinate of the box.
        w, h: Box width and height.
        spacings: Three values ordered ``(z, y, x)``.
        origins: Three values ordered ``(z, y, x)``.
        is_flip: Whether to mirror the centre in x and y.

    Returns:
        Tuple ``(x, y, z, w, h)`` after the transformation.
    """
    z_spacing, y_spacing, x_spacing = spacings
    z_origin, y_origin, x_origin = origins

    # Move from the top-left corner to the centre of the box.
    center_x = x + w / 2
    center_y = y + h / 2

    if is_flip:
        center_x = image_size[0] - center_x
        center_y = image_size[1] - center_y

    return (
        center_x * x_spacing + x_origin,
        center_y * y_spacing + y_origin,
        z * z_spacing + z_origin,
        w * x_spacing,
        h * y_spacing,
    )

In [None]:
def combine_result(test_subset_id, detections):
    """Merge nearby detections of every series and append them to ``result``.

    For each series in ``detections`` the matching row of the module-level
    ``mhd_info_df`` supplies ``spacings``, ``origins`` (both JSON strings) and
    ``is_flip``. Detections are then grouped greedily in record order: each
    detection that is not yet grouped seeds a new group, and every later
    ungrouped detection whose restored centre lies closer to the seed than
    ``0.5 * (mean width + mean height)`` of the pair joins that group.

    Each group yields one tuple ``(series_id, x, y, z, probability)`` appended
    to the module-level ``result`` list, where ``x, y, z`` come from
    ``restore_coord`` applied to the member-averaged raw box and
    ``probability`` is the largest member probability (never below 0).

    Args:
        test_subset_id: Only used in the progress-bar description.
        detections: dict mapping series id to a DataFrame whose columns are,
            in order, z-index, top_left_x, top_left_y, width, height and
            probability (or an empty DataFrame).

    Returns:
        None. Output is appended to the module-level ``result`` list.

    Raises:
        IndexError: If a series id has no row in ``mhd_info_df``.
    """
    for series_id in tqdm(detections.keys(), desc="subset {} series".format(test_subset_id)):
        mhd_info = mhd_info_df[mhd_info_df["series_id"] == series_id].iloc[0]
        spacings = json.loads(mhd_info["spacings"])
        origins = json.loads(mhd_info["origins"])
        is_flip = mhd_info["is_flip"]

        # Read from the `detections` argument; relying on a notebook-level
        # variable here would silently process whatever the caller last bound.
        # Record fields after to_records(): 0 = index, 1 = z-index,
        # 2 = top_left_x, 3 = top_left_y, 4 = width, 5 = height, 6 = probability.
        records = detections[series_id].to_records()

        # Restore every detection once up front instead of once per compared pair.
        restored = [
            restore_coord(rec[2], rec[3], rec[1], rec[4], rec[5], spacings, origins, is_flip)
            for rec in records
        ]

        used = set()
        groups = {}  # seed index -> member indices, seed first
        for i in range(len(records)):
            if i in used:
                continue
            used.add(i)
            members = groups.setdefault(i, [i])
            x1, y1, z1, w1, h1 = restored[i]

            for j in range(i + 1, len(records)):
                if j in used:
                    continue
                x2, y2, z2, w2, h2 = restored[j]

                dist = sqrt(pow(x1 - x2, 2) + pow(y1 - y2, 2) + pow(z1 - z2, 2))

                if dist < 0.5 * (0.5 * (w1 + w2) + 0.5 * (h1 + h2)):
                    used.add(j)
                    members.append(j)

        for members in groups.values():
            x = y = z = w = h = 0
            p = 0
            for m in members:
                rec = records[m]
                x += rec[2]
                y += rec[3]
                z += rec[1]
                w += rec[4]
                h += rec[5]
                p = max(p, rec[6])

            # Average the raw boxes of the group before restoring coordinates.
            count = len(members)
            x /= count
            y /= count
            z /= count
            w /= count
            h /= count

            x, y, z, w, h = restore_coord(x, y, z, w, h, spacings, origins, is_flip)

            result.append((series_id, x, y, z, p))

In [None]:
# Module-level accumulator: combine_result appends one
# (series_id, x, y, z, probability) tuple per merged detection group to it.
# Re-creating it here makes re-running this cell start from an empty list.
result = []
# Detections whose probability is not above this value are dropped by
# read_detection_result.
threshold = 0.02

# Process subsets 0..9: load each subset's detections, then merge them into `result`.
for subset in tqdm(range(0, 10), desc="subset"):
    test_detections = read_detection_result(subset, threshold)
    combine_result(subset, test_detections)

In [None]:
# Build the submission table from the accumulated result tuples. Passing the
# column names to the constructor (instead of assigning `.columns` afterwards)
# also works when `result` is empty, where the assignment would raise a
# length-mismatch ValueError.
result_df = pd.DataFrame(
    result,
    columns=["seriesuid", "coordX", "coordY", "coordZ", "probability"],
).set_index("seriesuid")
# Preview the first rows as the cell output.
result_df.head()

In [None]:
# Number of rows in the submission table, shown as the cell output.
len(result_df)

In [None]:
# Write the table to submission.csv in the working directory; the
# "seriesuid" index is written as the first column.
result_df.to_csv("submission.csv")