In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from tqdm import tqdm
import json
from sklearn import metrics

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

import cv2

In [None]:
# In-plane slice size; evaluate_froc() subtracts candidate x from image_size[0]
# and candidate y from image_size[1] when a scan is marked as flipped.
image_size = (512, 512)

In [None]:
# Per-scan metadata table. evaluate_froc() looks rows up by "series_id" and
# reads the "origins", "spacings" (JSON-encoded lists) and "is_flip" columns.
mhd_info_csv_path = "data/mhd_info.csv"
mhd_info_df = pd.read_csv(mhd_info_csv_path)
mhd_info_df.head()

In [None]:
# Annotations from this file are tagged included=1; is_nodule_included()
# later tests exactly that flag.
annotations_included_path = "data/annotations.csv"
annotations_included_df = (
    pd.read_csv(annotations_included_path, sep=",")
    .assign(included=1)
)
annotations_included_df.head()

In [None]:
# Annotations from this file are tagged included=0, so is_nodule_included()
# returns False for them.
annotations_excluded_path = "data/annotations_excluded.csv"
annotations_excluded_df = (
    pd.read_csv(annotations_excluded_path, sep=",")
    .assign(included=0)
)
annotations_excluded_df.head()

In [None]:
# Stack included and excluded annotations into one table. ignore_index=True
# renumbers the rows; without it both source frames contribute their own
# 0..n-1 labels and the combined index contains duplicates, which makes any
# label-based lookup (.loc) on `annotations` ambiguous.
annotations = pd.concat(
    [annotations_included_df, annotations_excluded_df],
    ignore_index=True,
)
annotations.head()

In [None]:
def read_detection_result(detection_result_folder="../output/valresults/caltech/hw/off"):
    """Load every per-scan detection file found under a results folder.

    The folder is expected to contain one sub-folder per epoch, each holding
    one space-delimited, header-less text file per scan with six columns.

    Args:
        detection_result_folder: directory whose entries are the epoch
            sub-folders. Defaults to the "hw/off" validation results.

    Returns:
        dict mapping epoch id (sub-folder name) -> dict mapping series id
        (file name without its last four characters) -> DataFrame with the
        columns "z-index", "top_left_x", "top_left_y", "width", "height",
        "probability". A file that is empty, unreadable or does not have
        exactly six columns yields a zero-row DataFrame with those same
        columns, so callers can index the columns unconditionally.

    Raises:
        OSError: if the results folder or one of its entries cannot be listed.
    """
    column_names = ["z-index", "top_left_x", "top_left_y", "width", "height", "probability"]

    detection_dfs = {}
    for epoch_id in tqdm(os.listdir(detection_result_folder), desc="test epoch"):
        current_folder_path = "{}/{}".format(detection_result_folder, epoch_id)
        epoch_results = detection_dfs.setdefault(epoch_id, {})

        for filename in os.listdir(current_folder_path):
            # NOTE(review): assumes a four-character suffix such as ".txt" - confirm.
            series_id = filename[0:-4]
            file = os.path.join(current_folder_path, filename)
            try:
                result_df = pd.read_csv(file, delimiter=" ", header=None)
                result_df.columns = column_names
            except (ValueError, OSError):
                # ValueError covers pandas' EmptyDataError / ParserError and a
                # column-count mismatch; OSError covers unreadable entries.
                # Such scans are kept, but with no detections.
                result_df = pd.DataFrame(columns=column_names)

            # First file wins if two names collapse to the same series id.
            epoch_results.setdefault(series_id, result_df)

    return detection_dfs

# Load all detections once; later cells read this nested dict
# (epoch id -> series id -> DataFrame of detections).
test_detections = read_detection_result()

In [None]:
# Show which epoch ids were loaded (one key per entry of the results folder).
test_detections.keys()

In [None]:
def is_nodule_included(n):
    """Tell whether an annotation row carries the flag ``included == 1``.

    Args:
        n: any mapping-like object supporting ``n["included"]`` (for example
            a row of the ``annotations`` table).

    Returns:
        The result of comparing the flag with 1.
    """
    included_flag = n["included"]
    return included_flag == 1

In [None]:

def _candidate_world_coords(candidates, mhd_info):
    """Map the candidate positions of one scan from voxel indices to world coordinates.

    Args:
        candidates: DataFrame with "top_left_x", "top_left_y" and "z-index" columns.
        mhd_info: metadata row of the scan; "spacings" and "origins" are JSON
            lists ordered (z, y, x) and "is_flip" tells whether the in-plane
            axes are mirrored against ``image_size`` first.

    Returns:
        Tuple ``(x, y, z)`` of float arrays with one entry per candidate row.
    """
    z_spacing, y_spacing, x_spacing = json.loads(mhd_info["spacings"])
    z_origin, y_origin, x_origin = json.loads(mhd_info["origins"])

    # NOTE(review): the box's top-left corner is used as the candidate
    # position; confirm whether the box centre (corner + size / 2) was meant.
    xs = candidates["top_left_x"].to_numpy(dtype=float)
    ys = candidates["top_left_y"].to_numpy(dtype=float)
    zs = candidates["z-index"].to_numpy(dtype=float)

    if mhd_info["is_flip"]:
        xs = image_size[0] - xs
        ys = image_size[1] - ys

    return (
        xs * x_spacing + x_origin,
        ys * y_spacing + y_origin,
        zs * z_spacing + z_origin,
    )


def _score_series(detections, mhd_info, nodules, threshold):
    """Match the candidates of one scan against its annotations.

    Returns:
        Tuple ``(tp, fn, fp, nodule_count, candidate_count)`` for the scan.
    """
    if "probability" in detections.columns:
        # Keep every detection lying on a slice that holds at least one
        # detection above the threshold.
        strong_slices = detections.loc[detections["probability"] > threshold, "z-index"].unique()
        candidates = detections[detections["z-index"].isin(strong_slices)]
        cand_x, cand_y, cand_z = _candidate_world_coords(candidates, mhd_info)
    else:
        # A frame without the detection columns means there is nothing to score.
        candidates = detections.iloc[0:0]
        cand_x = cand_y = cand_z = np.empty(0)

    # True for candidates lying inside some annotation (included or not).
    claimed = np.zeros(len(candidates), dtype=bool)
    tp = fn = nodule_count = 0

    for _, nodule in nodules.iterrows():
        diameter = nodule["diameter_mm"]
        if diameter <= 0:
            # Annotations without a usable diameter get a 10 unit default.
            diameter = 10
        radius_squared = (diameter / 2) ** 2

        dist = (
            (nodule["coordX"] - cand_x) ** 2
            + (nodule["coordY"] - cand_y) ** 2
            + (nodule["coordZ"] - cand_z) ** 2
        )
        hits = dist < radius_squared
        claimed |= hits

        if is_nodule_included(nodule):
            nodule_count += 1
            if hits.any():
                tp += 1
            else:
                fn += 1

    # Only candidates that touch no annotation at all are false positives:
    # hits on included nodules are detections, hits on excluded ones are ignored.
    fp = int((~claimed).sum())
    return tp, fn, fp, nodule_count, len(candidates)


def evaluate_froc(epoch_ids=None):
    """Print and record detection statistics for a set of epochs.

    For each epoch the tp / fp / fn totals over all its scans, the sensitivity
    (tp divided by the number of included nodules) and the average number of
    candidates per scan are printed.

    Reads the notebook globals ``test_detections``, ``mhd_info_df``,
    ``annotations``, ``image_size`` and ``probability_threshold``; appends one
    entry per epoch to the global lists ``sensitivity_x`` (epoch id as int),
    ``sensitivity_y`` (sensitivity) and ``fps`` (false-positive total).

    Args:
        epoch_ids: iterable of keys of ``test_detections`` to evaluate. When
            omitted, the fixed window ``[82:90]`` of the dict's key order is
            used; that order comes from ``os.listdir`` and is therefore not
            guaranteed to be sorted.

    Raises:
        IndexError: if a scan has no row in ``mhd_info_df``.
    """
    if epoch_ids is None:
        epoch_ids = list(test_detections.keys())[82:90]

    for epoch_id in tqdm(epoch_ids):
        epoch_candidates = test_detections[epoch_id]
        series_ids = epoch_candidates.keys()

        tp = 0
        fp = 0
        fn = 0
        total_number_of_nodules = 0
        total_number_of_candidates = 0

        for series_id in tqdm(series_ids):
            mhd_info = mhd_info_df[mhd_info_df["series_id"] == series_id].iloc[0]
            nodules = annotations[annotations["seriesuid"] == series_id]

            series_tp, series_fn, series_fp, series_nodules, series_candidates = _score_series(
                epoch_candidates[series_id], mhd_info, nodules, probability_threshold
            )
            tp += series_tp
            fn += series_fn
            fp += series_fp
            total_number_of_nodules += series_nodules
            # Accumulate over scans so the per-scan average below is meaningful.
            total_number_of_candidates += series_candidates

        print(epoch_id)
        print("tp", tp)
        print("fp", fp)
        print("fn", fn)

        sensitivity = tp / total_number_of_nodules if total_number_of_nodules else 0
        print("sensitivity", sensitivity)
        sensitivity_x.append(int(epoch_id))
        sensitivity_y.append(sensitivity)

        scan_count = len(series_ids)
        average_candidates = total_number_of_candidates / scan_count if scan_count else 0
        print("Average number of candidates per scan", average_candidates)
        fps.append(fp)


In [None]:
# A detection must exceed this probability for its slice to be evaluated.
probability_threshold = 0.1

# Per-epoch result lists that evaluate_froc() appends to; rebuilding them here
# means re-running this cell starts from empty lists.
sensitivity_x, sensitivity_y, fps = [], [], []

evaluate_froc()

In [None]:
# Sensitivity for each evaluated epoch, as collected by evaluate_froc().
figure(figsize=(25, 5))
plt.plot(sensitivity_x, sensitivity_y, label='Sensitivity')
plt.xlabel('Epoch')
plt.ylabel('Sensitivity')
plt.legend()  # the label passed to plot() is only drawn once a legend exists
plt.show()

In [None]:
# False-positive total for each evaluated epoch, as collected by evaluate_froc().
figure(figsize=(25, 5))
plt.plot(sensitivity_x, fps, label='fp')
plt.xlabel('Epoch')
plt.ylabel('False positives')
plt.legend()  # the label passed to plot() is only drawn once a legend exists
plt.show()

In [None]:
import math

# Rank (epoch, sensitivity, fp) triples in ascending order of a score that is
# dominated by sensitivity and mildly penalises the false-positive count.
# max(fp, 1) keeps the score unchanged for fp >= 1 while preventing
# math.log(0) from raising ValueError when an epoch has no false positives.
sorted(
    zip(sensitivity_x, sensitivity_y, fps),
    key=lambda t: t[1] * 10000 - math.log(max(t[2], 1)),
)

In [None]:
import multiprocessing

# Report how many CPUs multiprocessing detects on this machine.
print("Number of cpu : ", multiprocessing.cpu_count())