In [1]:
from pathlib import Path
from FISHClass.datasets import MYCN
from torch.utils.data import DataLoader
import yaml
from baseline import models
from FISHClass.utils.evaluation import get_top_model
from FISHClass.evaluation.evaluate_test_set import predict_test, predict_test_baseline
from FISHClass.models import CombinedModel
from FISHClass.utils.device import best_gpu
import os
import torch
import FISHClass

In [2]:
# Run every model listed under "trained_model_paths" in the evaluation config
# through predict_test / predict_test_baseline on DATASET and store one
# `<model_name>_results.h5` file per model in OUT.
DATASET = "/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/TRAINING_DATA_5K.h5"
OUT = "/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K"

# Make sure the result directory exists before any model tries to write into it.
os.makedirs(OUT, exist_ok=True)

with open("/home/simon_g/src/FISHClass/evaluation/model_evaluation.yaml") as f:

    yaml_data = yaml.load(f, Loader=yaml.FullLoader)["trained_model_paths"]

for model_name, model_items in yaml_data.items():

    save_path = os.path.join(OUT, f"{model_name}_results.h5")

    if not isinstance(model_items, str):

        # Non-string entry: a baseline model that is constructed directly from
        # the "<model_type>_kwargs" section of its config entry.
        model_type = model_items["model_type"]

        if model_type in ("AreaModel", "SpotdetectionModel"):
            model = getattr(models, model_type)(**model_items[f"{model_type}_kwargs"])
            results = predict_test_baseline(model, DATASET, dataset_kwargs={"norm_type": None, "transform": None}, save2h5=True, save_path=save_path)
        else:
            # Previously such entries were skipped without any notice.
            print(f"Skipping {model_name}: unsupported baseline model_type {model_type!r}")

    else:
        # String entry: resolved by get_top_model and loaded exactly once.
        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(get_top_model(model_items))

        # The file holds either a dict with the model stored under "model" or
        # the model object itself; real loading errors are no longer swallowed.
        if isinstance(checkpoint, dict) and "model" in checkpoint:
            model = checkpoint["model"]
        else:
            model = checkpoint

        print(type(model))

        # double_return is switched on only for the two feature-space classifier classes.
        needs_double_return = isinstance(
            model,
            (
                FISHClass.ModelZoo.FeaturespaceClassifier.FeaturespaceClassifier,
                FISHClass.ModelZoo.WeightedFeaturespaceClassifier.WeightedFeaturespaceClassifier,
            ),
        )

        results = predict_test(model, DATASET, device=best_gpu(), batch_size=16,
                               dataset_kwargs={"double_return": needs_double_return,
                                               "norm_type": model.norm_type,
                                               "mask": model.mask,
                                               "channels": model.channels,
                                               "transform": None},
                               save2h5=True,
                               save_path=save_path)

  0%|          | 0/32 [00:00<?, ?it/s]  3%|▎         | 1/32 [00:01<00:35,  1.15s/it]  6%|▋         | 2/32 [00:01<00:22,  1.32it/s]  9%|▉         | 3/32 [00:01<00:13,  2.17it/s] 19%|█▉        | 6/32 [00:01<00:05,  4.73it/s] 28%|██▊       | 9/32 [00:02<00:03,  7.56it/s] 34%|███▍      | 11/32 [00:03<00:06,  3.31it/s] 44%|████▍     | 14/32 [00:04<00:06,  2.94it/s] 50%|█████     | 16/32 [00:04<00:04,  3.82it/s] 59%|█████▉    | 19/32 [00:04<00:02,  5.49it/s] 69%|██████▉   | 22/32 [00:05<00:01,  5.53it/s] 78%|███████▊  | 25/32 [00:05<00:00,  7.47it/s] 88%|████████▊ | 28/32 [00:05<00:00,  9.68it/s] 97%|█████████▋| 31/32 [00:05<00:00, 12.08it/s]100%|██████████| 32/32 [00:05<00:00,  5.48it/s]
  0%|          | 0/32 [00:00<?, ?it/s]  3%|▎         | 1/32 [00:01<00:34,  1.10s/it]  6%|▋         | 2/32 [00:01<00:23,  1.30it/s]  9%|▉         | 3/32 [00:01<00:14,  1.97it/s] 12%|█▎        | 4/32 [00:01<00:09,  2.86it/s] 16%|█▌        | 5/32 [00:02<00:07,  3.49it/s] 19%|█▉        | 6/

<class 'FISHClass.ModelZoo.CombinedModel.CombinedModel'>
Using cuda:1 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:01<01:53,  1.84s/it]  3%|▎         | 2/63 [00:02<01:03,  1.04s/it]  5%|▍         | 3/63 [00:02<00:46,  1.28it/s]  6%|▋         | 4/63 [00:03<00:38,  1.52it/s]  8%|▊         | 5/63 [00:03<00:34,  1.68it/s] 10%|▉         | 6/63 [00:04<00:31,  1.80it/s] 11%|█         | 7/63 [00:04<00:29,  1.89it/s] 13%|█▎        | 8/63 [00:05<00:28,  1.96it/s] 14%|█▍        | 9/63 [00:05<00:26,  2.00it/s] 16%|█▌        | 10/63 [00:06<00:26,  2.03it/s] 17%|█▋        | 11/63 [00:06<00:25,  2.05it/s] 19%|█▉        | 12/63 [00:07<00:24,  2.07it/s] 21%|██        | 13/63 [00:07<00:24,  2.08it/s] 22%|██▏       | 14/63 [00:08<00:23,  2.09it/s] 24%|██▍       | 15/63 [00:08<00:22,  2.09it/s] 25%|██▌       | 16/63 [00:08<00:22,  2.10it/s] 27%|██▋       | 17/63 [00:09<00:21,  2.10it/s] 29%|██▊       | 18/63 [00:09<00:21,  2.10it/s] 30%|███       | 19/63 [00:10<00:20,  2.10it/s] 32%|███▏      | 20/63 [00:10<00:20,  2.11it/s] 33%|███▎ 

<class 'FISHClass.ModelZoo.CombinedModel.CombinedModel'>
Using cuda:2 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:01<01:40,  1.63s/it]  3%|▎         | 2/63 [00:02<00:59,  1.03it/s]  5%|▍         | 3/63 [00:02<00:44,  1.34it/s]  6%|▋         | 4/63 [00:03<00:37,  1.57it/s]  8%|▊         | 5/63 [00:03<00:33,  1.73it/s] 10%|▉         | 6/63 [00:04<00:30,  1.84it/s] 11%|█         | 7/63 [00:04<00:29,  1.92it/s] 13%|█▎        | 8/63 [00:04<00:27,  1.98it/s] 14%|█▍        | 9/63 [00:05<00:26,  2.01it/s] 16%|█▌        | 10/63 [00:05<00:26,  2.04it/s] 17%|█▋        | 11/63 [00:06<00:25,  2.06it/s] 19%|█▉        | 12/63 [00:06<00:24,  2.08it/s] 21%|██        | 13/63 [00:07<00:23,  2.09it/s] 22%|██▏       | 14/63 [00:07<00:23,  2.10it/s] 24%|██▍       | 15/63 [00:08<00:22,  2.10it/s] 25%|██▌       | 16/63 [00:08<00:22,  2.11it/s] 27%|██▋       | 17/63 [00:09<00:21,  2.11it/s] 29%|██▊       | 18/63 [00:09<00:21,  2.11it/s] 30%|███       | 19/63 [00:10<00:20,  2.11it/s] 32%|███▏      | 20/63 [00:10<00:20,  2.11it/s] 33%|███▎ 

<class 'FISHClass.ModelZoo.ClassificationCNN.ClassificationCNN'>
couldtn redefine
Using cuda:3 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:01<01:43,  1.67s/it] 14%|█▍        | 9/63 [00:02<00:09,  5.69it/s] 22%|██▏       | 14/63 [00:02<00:05,  9.42it/s] 27%|██▋       | 17/63 [00:02<00:06,  7.37it/s] 37%|███▋      | 23/63 [00:02<00:03, 11.92it/s] 43%|████▎     | 27/63 [00:03<00:04,  8.59it/s] 49%|████▉     | 31/63 [00:03<00:02, 10.70it/s] 54%|█████▍    | 34/63 [00:04<00:02, 10.40it/s] 62%|██████▏   | 39/63 [00:04<00:02, 11.50it/s] 67%|██████▋   | 42/63 [00:04<00:01, 13.46it/s] 75%|███████▍  | 47/63 [00:05<00:01, 11.55it/s] 83%|████████▎ | 52/63 [00:05<00:00, 13.80it/s] 87%|████████▋ | 55/63 [00:05<00:00, 12.40it/s] 98%|█████████▊| 62/63 [00:05<00:00, 18.51it/s]100%|██████████| 63/63 [00:05<00:00, 10.50it/s]


<class 'FISHClass.ModelZoo.ClassificationCNN.ClassificationCNN'>
couldtn redefine
Using cuda:0 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:01<01:50,  1.79s/it] 14%|█▍        | 9/63 [00:02<00:10,  5.16it/s] 17%|█▋        | 11/63 [00:02<00:09,  5.72it/s] 27%|██▋       | 17/63 [00:02<00:04, 10.25it/s] 32%|███▏      | 20/63 [00:03<00:04,  8.93it/s] 37%|███▋      | 23/63 [00:03<00:03, 10.68it/s] 40%|███▉      | 25/63 [00:03<00:04,  9.36it/s] 48%|████▊     | 30/63 [00:03<00:02, 13.50it/s] 52%|█████▏    | 33/63 [00:04<00:02, 10.83it/s] 60%|██████    | 38/63 [00:04<00:01, 14.75it/s] 65%|██████▌   | 41/63 [00:04<00:01, 12.26it/s] 73%|███████▎  | 46/63 [00:05<00:01,  9.41it/s] 86%|████████▌ | 54/63 [00:05<00:00, 11.98it/s] 92%|█████████▏| 58/63 [00:05<00:00, 14.43it/s]100%|██████████| 63/63 [00:06<00:00, 18.40it/s]100%|██████████| 63/63 [00:06<00:00, 10.20it/s]


<class 'FISHClass.ModelZoo.FeaturespaceClassifier.FeaturespaceClassifier'>
Using cuda:3 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:02<02:12,  2.14s/it]  3%|▎         | 2/63 [00:02<01:11,  1.17s/it]  5%|▍         | 3/63 [00:03<00:51,  1.16it/s]  6%|▋         | 4/63 [00:03<00:42,  1.40it/s]  8%|▊         | 5/63 [00:04<00:36,  1.58it/s] 10%|▉         | 6/63 [00:04<00:33,  1.71it/s] 11%|█         | 7/63 [00:05<00:30,  1.81it/s] 13%|█▎        | 8/63 [00:05<00:29,  1.88it/s] 14%|█▍        | 9/63 [00:06<00:28,  1.92it/s] 16%|█▌        | 10/63 [00:06<00:27,  1.96it/s] 17%|█▋        | 11/63 [00:07<00:26,  1.98it/s] 19%|█▉        | 12/63 [00:07<00:25,  2.00it/s] 21%|██        | 13/63 [00:08<00:24,  2.02it/s] 22%|██▏       | 14/63 [00:08<00:24,  2.02it/s] 24%|██▍       | 15/63 [00:08<00:23,  2.04it/s] 25%|██▌       | 16/63 [00:09<00:23,  2.04it/s] 27%|██▋       | 17/63 [00:09<00:22,  2.04it/s] 29%|██▊       | 18/63 [00:10<00:22,  2.04it/s] 30%|███       | 19/63 [00:10<00:21,  2.04it/s] 32%|███▏      | 20/63 [00:11<00:21,  2.04it/s] 33%|███▎ 

<class 'FISHClass.ModelZoo.FeaturespaceClassifier.FeaturespaceClassifier'>
Using cuda:0 for calculation


  0%|          | 0/63 [00:00<?, ?it/s]  2%|▏         | 1/63 [00:02<02:09,  2.09s/it]  3%|▎         | 2/63 [00:02<01:09,  1.15s/it]  5%|▍         | 3/63 [00:03<00:50,  1.18it/s]  6%|▋         | 4/63 [00:03<00:41,  1.42it/s]  8%|▊         | 5/63 [00:04<00:36,  1.60it/s] 10%|▉         | 6/63 [00:04<00:32,  1.73it/s] 11%|█         | 7/63 [00:05<00:30,  1.83it/s] 13%|█▎        | 8/63 [00:05<00:28,  1.90it/s] 14%|█▍        | 9/63 [00:05<00:27,  1.95it/s] 16%|█▌        | 10/63 [00:06<00:26,  1.99it/s] 17%|█▋        | 11/63 [00:06<00:25,  2.01it/s] 19%|█▉        | 12/63 [00:07<00:25,  2.03it/s] 21%|██        | 13/63 [00:07<00:24,  2.04it/s] 22%|██▏       | 14/63 [00:08<00:23,  2.05it/s] 24%|██▍       | 15/63 [00:08<00:23,  2.06it/s] 25%|██▌       | 16/63 [00:09<00:22,  2.06it/s] 27%|██▋       | 17/63 [00:09<00:22,  2.07it/s] 29%|██▊       | 18/63 [00:10<00:21,  2.07it/s] 30%|███       | 19/63 [00:10<00:21,  2.07it/s] 32%|███▏      | 20/63 [00:11<00:20,  2.07it/s] 33%|███▎ 

In [3]:
import h5py
import numpy as np

# Gather every HDF5 result file found in the 5K test-set result folder.
test_set_dir = Path("/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K")
files = [h5_path for h5_path in test_set_dir.glob("*.h5")]
print(files)

[PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/FS-RGB_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/AreaModel_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/SpotdetectionModel_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/CNN-GREEN-MASK_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/CNN-RGB_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/Basic-Classifier_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/TEST_SET_5K/LSTM-Classifier_results.h5'), PosixPath('/data_i

In [11]:
import pandas as pd

# Compute confusion-matrix metrics (all in percent, rounded to 2 decimals) for
# every result file in `files` and export them to one Excel sheet with one
# column per model.
results_dict = {}
for file in files:

    # Open read-only. TARGET and PRED are flattened so that e.g. (N,) and (N, 1)
    # arrays are compared element by element instead of being broadcast.
    with h5py.File(file, "r") as fin:

        target = np.asarray(fin["TARGET"]).ravel()
        pred = np.asarray(fin["PRED"]).ravel()

    if target.shape != pred.shape:
        raise ValueError(f"{file}: TARGET has {target.size} entries but PRED has {pred.size}")

    # Counts are kept as numpy floats so that an empty denominator yields NaN
    # (under the errstate block below) instead of raising ZeroDivisionError.
    TP = np.float64(np.count_nonzero((target == 1) & (pred == 1)))
    TN = np.float64(np.count_nonzero((target == 0) & (pred == 0)))
    FP = np.float64(np.count_nonzero((target == 0) & (pred == 1)))
    FN = np.float64(np.count_nonzero((target == 1) & (pred == 0)))

    # Precision = TP / (TP + FP)
    # Recall = TP / (TP + FN)
    # F1 Score = 2 * Precision * Recall / (Precision + Recall) = 2*TP / (2*TP + FP + FN)
    with np.errstate(divide="ignore", invalid="ignore"):
        SPEC = np.round(TN / (TN + FP) * 100, 2)
        precision = np.round(TP / (TP + FP) * 100, 2)
        recall = np.round(TP / (TP + FN) * 100, 2)
        # Derived from the raw counts so the value is rounded only once.
        F1 = np.round(2 * TP / (2 * TP + FP + FN) * 100, 2)
        ACC = np.round((TP + TN) / (TP + TN + FN + FP) * 100, 2)

    print(f"{file.stem:<30}", " PRECISION: ", f"{precision:<8}", "RECALL/SENSITIVITY: ", f"{recall:<8}", "F1: ", f"{F1:<8}", "SPECIFICITY: ", f"{SPEC:<8}")
    results_dict[f"{file.stem.replace('_results','')}"] = {
        # Stored as a number like every other metric (it used to be a formatted string).
        "precision": precision,
        "recall": recall,
        "F1": F1,
        "specificity": SPEC,
        "sensitivity": recall,
        "accuracy": ACC
    }

results_df = pd.DataFrame(results_dict)
results_df.to_excel("/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/metric_results.xlsx", sheet_name="metrics")

FS-RGB_results                  PRECISION:  98.84    RECALL/SENSITIVITY:  98.46    F1:  98.65    SPECIFICITY:  98.75   
AreaModel_results               PRECISION:  91.78    RECALL/SENSITIVITY:  92.13    F1:  91.95    SPECIFICITY:  91.02   
SpotdetectionModel_results      PRECISION:  84.93    RECALL/SENSITIVITY:  47.6     F1:  61.01    SPECIFICITY:  90.81   
CNN-GREEN-MASK_results          PRECISION:  98.85    RECALL/SENSITIVITY:  98.66    F1:  98.75    SPECIFICITY:  98.75   
CNN-RGB_results                 PRECISION:  99.61    RECALL/SENSITIVITY:  99.23    F1:  99.42    SPECIFICITY:  99.58   
Basic-Classifier_results        PRECISION:  86.2     RECALL/SENSITIVITY:  87.52    F1:  86.85    SPECIFICITY:  84.76   
LSTM-Classifier_results         PRECISION:  97.67    RECALL/SENSITIVITY:  96.35    F1:  97.01    SPECIFICITY:  97.49   
FS-GREEN-MASK_results           PRECISION:  98.66    RECALL/SENSITIVITY:  98.66    F1:  98.66    SPECIFICITY:  98.54   


In [6]:
import h5py
import numpy as np
from pathlib import Path

# Gather every HDF5 result file found in the dilution-series result folder.
dilutions_dir = Path("/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS")
files = [h5_path for h5_path in dilutions_dir.glob("*.h5")]
print(files)

[PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/FS-RGB_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/AreaModel_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/SpotdetectionModel_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/CNN-GREEN-MASK_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/CNN-RGB_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/Basic-Classifier_results.h5'), PosixPath('/data_isilon_main/isilon_images/10_MetaSystems/MetaSystemsData/MYCN_SpikeIn/PAPER_RESULTS/DILUTIONS/LSTM-Classifier_results.h5'), PosixPath('/data_isilon_main/isi

In [8]:
import pandas as pd

#<KeysViewHDF5 ['CLB-MA', 'S11', 'S12', 'S19', 'S1b', 'S2', 'S20', 'S29', 'S3', 'S30', 'S31', 'S32', 'S33', 'S34', 'S4', 'S6', 'S7', 'S8', 'SK-N-MM', 'SK-N-SH', 'STA-NB-10', 'STA-NB-12']>

# Compute metrics on two groups of each result file: predictions stored under
# POSITIVE are scored against label 1, predictions under NEGATIVE against label 0.
# All metrics are percentages rounded to 2 decimals.
POSITIVE = "CLB-MA"
NEGATIVE = "SK-N-MM"

results_dict = {}
for file in files:

    # Open read-only; reshape(-1) always yields a 1-D array, also for a single
    # prediction (where squeeze() would give a 0-d array that has no len()).
    with h5py.File(file, "r") as fin:

        pred_pos = np.asarray(fin[POSITIVE]["PRED"]).reshape(-1)
        pred_neg = np.asarray(fin[NEGATIVE]["PRED"]).reshape(-1)

    # Counts are kept as numpy floats so that an empty denominator yields NaN
    # (under the errstate block below) instead of raising ZeroDivisionError.
    TP = np.float64(np.count_nonzero(pred_pos == 1))
    TN = np.float64(np.count_nonzero(pred_neg == 0))
    FP = np.float64(np.count_nonzero(pred_neg == 1))
    FN = np.float64(np.count_nonzero(pred_pos == 0))

    # Precision = TP / (TP + FP)
    # Recall = TP / (TP + FN)
    # F1 Score = 2 * Precision * Recall / (Precision + Recall) = 2*TP / (2*TP + FP + FN)
    with np.errstate(divide="ignore", invalid="ignore"):
        SPEC = np.round(TN / (TN + FP) * 100, 2)

        # Share of predictions equal to 1 (assuming 0/1 predictions) in each group.
        perc_pos = np.round(pred_pos.sum() / len(pred_pos) * 100, 2)
        perc_neg = np.round(pred_neg.sum() / len(pred_neg) * 100, 2)

        precision = np.round(TP / (TP + FP) * 100, 2)
        recall = np.round(TP / (TP + FN) * 100, 2)
        # Derived from the raw counts so the value is rounded only once.
        F1 = np.round(2 * TP / (2 * TP + FP + FN) * 100, 2)
        # Fixed: the denominator is the total count (it used to subtract FP),
        # and the value is now a percentage like the other metrics.
        ACC = np.round((TP + TN) / (TP + TN + FP + FN) * 100, 2)

    print(f"{file.stem:<30}", "PERCENTAGE: ", f"{perc_pos:<6}-  {perc_neg:<6}", " PRECISION: ", f"{precision:<8}", "RECALL/SENSITIVITY: ", f"{recall:<8}", "F1: ", f"{F1:<8}", "SPECIFICITY: ", f"{SPEC:<8}")

    results_dict[f"{file.stem.replace('_results','')}"] = {
        "precision": precision,
        "recall": recall,
        "F1": F1,
        "specificity": SPEC,
        "sensitivity": recall,
        # Accuracy was computed but never exported before.
        "accuracy": ACC,
    }

# Group sizes of the last processed file; skipped when no file was found.
if files:
    print(len(pred_pos), len(pred_neg))

results_df = pd.DataFrame(results_dict)
results_df.to_excel("/home/simon_g/src/PAPER_RESULTS/unseen_metrics.xlsx", sheet_name="metrics")

FS-RGB_results                 PERCENTAGE:  99.7  -  11.9    PRECISION:  89.34    RECALL/SENSITIVITY:  99.7     F1:  94.24    SPECIFICITY:  88.1    
AreaModel_results              PERCENTAGE:  93.9  -  4.37    PRECISION:  95.56    RECALL/SENSITIVITY:  93.9     F1:  94.72    SPECIFICITY:  95.63   
SpotdetectionModel_results     PERCENTAGE:  40.17 -  2.2     PRECISION:  94.81    RECALL/SENSITIVITY:  40.17    F1:  56.43    SPECIFICITY:  97.8    
CNN-GREEN-MASK_results         PERCENTAGE:  99.4  -  1.7     PRECISION:  98.32    RECALL/SENSITIVITY:  99.4     F1:  98.86    SPECIFICITY:  98.3    
CNN-RGB_results                PERCENTAGE:  99.9  -  8.2     PRECISION:  92.41    RECALL/SENSITIVITY:  99.9     F1:  96.01    SPECIFICITY:  91.8    
Basic-Classifier_results       PERCENTAGE:  94.07 -  39.1    PRECISION:  70.64    RECALL/SENSITIVITY:  94.07    F1:  80.69    SPECIFICITY:  60.9    
LSTM-Classifier_results        PERCENTAGE:  97.03 -  4.83    PRECISION:  95.26    RECALL/SENSITIVITY:  97.

# Creation of all plots

In [None]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

# Read both sheets of the paper results workbook in a single call (a list of
# sheet names returns a dict of DataFrames), then transpose each sheet.
paper_sheets = pd.read_excel(
    "/home/simon_g/src/results/EVALUATION/results_paper.xlsx",
    sheet_name=["predictions", "differences"],
    index_col=0,
)
df_pred = paper_sheets["predictions"].T
df_diff = paper_sheets["differences"].T

In [None]:
# Arrange the rows of the prediction table in a fixed model order.
model_order = [
    'AreaModel',
    'SpotdetectionModel',
    'BasicClassifier',
    'LSTMClassifier',
    'ClassificationCNN',
    'FeaturespaceClassifier',
    'WeightedFeaturespaceClassifier',
]
df_pred = df_pred.reindex(index=model_order)

In [None]:
# Arrange the rows of the difference table in a fixed model order.
model_order = [
    'AreaModel',
    'SpotdetectionModel',
    'BasicClassifier',
    'LSTMClassifier',
    'ClassificationCNN',
    'FeaturespaceClassifier',
    'WeightedFeaturespaceClassifier',
]
df_diff = df_diff.reindex(index=model_order)