In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import csv 
import pickle
from scipy import stats
from sklearn.metrics import roc_auc_score, precision_score, accuracy_score, f1_score, recall_score
from sklearn.linear_model import LogisticRegression
from meerkat import DataPanel

from domino.data.cxr import get_dp, build_cxr_df, get_cxr_activations, rle2mask


ModuleNotFoundError: No module named 'meerkat'

## Extract train/test features from Image-Only model

In [2]:
# Build the metadata table and wrap it in a DataPanel
# (`DataPanel` is imported from `meerkat` at the top of this notebook).
df = build_cxr_df.out(load=True)
dp = get_dp(df)
# Preview the first rows to check which columns are available.
dp.head()

Unnamed: 0,image_id (NumpyArrayColumn),encoded_pixels (NumpyArrayColumn),pmx (NumpyArrayColumn),filepath (NumpyArrayColumn),chest_tube (NumpyArrayColumn),split (NumpyArrayColumn),gaze_seq (NumpyArrayColumn),gaze_heatmap (NumpyArrayColumn),gaze_max_visit (NumpyArrayColumn),gaze_unique (NumpyArrayColumn),gaze_time (NumpyArrayColumn),gaze_diffusivity (NumpyArrayColumn),expert_label (NumpyArrayColumn),index (ListColumn),input (CellColumn),img (CellColumn)
0,1.2.276.0.7230010.3.1.4.8323329.6904.151787520...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,,,,'0',MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...
1,1.2.276.0.7230010.3.1.4.8323329.13666.15178752...,557374 2 1015 8 1009 14 1002 20 997 26 990 32 ...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,,,,'1',MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...
2,1.2.276.0.7230010.3.1.4.8323329.11028.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,,,,'2',MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...
3,1.2.276.0.7230010.3.1.4.8323329.10366.15178752...,514175 10 1008 29 994 30 993 32 991 33 990 34 ...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,,,,'3',MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...
4,1.2.276.0.7230010.3.1.4.8323329.10016.15178752...,592184 33 976 58 956 73 941 88 926 102 917 109...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,,,,'4',MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...


In [3]:
# Checkpoint of the image-only model whose activations are extracted below.
# NOTE(review): absolute, machine-specific path -- consider a configurable root.
model_pth = "/media/nvme_data/observational_results_10_2020/original/cxr/emmental_cam/cam_0/seed_0/best_model_target_cxr_val_accuracy.pth"

# Run the checkpoint over `dp`; the returned panel carries the additional
# `pred`, `probs` and `activation_block4` columns used later in the notebook.
act_dp = get_cxr_activations(
    model_path=model_pth,
    dp=dp,
)


HBox(children=(FloatProgress(value=0.0, max=189.0), HTML(value='')))




In [4]:
# Training rows are the ones that have gaze data: `gaze_seq` is compared
# against the string "nan", which marks rows without gaze.
gaze_seq_values = dp["gaze_seq"].data
train_mask = gaze_seq_values != "nan"

# The mask is built from `dp` but applied to `act_dp`
# (assumes both panels share the same row order -- TODO confirm).
act_dp[train_mask].head()

Unnamed: 0,image_id (NumpyArrayColumn),encoded_pixels (NumpyArrayColumn),pmx (NumpyArrayColumn),filepath (NumpyArrayColumn),chest_tube (NumpyArrayColumn),split (NumpyArrayColumn),gaze_seq (NumpyArrayColumn),gaze_heatmap (NumpyArrayColumn),gaze_max_visit (NumpyArrayColumn),gaze_unique (NumpyArrayColumn),gaze_time (NumpyArrayColumn),gaze_diffusivity (NumpyArrayColumn),expert_label (NumpyArrayColumn),index (ListColumn),input (TensorColumn),img (ListColumn),pred (NumpyArrayColumn),probs (NumpyArrayColumn),activation_block4 (NumpyArrayColumn)
0,1.2.276.0.7230010.3.1.4.8323329.1857.151787516...,378999 2 1018 8 1013 13 1009 15 1007 17 1006 1...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,1.0,train,"[(0.3002531741212215, 0.9348943074817682, 2), ...",[[ 0. 0. 0. 14. 2. 2. 1. 0.]\n [ 0. 0. ...,14.0,12.0,33.0,0.545455,1.0,'29',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
1,1.2.276.0.7230010.3.1.4.8323329.1219.151787516...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,1.0,train,"[(0.3135288701615107, 0.5596042556980056, 2), ...",[[0. 0. 0. 1. 3. 0. 0. 0.]\n [0. 0. 0. 3. 3. 2...,7.0,19.0,46.0,0.369565,1.0,'39',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
2,1.2.276.0.7230010.3.1.4.8323329.2118.151787517...,591104 8 1011 18 1001 26 993 32 988 36 986 37 ...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,1.0,train,"[(0.1227472372112641, 0.5808183519907184, 3), ...",[[ 0. 0. 0. 0. 4. 1. 0. 1.]\n [ 0. 0. ...,10.0,23.0,81.0,0.395062,1.0,'43',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
3,1.2.276.0.7230010.3.1.4.8323329.2027.151787517...,500845 25 992 38 983 41 980 44 978 46 975 49 9...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,train,"[(0.3294461311053661, 0.5748967530438639, 4), ...",[[ 0. 0. 4. 1. 10. 8. 0. 0.]\n [ 0. 0. ...,11.0,21.0,78.0,0.641026,1.0,'50',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
4,1.2.276.0.7230010.3.1.4.8323329.1972.151787517...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,train,"[(0.25439717874245193, 0.6421419414319375, 2),...",[[ 0. 0. 5. 0. 0. 2. 0. 0.]\n [ 0. 1. ...,10.0,22.0,75.0,0.266667,0.0,'53',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"


In [5]:
# test mask are points that do not have gaze and have chest tube labels
# Test rows: no gaze recorded AND a chest-tube label is present (not NaN).
# `np.isnan` is applied to the column's underlying array (`.data`), the same
# way the other masks in this notebook are built, so `test_mask` is a plain
# boolean ndarray rather than whatever the column wrapper returns.
# The mask is built from `dp` but applied to `act_dp`
# (assumes both panels share the same row order -- TODO confirm).
test_mask = np.logical_and(~train_mask, ~np.isnan(dp["chest_tube"].data))
act_dp[test_mask].head()

Unnamed: 0,image_id (NumpyArrayColumn),encoded_pixels (NumpyArrayColumn),pmx (NumpyArrayColumn),filepath (NumpyArrayColumn),chest_tube (NumpyArrayColumn),split (NumpyArrayColumn),gaze_seq (NumpyArrayColumn),gaze_heatmap (NumpyArrayColumn),gaze_max_visit (NumpyArrayColumn),gaze_unique (NumpyArrayColumn),gaze_time (NumpyArrayColumn),gaze_diffusivity (NumpyArrayColumn),expert_label (NumpyArrayColumn),index (ListColumn),input (TensorColumn),img (ListColumn),pred (NumpyArrayColumn),probs (NumpyArrayColumn),activation_block4 (NumpyArrayColumn)
0,1.2.276.0.7230010.3.1.4.8323329.32395.15178751...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,test,,,,,,,,'7',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
1,1.2.276.0.7230010.3.1.4.8323329.12084.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,test,,,,,,,,'26',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
2,1.2.276.0.7230010.3.1.4.8323329.13325.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,test,,,,,,,,'36',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
3,1.2.276.0.7230010.3.1.4.8323329.11640.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,test,,,,,,,,'47',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"
4,1.2.276.0.7230010.3.1.4.8323329.14529.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,0.0,test,,,,,,,,'66',"np.ndarray(shape=torch.Size([3, 224, 224]))",<PIL.Image.Image image mode=L size=1024x1024 a...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))"


In [6]:
from domino.feedback import FeedbackInterface

In [7]:
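# Interactive step, intentionally left disabled. It would open the feedback interface on the
# gaze-labeled rows and save under /media/4tb_hdd/siim/feedback -- presumably how the feedback
# file read in the next cell was produced. Uncomment to collect new feedback.
#FeedbackInterface(act_dp[train_mask], label_column="pmx", save_dir="/media/4tb_hdd/siim/feedback")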

In [8]:
# Load the previously saved feedback panel (feedback labels and masks) from disk.
# NOTE(review): absolute, machine-specific path -- consider a configurable root.
FEEDBACK_DP_PATH = "/media/4tb_hdd/siim/feedback/fb_21-06-23_16-23_f96391.dp"
scribble_dp = DataPanel().read(path=FEEDBACK_DP_PATH)

In [9]:
# Preview the feedback panel: image id, index, feedback label and feedback mask.
scribble_dp.head()

Unnamed: 0,image_id (NumpyArrayColumn),index (ListColumn),feedback_label (NumpyArrayColumn),feedback_mask (ListColumn)
0,1.2.276.0.7230010.3.1.4.8323329.1460.151787516...,'7109',positive,"array([[[0, 0, 0],\n [0, 0, 0],\n ..."
1,1.2.276.0.7230010.3.1.4.8323329.2039.151787517...,'1569',negative,"array([[[0, 0, 0],\n [0, 0, 0],\n ..."
2,1.2.276.0.7230010.3.1.4.8323329.2091.151787517...,'10175',negative,"array([[[0, 0, 0],\n [0, 0, 0],\n ..."
3,1.2.276.0.7230010.3.1.4.8323329.2207.151787517...,'11341',negative,"array([[[0, 0, 0],\n [0, 0, 0],\n ..."
4,1.2.276.0.7230010.3.1.4.8323329.1485.151787516...,'8490',positive,"array([[[0, 0, 0],\n [0, 0, 0],\n ..."


In [10]:
# Count the feedback rows whose label is anything other than 'unlabeled'.
has_feedback_label = scribble_dp['feedback_label'].data != 'unlabeled'
has_feedback_label.sum()

50

In [11]:
## Attach the feedback columns to the activation panel, matching rows on image_id.
## The join is a left join from act_dp; rows without feedback show empty feedback cells.
act_scribble_dp = act_dp.merge(
    scribble_dp,
    on="image_id",
    how="left",
)
act_scribble_dp.head()

Unnamed: 0,image_id (NumpyArrayColumn),encoded_pixels (NumpyArrayColumn),pmx (NumpyArrayColumn),filepath (NumpyArrayColumn),chest_tube (NumpyArrayColumn),split (NumpyArrayColumn),gaze_seq (NumpyArrayColumn),gaze_heatmap (NumpyArrayColumn),gaze_max_visit (NumpyArrayColumn),gaze_unique (NumpyArrayColumn),...,gaze_diffusivity (NumpyArrayColumn),expert_label (NumpyArrayColumn),input (CellColumn),img (CellColumn),pred (NumpyArrayColumn),probs (NumpyArrayColumn),activation_block4 (NumpyArrayColumn),feedback_label (ListColumn),feedback_mask (ListColumn),index (ListColumn)
0,1.2.276.0.7230010.3.1.4.8323329.6904.151787520...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,...,,,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))",,,'0'
1,1.2.276.0.7230010.3.1.4.8323329.13666.15178752...,557374 2 1015 8 1009 14 1002 20 997 26 990 32 ...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,...,,,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))",,,'1'
2,1.2.276.0.7230010.3.1.4.8323329.11028.15178752...,-1,0,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,...,,,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))",,,'2'
3,1.2.276.0.7230010.3.1.4.8323329.10366.15178752...,514175 10 1008 29 994 30 993 32 991 33 990 34 ...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,...,,,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))",,,'3'
4,1.2.276.0.7230010.3.1.4.8323329.10016.15178752...,592184 33 976 58 956 73 941 88 926 102 917 109...,1,/media/4tb_hdd/siim/dicom-images-train/1.2.276...,,train,,,,,...,,,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,MedicalVolumeCell([PosixPath('/media/4tb_hdd/s...,0,"np.ndarray(shape=(2,))","np.ndarray(shape=(2048, 7, 7))",,,'4'


In [12]:
# Row count of the merged panel (sanity check on the left join).
len(act_scribble_dp)

12047

In [13]:
from domino.feedback import ScribbleModel

In [14]:
# Training rows for the scribble model: feedback label is "positive" or "negative".
feedback_label_arr = np.array(act_scribble_dp["feedback_label"].data)
is_positive = feedback_label_arr == "positive"
is_negative = feedback_label_arr == "negative"
scribbled_train_mask = is_positive | is_negative

# Fit the scribble model on the block-4 activations of the scribbled rows.
scrib_model = ScribbleModel(threshold=0, strategy="mask_pos_v")
scrib_model.fit(
    act_scribble_dp[scribbled_train_mask],
    activation_col="activation_block4",
)

# Evaluation rows: a chest-tube label is present and the row was not used for fitting.
# NOTE(review): this keeps every chest-tube-labeled row that was not scribbled,
# which can include rows whose `split` is "train" -- confirm that is intended
# before comparing against the image-only model's probabilities.
tubelabel_mask = ~np.isnan(act_scribble_dp["chest_tube"].data)
scribble_test_mask = tubelabel_mask & ~scribbled_train_mask
y_hat = scrib_model.predict(
    act_scribble_dp[scribble_test_mask],
    activation_col="activation_block4",
)

In [15]:
# Ground-truth chest-tube labels for the evaluation rows.
# Note: this is the column object itself (no `.data`), not a plain ndarray.
y = act_scribble_dp[scribble_test_mask]["chest_tube"]

In [16]:
# ROC AUC of the scribble model's predictions against the chest-tube labels.
roc_auc_score(y_true=y, y_score=y_hat)

0.9012328232840243

In [17]:
# Image-only model baseline on the same evaluation rows: take column 1 of
# `probs` (presumably the positive-class probability -- TODO confirm).
probs_on_test_rows = act_scribble_dp[scribble_test_mask]["probs"].data
model_probs = probs_on_test_rows[:, 1]
roc_auc_score(y_true=y, y_score=model_probs)

0.892167530566217

In [19]:
# Simple ensemble: add the scribble-model output and the image-only model's
# score element-wise, then compute ROC AUC on the sum.
# NOTE(review): the two scores are summed without rescaling -- confirm they
# are on comparable scales.
ensemble_score = y_hat + model_probs
roc_auc_score(y_true=y, y_score=ensemble_score)

0.9129406876904206