Notebook to create figure 2. The exact figure obtained when running this notebook depends on the random train-test-split.

Imports

In [1]:
# Local dependencies
from NET_CUP.data_loading.feature_type import FeatureType
from NET_CUP.data_loading import data_tree, xyp
import NET_CUP.datasources_config as datasources_config

# Other dependencies
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
import plotly.subplots as sp

Settings

In [2]:
# --- Settings that may be adjusted ---
# Feature type whose extracted patch features are used throughout the notebook.
feature_type = FeatureType.IMAGENET
# The classifier must expose `decision_function`, which the distance cell relies on.
classifier = SVC()

# --- Settings that should be kept as they are ---
# A float n_components in (0, 1) makes PCA keep as many components as are
# needed to explain that fraction of the variance.
pca = PCA(n_components=0.95)
border_patches = True
patch_size = 4096
patches_per_patient = 100

Load data

In [3]:
# Build the data tree and drop (in place) every slide for which no features
# of the selected feature type have been extracted.
data = data_tree.create_tree(
    datasources_config.PATIENTS_PATH,
    datasources_config.ENUMBER_PATH,
)
data_tree.drop_slides_without_extracted_features(
    data,
    feature_type,
    datasources_config.UKE_DATASET_DIR,
)

pancreas_intestine_patients = data_tree.get_patients(data, 'pi')
others_patients = data_tree.get_patients(data, 'o')

# Stratified 40 % / 60 % split of the 'pi' patients. No random_state is
# passed, so the split (and therefore the figure) differs between runs.
pi_labels = xyp.get_patient_level_y(pancreas_intestine_patients)
train_patients, pi_test_patients = train_test_split(
    pancreas_intestine_patients,
    train_size=0.4,
    stratify=pi_labels,
)

# The 'o' patients are never trained on; they are only added to the test set.
test_patients = pi_test_patients + others_patients


Classifier training

In [4]:
# Patch-level features and labels of the training patients.
X_train_raw, y_train_patch_level, _ = xyp.get_patch_level_Xyp_complete(
    train_patients,
    patches_per_patient,
    feature_type,
    patch_size,
    border_patches,
    datasources_config.UKE_DATASET_DIR,
)

# PCA is fitted on the training patches only; the fitted projection is
# reused unchanged for the test patches in the distance cell.
X_train_patch_level = pca.fit_transform(X_train_raw)
classifier.fit(X_train_patch_level, y_train_patch_level)

Calculating the patch distances and the sum of all patch distances for each of the test patients

In [5]:
# One accumulator list per origin, in the order used later for plotting.
origins = (
    data_tree.Origin.PANCREAS,
    data_tree.Origin.SI,
    data_tree.Origin.OTHER,
)
# Decision-function value of every single test patch, grouped by origin.
patch_distances = {origin: [] for origin in origins}
# One value per test patient: the sum of that patient's patch distances.
patient_sum_patch_distances = {origin: [] for origin in origins}

for patient in test_patients:
    # Load the patches of this one patient only, so the sum is per patient.
    X_patient, _, _ = xyp.get_patch_level_Xyp_complete(
        [patient],
        patches_per_patient,
        feature_type,
        patch_size,
        border_patches,
        datasources_config.UKE_DATASET_DIR,
    )
    # Project with the PCA fitted on the training data, then score each patch.
    distances = classifier.decision_function(pca.transform(X_patient))

    origin = patient.origin
    patch_distances[origin].extend(distances)
    patient_sum_patch_distances[origin].append(sum(distances))


Visualization

In [6]:
fig = sp.make_subplots(
    rows=1,
    cols=2,
    subplot_titles=['(a) Patch distances', '(b) Sum of all patch distances for each patient'],
)

# Per-origin styling: (origin, legend name, bar colour, opacity in panel (a), opacity in panel (b)).
trace_styles = [
    (data_tree.Origin.PANCREAS, 'Pancreas', '#6C8EBF', 0.6, 0.65),
    (data_tree.Origin.SI, 'Small intestine', '#D79B01', 0.8, 0.65),
    (data_tree.Origin.OTHER, 'Others', '#81B366', 0.8, 0.7),
]

# Panel (a): histogram of the individual patch distances (fixed bins, no auto-binning).
xbins_patch_level = dict(start=-15, end=15, size=0.2)
for origin, label, colour, opacity_patch, opacity_patient in trace_styles:
    fig.add_trace(
        go.Histogram(
            x=patch_distances[origin],
            xbins=xbins_patch_level,
            autobinx=False,
            marker={'color': colour, 'opacity': opacity_patch},
            name=label,
        ),
        row=1,
        col=1,
    )

# Panel (b): histogram of the per-patient sums. These traces are hidden from
# the legend because the panel (a) traces already provide the legend entries.
xbins_patient_level = dict(start=-500, end=500, size=50)
for origin, label, colour, opacity_patch, opacity_patient in trace_styles:
    fig.add_trace(
        go.Histogram(
            x=patient_sum_patch_distances[origin],
            xbins=xbins_patient_level,
            autobinx=False,
            marker={'color': colour, 'opacity': opacity_patient},
            showlegend=False,
        ),
        row=1,
        col=2,
    )

# Same axis styling for both panels.
for update_axes in (fig.update_xaxes, fig.update_yaxes):
    update_axes(showline=True, linecolor='black', gridcolor='lightgrey')

# barmode='overlay' draws the three histograms of a panel on top of each other
# instead of side by side or stacked.
fig.update_layout(
    barmode='overlay',
    plot_bgcolor='white',
    xaxis_title='Distance from the SVM hyperplane',
    xaxis2_title='Sum of patch distances from the SVM hyperplane',
    yaxis_title='Number of patches',
    yaxis2_title='Number of patients',
    margin=dict(l=10, r=10, t=30, b=10),
    height=402,
    width=1102,
)
# Thin black outline around every bar.
fig.update_traces(marker_line_width=0.5, marker_line_color="black")

fig.show()