# Sheet properties

In [15]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

# Imports

In [16]:
import sys
import warnings

from IPython.display import HTML, display
from joblib import dump, load
from numpy import array, logspace
from sklearn.metrics import f1_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC

sys.path.append('/home/rcendre/classification')
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
from toolbox.transforms.common import FlattenTransform
from toolbox.transforms.images import HaralickImageTransform, SpatialImageTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools

warnings.filterwarnings('ignore')

# Parameters

In [17]:
# Advanced parameters
# Passed as the third argument to Folds.build_group_folds in the cells below
# (presumably the number of cross-validation folds — TODO confirm).
validation = 4
# Default dermatology settings from the project toolbox.
# NOTE(review): `settings` is not referenced by any other cell visible here — confirm it is still needed.
settings = Settings.get_default_dermatology()

# Patch classifier

In [18]:
# Load the microscopy patch images used to train the patch-level classifier.
patch = Dermatology.images(modality='Microscopy', data_type='Patch', use_unknown=False)

# Encode the 'Label' column with an encoder fitted on Normal / Benign / Malignant;
# the result is stored under 'LabelEncode'.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
Tools.transform(patch, {'datum': 'Label'}, label_encoder, 'LabelEncode')

# Encode the patient identifiers; 'GroupEncode' is the grouping key handed to the fold builder below.
group_encoder = LabelEncoder().fit(array(patch['ID_Patient'].tolist()))
Tools.transform(patch, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')

# Extract Haralick features from 'Datum'.
# NOTE: the output is stored under the name 'Spatial', and later cells read it under that name.
Tools.transform(patch, {'datum': 'Datum'}, HaralickImageTransform(), 'Spatial')

# Build the folds, grouped on 'GroupEncode'.
Folds.build_group_folds(patch, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)

# Plain string literal: the file name has no placeholders, so no f-string is needed.
IO.save(patch, 'Instances_PatchSpatial.pickle')

In [19]:
# Patch-level model: min-max scaling followed by a linear, class-balanced SVC.
# With probability=True, SVC shuffles the data internally to estimate probabilities;
# random_state pins that shuffling so the probabilities (which feed the
# sliding-window bags later on) are reproducible from one run to the next.
low_predictor = Pipeline([
    ('scale', MinMaxScaler()),
    ('clf', SVC(kernel='linear', class_weight='balanced', decision_function_shape='ovo',
                probability=True, random_state=0)),
])
# Candidate values for the SVC regularisation parameter C: six log-spaced values from 1e-2 to 1e3.
low_params = {'clf__C': logspace(-2, 3, 6).tolist()}

## Evaluate

In [20]:
# Evaluate the patch pipeline on the 'Spatial' features against 'LabelEncode',
# searching over low_params; the report cell reads the outcome back under the name 'Evaluate'.
Tools.evaluate(
    patch,
    {'datum': 'Spatial', 'label_encode': 'LabelEncode'},
    low_predictor,
    'Evaluate',
    distribution=low_params,
)

Evaluation achieved!

In [21]:
# Render the report of the patch-level evaluation stored under 'Evaluate'.
# HTML and display come from the notebook's import cell.
patch_report = Views.report(patch, {'label_encode': 'LabelEncode', 'eval': 'Evaluate'}, label_encoder)
display(HTML(ViewsTools.dataframe_renderer([patch_report], title=['Test - Patch SVC'])))

Unnamed: 0,precision,recall,f1-score,support
Normal,0.86±0.09,0.72±0.12,0.79±0.10,1880.00±143.60
Benign,0.73±0.10,0.64±0.15,0.69±0.11,2332.00±179.52
Malignant,0.33±0.16,0.53±0.09,0.41±0.13,936.00±62.70
accuracy,0.65±0.11,0.65±0.11,0.65±0.11,0.65±0.11
macro avg,0.64±0.07,0.63±0.09,0.63±0.09,5148.00±0.71
weighted avg,0.71±0.08,0.65±0.11,0.67±0.10,5148.00±0.71


## Fit

In [22]:
# Fit the patch pipeline on `patch` (same features, labels and C grid as the evaluation);
# the name is rebound to whatever Tools.fit returns.
low_predictor = Tools.fit(
    patch,
    {'datum': 'Spatial', 'label_encode': 'LabelEncode'},
    low_predictor,
    distribution=low_params,
)

In [23]:
# Persist the predictor returned by Tools.fit with joblib; the sliding-window cell reloads it from this file.
dump(low_predictor, 'Instances_SpatialPredictor')

['Instances_SpatialPredictor']

# Sliding window

In [24]:
# Sliding-window configurations as (size, overlap) pairs. The overlap is a
# percentage: it is divided by 100 where the windows are generated.
inputs_options = [(size, overlap) for size in (250, 500) for overlap in (0, 25, 50)]

In [25]:
# Reload the patch-level predictor saved earlier so the notebook can be resumed from here.
# It is not used inside this loop; the low-level prediction cell relies on it.
low_predictor = load('Instances_SpatialPredictor')

# The label encoding does not depend on the window configuration, so it is fitted once
# instead of once per iteration.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

for size, overlap in inputs_options:
    # inputs_options stores the overlap as a percentage; sliding_images receives it as a fraction.
    inputs = Dermatology.sliding_images(size=size, overlap=overlap/100, modality='Microscopy')
    # Haralick features, stored under 'Spatial' (the name the patch predictor was trained on).
    Tools.transform(inputs, {'datum': 'Datum'}, HaralickImageTransform(), 'Spatial')
    # Transform groups: the encoder is fitted on this configuration's own patient identifiers.
    group_encoder = LabelEncoder().fit(array(inputs['ID_Patient'].tolist()))
    Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')
    # Transform labels
    Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')
    # Make folds, grouped on 'GroupEncode'.
    Folds.build_group_folds(inputs, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)
    # One file per (size, overlap) configuration.
    IO.save(inputs, f'Instances_Spatial_{size}_{overlap}.pickle')

Progress: |███████████████████████████████████████████████████████████████████████████████████████████████████-| 100.0 

# Low level predictions

In [26]:
for size, overlap in inputs_options:
    inputs = IO.load(f'Instances_Spatial_{size}_{overlap}.pickle')

    # Apply the patch-level predictor (left in the kernel by the sliding-window cell) to every
    # row whose Type is not 'Full': hard predictions first, then class probabilities.
    for predict_fn, output in ((Tools.predict, Tools.PREDICTION), (Tools.predict_proba, Tools.PROBABILITY)):
        predict_fn(inputs, {'datum': 'Spatial'}, low_predictor, output, mask=inputs.Type != 'Full')

    # Build the bags for the 'Full' rows from the 'Instance' rows, once per output
    # (presumably matching 'Reference' against 'Source' — TODO confirm against Data.build_bags).
    for output in (Tools.PREDICTION, Tools.PROBABILITY):
        Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', output)

    # Keep only the 'Full' rows for the high-level stage.
    inputs = inputs[inputs.Type == 'Full']

    # Save the low-level results, one file per configuration.
    IO.save(inputs, f'Instances_LowSpatial_{size}_{overlap}.pickle')

# High level predictions

In [27]:
for size, overlap in inputs_options:
    inputs = IO.load(f'Instances_LowSpatial_{size}_{overlap}.pickle')

    # Decisions: voting classifiers evaluated on the bagged patch predictions.
    for mode, column in (('at_least_one', 'D_ALO'), ('dynamic_thresh', 'D_DYN')):
        Tools.evaluate(inputs, {'datum': Tools.PREDICTION, 'label_encode': 'LabelEncode'},
                       DecisionVotingClassifier(mode=mode, metric=f1_score), column)

    # Score: voting classifier evaluated on the bagged patch probabilities.
    Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'},
                   ScoreVotingClassifier(low='max', high='dynamic'), 'S_MAX')

    # SVC trained on the flattened probabilities. random_state pins the internal shuffling
    # that probability=True triggers, so repeated runs give the same probability estimates.
    score_svc = Pipeline([('flat', FlattenTransform()), ('clf', SVC(probability=True, random_state=0))])
    score_params = {'clf__C': logspace(-2, 3, 6).tolist()}
    Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'},
                   score_svc, 'High_SVC', distribution=score_params)

    # Save the high-level results, one file per configuration.
    IO.save(inputs, f'Instances_HighSpatial_{size}_{overlap}.pickle')

[0.6875 0.375  0.125 ]
[0.75   0.1875 0.1875]
[0.6875 0.3125 0.1875]
[0.6875 0.3125 0.125 ]
[0.64 0.4  0.12]...!
[0.64 0.32 0.2 ]...
[0.64 0.32 0.16]...
[0.64 0.32 0.16]...
[0.55102041 0.34693878 0.12244898]
[0.69387755 0.34693878 0.18367347]
[0.55102041 0.34693878 0.20408163]
[0.55102041 0.36734694 0.14285714]
[0.5  0.5  0.25]...!
[0.5  0.5  0.25]...
[0.5  0.5  0.25]...
[0.5  0.75 0.25]...
[0.5  0.75 0.25]...!
[0.25 0.75 0.25]...
[0.25 0.75 0.  ]...
[0.5  0.75 0.25]...
[0.33333333 0.66666667 0.11111111]
[0.33333333 0.55555556 0.11111111]
[0.33333333 0.66666667 0.11111111]
[0.33333333 0.77777778 0.11111111]
Evaluation achieved!

## Analysis

In [28]:
# HTML and display come from the notebook's import cell.
# Encoder handed to Views.report (presumably to turn encoded classes back into names — TODO confirm).
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
# Names under which the high-level prediction cell stored its evaluations.
models = ['D_ALO', 'D_DYN', 'S_MAX', 'High_SVC']

for size, overlap in inputs_options:
    inputs = IO.load(f'Instances_HighSpatial_{size}_{overlap}.pickle')

    # One report per high-level model, titled with the model and window configuration.
    for high_name in models:
        name = f'{high_name}_{size}_{overlap}'
        report = Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': high_name}, label_encoder)
        display(HTML(ViewsTools.dataframe_renderer([report], title=[f'Test - {name}'])))

Unnamed: 0,precision,recall,f1-score,support
Normal,0.46±0.30,0.09±0.06,0.15±0.10,757.00±84.88
Benign,0.76±0.03,0.35±0.03,0.48±0.03,2204.00±50.92
Malignant,0.53±0.05,0.91±0.03,0.67±0.04,2470.00±69.90
accuracy,0.57±0.03,0.57±0.03,0.57±0.03,0.57±0.03
macro avg,0.58±0.10,0.45±0.03,0.43±0.03,5431.00±20.73
weighted avg,0.61±0.03,0.57±0.03,0.52±0.02,5431.00±20.73


Unnamed: 0,precision,recall,f1-score,support
Normal,0.45±0.13,0.39±0.24,0.42±0.17,757.00±84.88
Benign,0.71±0.06,0.56±0.12,0.62±0.06,2204.00±50.92
Malignant,0.61±0.05,0.76±0.11,0.68±0.05,2470.00±69.90
accuracy,0.62±0.02,0.62±0.02,0.62±0.02,0.62±0.02
macro avg,0.59±0.04,0.57±0.06,0.57±0.04,5431.00±20.73
weighted avg,0.63±0.02,0.62±0.02,0.62±0.02,5431.00±20.73


Unnamed: 0,precision,recall,f1-score,support
Normal,0.19±0.09,0.47±0.18,0.27±0.10,757.00±84.88
Benign,0.74±0.05,0.41±0.06,0.53±0.05,2204.00±50.92
Malignant,0.64±0.06,0.61±0.08,0.63±0.04,2470.00±69.90
accuracy,0.51±0.02,0.51±0.02,0.51±0.02,0.51±0.02
macro avg,0.53±0.02,0.50±0.04,0.48±0.02,5431.00±20.73
weighted avg,0.62±0.03,0.51±0.02,0.54±0.02,5431.00±20.73


Unnamed: 0,precision,recall,f1-score,support
Normal,0.35±0.16,0.14±0.02,0.20±0.03,757.00±84.88
Benign,0.62±0.03,0.71±0.06,0.66±0.04,2204.00±50.92
Malignant,0.64±0.07,0.68±0.06,0.66±0.04,2470.00±69.90
accuracy,0.62±0.04,0.62±0.04,0.62±0.04,0.62±0.04
macro avg,0.54±0.04,0.51±0.02,0.51±0.02,5431.00±20.73
weighted avg,0.60±0.04,0.62±0.04,0.60±0.05,5431.00±20.73


Unnamed: 0,precision,recall,f1-score,support
Normal,0.24±0.20,0.03±0.02,0.05±0.04,757.00±107.09
Benign,0.79±0.03,0.30±0.04,0.44±0.04,2204.00±57.94
Malignant,0.51±0.11,0.94±0.01,0.67±0.10,2470.00±137.06
accuracy,0.55±0.09,0.55±0.09,0.55±0.09,0.55±0.09
macro avg,0.52±0.04,0.42±0.00,0.38±0.03,5431.00±7.82
weighted avg,0.59±0.03,0.55±0.09,0.49±0.10,5431.00±7.82


Unnamed: 0,precision,recall,f1-score,support
Normal,0.39±0.17,0.37±0.17,0.38±0.15,757.00±107.09
Benign,0.72±0.06,0.56±0.11,0.63±0.06,2204.00±57.94
Malignant,0.62±0.15,0.75±0.08,0.68±0.11,2470.00±137.06
accuracy,0.62±0.07,0.62±0.07,0.62±0.07,0.62±0.07
macro avg,0.58±0.04,0.56±0.06,0.56±0.06,5431.00±7.82
weighted avg,0.63±0.04,0.62±0.07,0.62±0.07,5431.00±7.82


Unnamed: 0,precision,recall,f1-score,support
Normal,0.16±0.12,0.36±0.12,0.22±0.10,757.00±107.09
Benign,0.70±0.09,0.45±0.07,0.55±0.05,2204.00±57.94
Malignant,0.61±0.15,0.57±0.11,0.59±0.06,2470.00±137.06
accuracy,0.49±0.01,0.49±0.01,0.49±0.01,0.49±0.01
macro avg,0.49±0.03,0.46±0.04,0.45±0.02,5431.00±7.82
weighted avg,0.59±0.03,0.49±0.01,0.52±0.03,5431.00±7.82


Unnamed: 0,precision,recall,f1-score,support
Normal,0.45±0.37,0.17±0.07,0.24±0.12,757.00±107.09
Benign,0.60±0.06,0.72±0.06,0.65±0.04,2204.00±57.94
Malignant,0.70±0.12,0.71±0.05,0.71±0.06,2470.00±137.06
accuracy,0.64±0.05,0.64±0.05,0.64±0.05,0.64±0.05
macro avg,0.58±0.08,0.53±0.01,0.53±0.01,5431.00±7.82
weighted avg,0.62±0.02,0.64±0.05,0.62±0.06,5431.00±7.82


Unnamed: 0,precision,recall,f1-score,support
Normal,0.27±0.33,0.02±0.02,0.04±0.04,757.00±100.81
Benign,0.83±0.08,0.25±0.07,0.39±0.08,2204.00±18.49
Malignant,0.50±0.10,0.96±0.02,0.66±0.08,2470.00±108.49
accuracy,0.54±0.09,0.54±0.09,0.54±0.09,0.54±0.09
macro avg,0.53±0.08,0.41±0.01,0.36±0.05,5431.00±8.47
weighted avg,0.60±0.07,0.54±0.09,0.46±0.11,5431.00±8.47


Unnamed: 0,precision,recall,f1-score,support
Normal,0.40±0.24,0.41±0.19,0.41±0.21,757.00±100.81
Benign,0.71±0.08,0.58±0.10,0.64±0.02,2204.00±18.49
Malignant,0.63±0.12,0.73±0.12,0.68±0.07,2470.00±108.49
accuracy,0.63±0.03,0.63±0.03,0.63±0.03,0.63±0.03
macro avg,0.58±0.05,0.58±0.04,0.57±0.04,5431.00±8.47
weighted avg,0.63±0.03,0.63±0.03,0.62±0.03,5431.00±8.47


Unnamed: 0,precision,recall,f1-score,support
Normal,0.16±0.11,0.35±0.13,0.21±0.11,757.00±100.81
Benign,0.72±0.06,0.38±0.10,0.50±0.07,2204.00±18.49
Malignant,0.61±0.13,0.64±0.11,0.63±0.07,2470.00±108.49
accuracy,0.50±0.03,0.50±0.03,0.50±0.03,0.50±0.03
macro avg,0.50±0.02,0.46±0.03,0.45±0.01,5431.00±8.47
weighted avg,0.59±0.09,0.50±0.03,0.52±0.06,5431.00±8.47


Unnamed: 0,precision,recall,f1-score,support
Normal,0.42±0.35,0.12±0.06,0.18±0.09,757.00±100.81
Benign,0.61±0.11,0.73±0.04,0.67±0.06,2204.00±18.49
Malignant,0.67±0.11,0.71±0.11,0.69±0.08,2470.00±108.49
accuracy,0.64±0.07,0.64±0.07,0.64±0.07,0.64±0.07
macro avg,0.57±0.06,0.52±0.02,0.51±0.03,5431.00±8.47
weighted avg,0.61±0.05,0.64±0.07,0.61±0.08,5431.00±8.47


Unnamed: 0,precision,recall,f1-score,support
Normal,0.47±0.29,0.14±0.07,0.22±0.05,757.00±141.41
Benign,0.61±0.08,0.68±0.05,0.64±0.07,2204.00±105.79
Malignant,0.60±0.07,0.67±0.07,0.63±0.05,2470.00±85.70
accuracy,0.60±0.06,0.60±0.06,0.60±0.06,0.60±0.06
macro avg,0.56±0.06,0.50±0.02,0.50±0.04,5431.00±87.78
weighted avg,0.59±0.03,0.60±0.06,0.58±0.08,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.45±0.31,0.26±0.07,0.33±0.09,757.00±141.41
Benign,0.63±0.06,0.64±0.06,0.64±0.06,2204.00±105.79
Malignant,0.60±0.07,0.67±0.07,0.63±0.05,2470.00±85.70
accuracy,0.60±0.04,0.60±0.04,0.60±0.04,0.60±0.04
macro avg,0.56±0.07,0.52±0.02,0.53±0.01,5431.00±87.78
weighted avg,0.59±0.04,0.60±0.04,0.59±0.04,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.19±0.15,0.60±0.15,0.29±0.16,757.00±141.41
Benign,0.68±0.06,0.40±0.01,0.50±0.03,2204.00±105.79
Malignant,0.67±0.11,0.49±0.18,0.56±0.15,2470.00±85.70
accuracy,0.47±0.08,0.47±0.08,0.47±0.08,0.47±0.08
macro avg,0.51±0.03,0.49±0.02,0.45±0.09,5431.00±87.78
weighted avg,0.61±0.05,0.47±0.08,0.50±0.05,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.31±0.16,0.10±0.09,0.15±0.11,757.00±141.41
Benign,0.60±0.09,0.72±0.05,0.65±0.06,2204.00±105.79
Malignant,0.66±0.06,0.68±0.10,0.67±0.07,2470.00±85.70
accuracy,0.61±0.05,0.61±0.05,0.61±0.05,0.61±0.05
macro avg,0.52±0.09,0.50±0.03,0.49±0.06,5431.00±87.78
weighted avg,0.59±0.11,0.61±0.05,0.59±0.08,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.10±0.17,0.01±0.04,0.03±0.02,757.00±141.41
Benign,0.61±0.07,0.68±0.04,0.64±0.05,2204.00±105.79
Malignant,0.59±0.07,0.69±0.09,0.64±0.07,2470.00±85.70
accuracy,0.59±0.06,0.59±0.06,0.59±0.06,0.59±0.06
macro avg,0.44±0.03,0.46±0.02,0.43±0.03,5431.00±87.78
weighted avg,0.53±0.05,0.59±0.06,0.55±0.08,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.49±0.36,0.16±0.18,0.24±0.17,757.00±141.41
Benign,0.65±0.28,0.50±0.28,0.57±0.28,2204.00±105.79
Malignant,0.54±0.05,0.76±0.17,0.63±0.05,2470.00±85.70
accuracy,0.57±0.04,0.57±0.04,0.57±0.04,0.57±0.04
macro avg,0.56±0.17,0.47±0.09,0.48±0.12,5431.00±87.78
weighted avg,0.58±0.15,0.57±0.04,0.55±0.10,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.20±0.15,0.47±0.25,0.28±0.15,757.00±141.41
Benign,0.68±0.07,0.52±0.02,0.59±0.02,2204.00±105.79
Malignant,0.62±0.09,0.49±0.13,0.55±0.09,2470.00±85.70
accuracy,0.50±0.05,0.50±0.05,0.50±0.05,0.50±0.05
macro avg,0.50±0.04,0.49±0.04,0.47±0.06,5431.00±87.78
weighted avg,0.59±0.07,0.50±0.05,0.53±0.03,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.14±0.38,0.04±0.04,0.06±0.04,757.00±141.41
Benign,0.58±0.10,0.72±0.06,0.65±0.05,2204.00±105.79
Malignant,0.67±0.04,0.67±0.09,0.67±0.06,2470.00±85.70
accuracy,0.61±0.04,0.61±0.04,0.61±0.04,0.61±0.04
macro avg,0.46±0.10,0.48±0.02,0.46±0.03,5431.00±87.78
weighted avg,0.56±0.04,0.61±0.04,0.58±0.07,5431.00±87.78


Unnamed: 0,precision,recall,f1-score,support
Normal,0.07±0.15,0.01±0.01,0.01±0.02,757.00±82.68
Benign,0.69±0.06,0.60±0.11,0.64±0.09,2204.00±101.68
Malignant,0.57±0.13,0.80±0.05,0.67±0.11,2470.00±164.26
accuracy,0.61±0.08,0.61±0.08,0.61±0.08,0.61±0.08
macro avg,0.44±0.09,0.47±0.05,0.44±0.05,5431.00±32.74
weighted avg,0.55±0.08,0.61±0.08,0.57±0.08,5431.00±32.74


Unnamed: 0,precision,recall,f1-score,support
Normal,0.29±0.11,0.11±0.06,0.16±0.07,757.00±82.68
Benign,0.71±0.06,0.56±0.13,0.62±0.10,2204.00±101.68
Malignant,0.57±0.13,0.80±0.05,0.67±0.11,2470.00±164.26
accuracy,0.60±0.08,0.60±0.08,0.60±0.08,0.60±0.08
macro avg,0.53±0.04,0.49±0.04,0.48±0.04,5431.00±32.74
weighted avg,0.59±0.04,0.60±0.08,0.58±0.07,5431.00±32.74


Unnamed: 0,precision,recall,f1-score,support
Normal,0.18±0.05,0.40±0.18,0.25±0.05,757.00±82.68
Benign,0.68±0.07,0.49±0.14,0.57±0.09,2204.00±101.68
Malignant,0.59±0.18,0.52±0.11,0.55±0.06,2470.00±164.26
accuracy,0.49±0.05,0.49±0.05,0.49±0.05,0.49±0.05
macro avg,0.48±0.04,0.47±0.04,0.46±0.03,5431.00±32.74
weighted avg,0.57±0.07,0.49±0.05,0.52±0.05,5431.00±32.74


Unnamed: 0,precision,recall,f1-score,support
Normal,0.52±0.27,0.15±0.15,0.24±0.17,757.00±82.68
Benign,0.62±0.05,0.72±0.09,0.67±0.05,2204.00±101.68
Malignant,0.63±0.14,0.67±0.04,0.65±0.09,2470.00±164.26
accuracy,0.62±0.05,0.62±0.05,0.62±0.05,0.62±0.05
macro avg,0.59±0.08,0.52±0.05,0.52±0.07,5431.00±32.74
weighted avg,0.61±0.06,0.62±0.05,0.60±0.06,5431.00±32.74
