# Sheet properties

In [None]:
%%javascript
// Override the output-area scroll check so it always answers "no",
// whatever number of lines is passed in.
// NOTE(review): relies on the `IPython` JavaScript global being available in the front-end — confirm.
IPython.OutputArea.prototype._should_scroll = (_lines) => false;


In [13]:
%matplotlib inline
# produce vector inline graphics
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'svg')
import matplotlib.pyplot as plt

# Add the local toolbox to the import path

In [14]:
import os
import sys
from IPython.display import HTML

# Directory that is added to the import path so the `toolbox` imports below resolve.
# The original hard-coded location stays the default; set the CLASSIFICATION_ROOT
# environment variable to point at a different checkout.
toolbox_root = os.environ.get('CLASSIFICATION_ROOT', '/home/rcendre/classification')

# Guard against piling up duplicate entries when this cell is re-run.
if toolbox_root not in sys.path:
    sys.path.append(toolbox_root)

# Imports

In [18]:
import webbrowser
import pandas
from pathlib import Path
from IPython.display import display
from numpy import array, logspace
from scipy.stats import randint as randint
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.IO import dermatology
from toolbox.transforms.common import PredictorTransform, FlattenTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools
from toolbox.views.images import ImagesViews, PatchViews
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
from toolbox.transforms.images import SpatialImageTransform
import warnings
warnings.filterwarnings('ignore') 

# Parameters

In [16]:
# Advanced parameters
# `use_unknown` is forwarded to Dermatology.images; `validation` is forwarded
# to Folds.build_group_folds.
use_unknown, validation = False, 4
settings = Settings.get_default_dermatology()
# Encoder fitted on the three class names, in this order.
label_encoder = OrderedEncoder().fit(
    ['Normal', 'Benign', 'Malignant']
)

# Low level classifier

In [19]:
# Load the microscopy patch dataset used to train the low-level classifier.
patch = Dermatology.images(
    modality='Microscopy',
    data_type='Patch',
    use_unknown=use_unknown,
)

# Encode the 'Label' column into 'LabelEncode'.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
Tools.transform(patch, {'datum': 'Label'}, label_encoder, 'LabelEncode')

# Run the spatial image transform on 'Datum' and store the result as 'Wiltgen'.
wiltgen_transform = SpatialImageTransform()
Tools.transform(patch, {'datum': 'Datum'}, wiltgen_transform, 'Wiltgen')

In [21]:
# Low-level model: standard scaling followed by a linear, class-balanced SVC
# with probability estimates enabled and at most 300 solver iterations.
low_steps = [
    ('scale', StandardScaler()),
    ('clf', SVC(kernel='linear', class_weight='balanced', max_iter=300, probability=True)),
]
low_predictor = Pipeline(low_steps)

# Candidate values for the SVC regularisation parameter C: 10^-2 ... 10^3.
low_params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [25]:
# Fit the low-level pipeline on the patch features, searching over `low_params`.
# `low_predictor` is rebound to whatever Tools.fit returns.
low_columns = {'datum': 'Wiltgen', 'label_encode': 'LabelEncode'}
low_predictor = Tools.fit(patch, low_columns, low_predictor, distribution=low_params)

# Inputs

In [27]:
# Sliding-window configurations as (size, overlap) pairs.
# `overlap` is expressed as a percentage and is divided by 100 where it is used.
inputs_options = [(size, overlap) for size in (250, 500) for overlap in (0, 25, 50)]

Progress: |███████████████████████████████████████████████████████████████████████████████████████████████████-| 100.0 

In [None]:
# Build and save one dataset per (size, overlap) sliding-window configuration.
for size, overlap in inputs_options:
    # `overlap` is stored as a percentage; it is passed on as a fraction.
    # (The original call had an unbalanced closing parenthesis, a SyntaxError.)
    inputs = Dermatology.sliding_images(size=size, overlap=overlap/100, modality='Microscopy')
    Tools.transform(inputs, {'datum': 'Datum'}, SpatialImageTransform(), 'Wiltgen')
    # Transform groups: encode patient identifiers into 'GroupEncode'
    group_encoder = LabelEncoder().fit(array(inputs['ID_Patient'].tolist()))
    Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')
    # Transform labels: encode 'Label' into 'LabelEncode'
    label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
    Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')
    # Make folds, grouped on the encoded patient identifier
    Folds.build_group_folds(inputs, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)
    IO.save(inputs, f'SExtraction_{size}_{overlap}')

# Low level predictions

In [None]:
# Apply the fitted low-level predictor to every saved extraction and store the result.
for size, overlap in inputs_options:
    # NOTE(review): `inputs` is passed as the first argument to IO.load here, while the
    # analysis cell calls IO.load with a single path — confirm the expected signature.
    inputs = IO.load(inputs, f'SExtraction_{size}_{overlap}')

    # Low-level predictions, then probabilities, on every row whose Type is not 'Full'.
    for run_low_level, output_column in (
        (Tools.predict, Tools.PREDICTION),
        (Tools.predict_proba, Tools.PROBABILITY),
    ):
        run_low_level(
            inputs,
            {'datum': 'Wiltgen', 'label_encode': 'LabelEncode'},
            low_predictor,
            output_column,
            mask=inputs.Type != 'Full',
        )

    # Build bags for both outputs from the 'Instance' rows and the 'Full' rows.
    # NOTE(review): presumably this gathers instance outputs onto their 'Full'
    # reference row via 'Reference'/'Source' — confirm against Data.build_bags.
    for output_column in (Tools.PREDICTION, Tools.PROBABILITY):
        Data.build_bags(
            inputs,
            inputs.Type == 'Full', 'Reference',
            inputs.Type == 'Instance', 'Source',
            output_column,
        )

    # Keep only the 'Full' rows before saving the low-level results.
    inputs = inputs[inputs.Type == 'Full']
    IO.save(inputs, f'SLow_{size}_{overlap}')

# High level predictions

In [None]:
# Evaluate the high-level (decision- and score-based) models on each saved low-level result.
for size, overlap in inputs_options:
    # NOTE(review): `inputs` is passed as the first argument to IO.load here, while the
    # analysis cell calls IO.load with a single path — confirm the expected signature.
    inputs = IO.load(inputs, f'SLow_{size}_{overlap}')

    # Mask of the 'Full' rows. The original cell used `full` without defining it,
    # which raises NameError on a fresh kernel.
    full = inputs.Type == 'Full'

    # Decisions
    Tools.evaluate(inputs, {'datum': Tools.PREDICTION, 'label_encode': 'LabelEncode'}, DecisionVotingClassifier(mode='at_least_one'), 'D_ALO')
    Tools.evaluate(inputs, {'datum': Tools.PREDICTION, 'label_encode': 'LabelEncode'}, DecisionVotingClassifier(mode='dynamic_thresh'), 'D_DYN')

    # Score
    # `def_svc` was referenced but never defined anywhere in this notebook.
    # NOTE(review): assumed to mean an SVC with default hyper-parameters (no search),
    # using the same flatten + SVC pipeline shape as `score_svc` below — confirm intent.
    def_svc = Pipeline([('flat', FlattenTransform()), ('clf', SVC(probability=True))])
    Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'}, def_svc, 'S_SVC', mask=full)
    Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'}, ScoreVotingClassifier(low='max', high='dynamic'), 'S_MaxD')

    # SVC on the flattened probabilities, with a search over C (10^-2 ... 10^3)
    score_svc = Pipeline([('flat', FlattenTransform()), ('clf', SVC(probability=True))])
    score_params = {'clf__C': logspace(-2, 3, 6).tolist()}
    Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'}, score_svc, 'High_SVC', distribution=score_params)

    # High Level
    IO.save(inputs, f'SHigh_{size}_{overlap}')

## Analysis

In [None]:
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
# Evaluation names to report. These match the names written by the
# "High level predictions" cell; the previous list named 'D_SVC', 'S_MeanD' and
# 'S_NormD', which this notebook never computes, and omitted 'High_SVC'.
# NOTE(review): restore the old names if the analysed file comes from another pipeline.
models = ['D_ALO', 'D_DYN', 'S_SVC', 'S_MaxD', 'High_SVC']

In [None]:
# Render test/train reports for every model and every sliding-window configuration.
for size, overlap in inputs_options:
    # The original cell discarded this load result and then read an
    # `SHIGH_{architecture}_...hdf5` file, with `architecture` never defined in this
    # notebook (NameError). Load the dataset saved by the high-level cell instead.
    # NOTE(review): confirm the IO.load signature; other cells call it the same way.
    name = f'{size}_{overlap}'
    inputs = IO.load(inputs, f'SHigh_{name}')
    full = inputs.Type == 'Full'
    for model in models:
        title = f'{model}_{name}'
        # Details and results: first report uses the default view, second uses as_train=True.
        test_report = Views.report(
            ViewsTools.data_as(inputs[full], model),
            {'label_encode': 'LabelEncode', 'eval': model},
            label_encoder,
        )
        train_report = Views.report(
            ViewsTools.data_as(inputs[full], model, as_train=True),
            {'label_encode': 'LabelEncode', 'eval': model},
            label_encoder,
        )
        display(HTML(ViewsTools.dataframe_renderer(
            [test_report, train_report],
            title=[f'Test - {title}', f'Train - {title}'],
        )))



In [None]:
# Inspect a single 'Full' sample: print its label, then display its low-level outputs.
index = 10
full_rows = inputs[inputs.Type == 'Full']
print(full_rows.iloc[index]['Label'])
# NOTE(review): the columns read here carry a 'Supervised_' prefix, whereas the
# prediction cells write to Tools.PREDICTION / Tools.PROBABILITY directly — confirm.
for low_level_output in (Tools.PREDICTION, Tools.PROBABILITY):
    PatchViews.display(inputs, f'Supervised_{low_level_output}', settings, label_encoder, index=index)