# Sheet properties

In [None]:
%%javascript
// Replace the output area's `_should_scroll` hook with a version that always
// returns false, regardless of the `lines` argument.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead of
// placing them in a scroll box, and relies on the `IPython` global of the classic
// Notebook front end — confirm it is still needed in the target environment.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

# Imports

In [None]:
import os
import sys
# Directory appended to the module search path (the `toolbox.*` imports below are
# presumably resolved from it — confirm). It defaults to the original location but
# can be overridden per machine with the CLASSIFICATION_ROOT environment variable,
# so the notebook is not tied to a single user's home directory.
project_root = os.environ.get('CLASSIFICATION_ROOT', '/home/rcendre/classification')
# Only append once: re-running this cell in the same kernel must not stack duplicates.
if project_root not in sys.path:
    sys.path.append(project_root)
from joblib import dump, load
from numpy import array, logspace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.transforms.common import FlattenTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools
from toolbox.views.images import PatchViews
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
from toolbox.transforms.images import DWTImageTransform
import warnings
# Install a global 'ignore' filter: from this point on all Python warnings are silenced.
# NOTE(review): this also hides anything the libraries used below might emit
# (e.g. solver convergence warnings) — confirm that this is intended.
warnings.filterwarnings('ignore') 

# Parameters

In [None]:
# Advanced parameters
# Forwarded as `use_unknown=` to Dermatology.images() when the patch data is loaded
# (presumably controls whether samples with an unknown label are kept — TODO confirm).
use_unknown = False 
# Third argument of Folds.build_group_folds() in the (currently commented-out)
# sliding-window cell; presumably the number of validation folds — TODO confirm.
validation = 4
# Default dermatology settings object; passed to PatchViews.display() in the last cell.
settings = Settings.get_default_dermatology()

# Patch classifier

In [None]:
# Request the 'Patch' images of the 'Microscopy' modality.
patch = Dermatology.images(
    modality='Microscopy',
    data_type='Patch',
    use_unknown=use_unknown,
)

# Encoder fitted on the three class names, in this fixed order.
class_names = ['Normal', 'Benign', 'Malignant']
label_encoder = OrderedEncoder().fit(class_names)

# Run the encoder on 'Label' under the tag 'LabelEncode'. The return value is unused,
# so Tools.transform presumably stores its result on `patch` itself — TODO confirm.
Tools.transform(patch, {'datum': 'Label'}, label_encoder, 'LabelEncode')

# Run a db4 wavelet transform (scale 1) on 'Datum' under the tag 'Frequency'.
wavelet_transform = DWTImageTransform(wavelets='db4', scale=1)
Tools.transform(patch, {'datum': 'Datum'}, wavelet_transform, 'Frequency')

# Persist the prepared patch data.
IO.save(patch, 'PatchFrequency.pickle')

In [None]:
# Patch-level model: min-max scaling followed by a linear, class-balanced SVC with
# probability estimates enabled and at most 300 solver iterations.
low_predictor = Pipeline(
    steps=[
        ('scale', MinMaxScaler()),
        (
            'clf',
            SVC(
                kernel='linear',
                class_weight='balanced',
                max_iter=300,
                probability=True,
            ),
        ),
    ]
)
# Candidate values for the SVC's C parameter: six log-spaced points from 1e-2 to 1e3.
low_params = dict(clf__C=logspace(-2, 3, 6).tolist())

In [None]:
# Tag mapping handed to Tools.fit: 'Frequency' as datum, 'LabelEncode' as encoded label.
fit_tags = {'datum': 'Frequency', 'label_encode': 'LabelEncode'}
# `low_predictor` is rebound to whatever Tools.fit returns, so re-running this cell
# passes the previously returned object back in instead of the unfitted pipeline.
low_predictor = Tools.fit(
    patch,
    fit_tags,
    low_predictor,
    distribution=low_params,
)

In [None]:
# Persist the predictor returned by Tools.fit with joblib under the file name
# 'FrequencyPredictor'; the commented-out sliding-window cell reloads it from that name.
dump(low_predictor, 'FrequencyPredictor')

# Sliding window

In [None]:
# Every (size, overlap) combination to process: sizes 250 and 500, each with
# overlaps 0, 25 and 50 (the commented-out cell below divides overlap by 100).
# Ordered by size first, then by overlap.
inputs_options = [
    (window_size, window_overlap)
    for window_size in (250, 500)
    for window_overlap in (0, 25, 50)
]

In [None]:
# NOTE: this cell is disabled (entirely commented out). When enabled it reloads the
# saved predictor and, for each (size, overlap) in inputs_options, builds the
# sliding-window inputs, applies the wavelet transform under the tag 'Frequency',
# encodes patient IDs ('GroupEncode') and labels ('LabelEncode'), builds group folds
# and saves the result as 'Frequency_{size}_{overlap}'.
# low_predictor = load('FrequencyPredictor')
# for size, overlap in inputs_options:
#     inputs = Dermatology.sliding_images(size=size, overlap=overlap/100, modality='Microscopy')
#     Tools.transform(inputs, {'datum': 'Datum'}, DWTImageTransform(wavelets='db4', scale=1), 'Frequency')
#     # Transform groups
#     group_encoder = LabelEncoder().fit(array(inputs['ID_Patient'].tolist()))
#     Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')
#     # Transform labels
#     label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
#     Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')
#     # Make folds
#     Folds.build_group_folds(inputs, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)
#     IO.save(inputs, f'Frequency_{size}_{overlap}')

# Low level predictions

In [None]:
# NOTE: this cell is disabled (entirely commented out). When enabled, for each
# (size, overlap) it loads 'Frequency_{size}_{overlap}', runs `low_predictor` on the
# rows whose Type is not 'Full', builds bags for the 'Full' rows from the 'Instance'
# rows, keeps only the 'Full' rows and saves them as 'LowFrequency_{size}_{overlap}.pickle'.
# for size, overlap in inputs_options:
#     inputs = IO.load(f'Frequency_{size}_{overlap}')
    
#     # Low-level predictions and probabilities for every row that is not a 'Full' image
#     Tools.predict(inputs, {'datum': 'Frequency'}, low_predictor, Tools.PREDICTION, mask=inputs.Type != 'Full')
#     Tools.predict_proba(inputs, {'datum': 'Frequency'}, low_predictor, Tools.PROBABILITY, mask=inputs.Type != 'Full')

#     Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', Tools.PREDICTION)
#     Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', Tools.PROBABILITY)

#     # Filter elements
#     inputs = inputs[inputs.Type == 'Full']
    
#     # Low Level
#     IO.save(inputs, f'LowFrequency_{size}_{overlap}.pickle')

# High level predictions

In [None]:
# NOTE: this cell is disabled (entirely commented out). When enabled, for each
# (size, overlap) it loads the low-level results, evaluates four high-level models
# under the tags 'D_ALO', 'D_DYN', 'S_MAX' and 'High_SVC', and saves the outcome as
# 'HighFrequency_{size}_{overlap}.pickle' (the file the analysis cell reads).
# NOTE(review): the load below targets 'LowFrequency_…', the file written by the
# low-level cell; this line used to reference 'LowSpatial_…', which nothing in this
# notebook produces — confirm before re-enabling.
# for size, overlap in inputs_options:
#     inputs = IO.load(f'LowFrequency_{size}_{overlap}.pickle')
    
#     # Decisions
#     Tools.evaluate(inputs, {'datum': Tools.PREDICTION, 'label_encode': 'LabelEncode'}, DecisionVotingClassifier(mode='at_least_one', metric=f1_score), 'D_ALO')
#     Tools.evaluate(inputs, {'datum': Tools.PREDICTION, 'label_encode': 'LabelEncode'}, DecisionVotingClassifier(mode='dynamic_thresh', metric=f1_score), 'D_DYN')

#     # Score
#     Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'}, ScoreVotingClassifier(low='max', high='dynamic'), 'S_MAX')
    
#     # SVC
#     score_svc = Pipeline([('flat', FlattenTransform()), ('clf', SVC(probability=True))])
#     score_params = {'clf__C': logspace(-2, 3, 6).tolist()}
#     Tools.evaluate(inputs, {'datum': Tools.PROBABILITY, 'label_encode': 'LabelEncode'}, score_svc, 'High_SVC', distribution=score_params)

#     # High Level    
#     IO.save(inputs, f'HighFrequency_{size}_{overlap}.pickle')

## Analysis

In [None]:
from IPython.display import HTML, display

# Three-class encoder, refitted here so the cell does not depend on the patch cell.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
# Evaluation tags to report on for each saved result set.
models = ['D_ALO', 'D_DYN', 'S_MAX', 'High_SVC']

for size, overlap in inputs_options:
    # One saved result file per (size, overlap) configuration.
    inputs = IO.load(f'HighFrequency_{size}_{overlap}.pickle')

    for model_name in models:
        # Build the report for this model and render it as a titled HTML table.
        report_title = f'Test - {model_name}_{size}_{overlap}'
        report_tags = {'label_encode': 'LabelEncode', 'eval': model_name}
        report = Views.report(inputs, report_tags, label_encoder)
        rendered = ViewsTools.dataframe_renderer([report], title=[report_title])
        display(HTML(rendered))

In [None]:
# Position of the entry to inspect. `inputs` is whatever the preceding loop over
# inputs_options loaded last, i.e. the final (size, overlap) configuration.
index = 10
full_rows = inputs[inputs.Type == 'Full']
print(full_rows.iloc[index]['Label'])
# Show the low-level predictions first, then the probabilities, for that entry.
for tag in (Tools.PREDICTION, Tools.PROBABILITY):
    PatchViews.display(inputs, tag, settings, label_encoder, index=index)