# Sheet properties

In [None]:
%%javascript
// Override the output-area scroll predicate so it answers "no" for any line count.
IPython.OutputArea.prototype._should_scroll = (_lineCount) => false;

# Imports

In [None]:
import sys
# NOTE(review): absolute, user-specific path — presumably the checkout that provides the
# `toolbox` package imported below. It will not resolve on another machine; confirm and
# consider making it configurable before sharing the notebook.
sys.path.append('/home/rcendre/classification')
from numpy import array, logspace
from scipy.stats import randint as randint
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.IO import dermatology
from toolbox.transforms.common import PredictorTransform, FlattenTransform
from toolbox.transforms.images import HaralickImageTransform, SpatialImageTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
import warnings
warnings.filterwarnings('ignore') 

# Parameters

In [None]:
# Advanced parameters
# Resolution coefficients: passed to Dermatology.multiple_resolution and iterated over
# when fitting one low-level model per coefficient.
coefficients = [1, 0.75, 0.5, 0.25]
# Third argument of Folds.build_group_folds (presumably the number of folds — confirm).
validation = 4
# Project default settings for the dermatology data.
settings = Settings.get_default_dermatology()

# Inputs

In [None]:
# File in which the extracted features are cached between cells (written via IO.save).
features_file = 'Extraction_Multiresolution.pickle'
# Forwarded as `max_iter` to every SVC in this notebook; -1 is scikit-learn's "no limit".
iteration_limit = -1

In [None]:
# Load the microscopy data at every resolution coefficient.
inputs = Dermatology.multiple_resolution(coefficients, modality='Microscopy')

# Transform groups: encode patient identifiers into the 'GroupEncode' column
patient_ids = array(inputs['ID_Patient'].tolist())
group_encoder = LabelEncoder().fit(patient_ids)
Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')

# Transform labels: encode the three classes, in this order, into the 'LabelEncode' column
label_names = ['Normal', 'Benign', 'Malignant']
label_encoder = OrderedEncoder().fit(label_names)
Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')

# Make folds from the encoded labels and patient groups
# (presumably so that one patient never spans two folds — confirm in Folds.build_group_folds)
fold_tags = {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}
Folds.build_group_folds(inputs, fold_tags, validation)

In [None]:
# Run SpatialImageTransform over the 'Datum' column, storing the result under 'Wiltgen',
# then cache the whole table on disk for the following sections.
spatial_transform = SpatialImageTransform()
Tools.transform(inputs, {'datum': 'Datum'}, spatial_transform, 'Wiltgen')
IO.save(inputs, features_file)

# Fusion features

In [None]:
# File receiving the feature-fusion evaluation results (written via IO.save).
prediction_features_file = 'Prediction_MultiResolution_Features.pickle'

In [None]:
# SVM Linear
# Flatten the features, then classify with a class-balanced linear SVC that exposes probabilities.
features_svc = SVC(kernel='linear', class_weight='balanced', max_iter=iteration_limit, probability=True)
clf = Pipeline(steps=[('flat', FlattenTransform()), ('clf', features_svc)])
# Search grid for C: six log-spaced values from 1e-2 to 1e3
clf_params = {'clf__C': logspace(-2, 3, 6).tolist()}

# (name, estimator, parameter grid) triples evaluated by the feature-fusion cells
features_clfs = [('SVML', clf, clf_params)]

In [None]:
inputs = IO.load(features_file)

# Column holding the features to classify; the extraction cell stores them under this name.
# (The previous code referenced an undefined `extractor_name`, a NameError on a fresh kernel.)
features_column = 'Wiltgen'

for clf_name, clf, clf_params in features_clfs:

    # Build the bags from the same column that is evaluated below
    # (presumably gathers the 'Instance' rows under their 'Full' reference row — confirm in Data.build_bags).
    Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', features_column)

    # Name under which the evaluation is stored; the analysis cell looks it up again.
    name = f'{features_column}_{clf_name}'

    # Evaluate on the 'Full' rows only, searching the classifier's parameter grid.
    Tools.evaluate(inputs, {'datum': features_column, 'label_encode': 'LabelEncode'}, clf, name, distribution=clf_params, mask=inputs.Type == 'Full')

    # Save after every classifier so completed evaluations are kept on disk.
    IO.save(inputs, prediction_features_file)

# Fusion decisions

In [None]:
# File receiving the decision-fusion predictions and evaluations (written via IO.save).
prediction_decision_file = 'Prediction_Multiresolution_Decisions.pickle'

## Low level predictions

In [None]:
# SVM Linear
# Low-level model: flatten the features, then a class-balanced linear SVC with probabilities.
low_level_svc = SVC(kernel='linear', class_weight='balanced', max_iter=iteration_limit, probability=True)
model_features = Pipeline(steps=[('flat', FlattenTransform()), ('clf', low_level_svc)])
# Search grid for C: six log-spaced values from 1e-2 to 1e3
params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [None]:
# Low-level (fit folds, predict folds) pairs over folds 1..4, rotating by one each time:
#   ([1, 2], [3]), ([2, 3], [4]), ([3, 4], [1]), ([4, 1], [2])
low_folds = [([k, k % 4 + 1], [(k + 1) % 4 + 1]) for k in range(1, 5)]

In [None]:
inputs = IO.load(features_file)

# Fit/predict one low-level model per resolution coefficient, each restricted to the rows
# of that coefficient. Results are stored under the 'SVC' name.
for coefficient in coefficients:
    # The features live in the 'Wiltgen' column written by the extraction cell
    # (the previous code referenced an undefined `extractor_name`, a NameError on a fresh kernel).
    # TODO(review): a note here read "Add scaling", but model_features has no scaling step —
    # confirm whether one is wanted.
    Tools.fit_predict(inputs, {'datum': 'Wiltgen', 'label_encode': 'LabelEncode'}, model_features, 'SVC',
                      mask=inputs['Coefficient']==coefficient, folds=low_folds, distribution=params)

# Build bags from the low-level predictions and probabilities
# (presumably gathers the 'Instance' rows under their 'Full' reference row — confirm in Data.build_bags).
Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', f'SVC_{Tools.PREDICTION}')
Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', f'SVC_{Tools.PROBABILITY}')

IO.save(inputs, prediction_decision_file)

## High level predictions

In [None]:
inputs = IO.load(prediction_decision_file)
# Keep only the 'Full' rows and renumber them.
# drop=True discards the old index instead of inserting it as an 'index' column: this table is
# saved back to prediction_decision_file after the high-level evaluations, so with a pandas
# DataFrame a second run of this cell would otherwise fail because 'index' already exists.
inputs = inputs[inputs.Type == 'Full'].reset_index(drop=True)

In [None]:
# Majority
# The previous code instantiated `MajorityVotingClassifier`, which is never imported (NameError).
# The imports cell provides DecisionVotingClassifier and ScoreVotingClassifier instead, matching
# the hard (decision) / soft (score) split that was intended here.
# NOTE(review): default construction is assumed to be valid and to give the intended voting
# behaviour — confirm against toolbox.models.models.
model_decisions = DecisionVotingClassifier()
model_scores = ScoreVotingClassifier()
# SVM Linear
# High-level model: flatten the low-level probabilities, then a class-balanced linear SVC.
svc_scores = Pipeline([('flat', FlattenTransform()),('clf', SVC(kernel='linear', class_weight='balanced', max_iter=iteration_limit, probability=True))])
# Search grid for C: six log-spaced values from 1e-2 to 1e3
svc_params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [None]:
# High-level (fit folds, predict folds) pairs over folds 1..4; each fold is predicted by a
# model fitted on the fold just before it:
#   ([4], [1]), ([1], [2]), ([2], [3]), ([3], [4])
high_folds = [([(k - 2) % 4 + 1], [k]) for k in range(1, 5)]

In [None]:
# High-level evaluations: (input column, model, result name, extra keyword arguments).
# Only the SVC needs a parameter grid; all three share the same high-level folds.
high_level_runs = [
    (f'SVC_{Tools.PREDICTION}', model_decisions, 'Decision', {}),
    (f'SVC_{Tools.PROBABILITY}', model_scores, 'Score', {}),
    (f'SVC_{Tools.PROBABILITY}', svc_scores, 'HSVC', {'distribution': svc_params}),
]

for run_column, run_model, run_name, run_options in high_level_runs:
    run_tags = {'datum': run_column, 'label_encode': 'LabelEncode'}
    Tools.evaluate(inputs, run_tags, run_model, run_name, folds=high_folds, **run_options)

# Persist the table together with the three evaluations
IO.save(inputs, prediction_decision_file)

## Analysis

In [None]:
from IPython.display import HTML
from IPython.display import display

In [None]:
inputs = IO.load(prediction_features_file)
inputs = inputs[inputs.Type == 'Full']

# Transform labels
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size (no curve is drawn in this cell; only the reports below are rendered)
ViewsTools.plot_size((8,8))

for clf_name, clf, clf_params in features_clfs:
    # The feature-fusion evaluation stores its results as 'Wiltgen_<classifier>'
    # (the previous code built this name from an undefined `extractor_name`).
    name = f'Wiltgen_{clf_name}'
    report = Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': name}, label_encoder)
    display(HTML(ViewsTools.dataframe_renderer([report], title=[f'Test - {name}'])))

In [None]:
inputs = IO.load(prediction_decision_file)
inputs = inputs[inputs.Type == 'Full']

# Transform labels
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size (no curve is drawn in this cell; only the reports below are rendered)
ViewsTools.plot_size((8,8))

# Report each high-level evaluation under the exact name it was given in Tools.evaluate.
# The previous code looped over an undefined `extractors` list and looked up
# '<extractor>_Decision' etc., names under which nothing was ever stored.
for high_name in ('Decision', 'Score', 'HSVC'):
    report = Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': high_name}, label_encoder)
    display(HTML(ViewsTools.dataframe_renderer([report], title=[f'Test - {high_name}'])))