# Sheet properties

In [1]:
%%javascript
// Replace the output area's scroll check with a version that always answers false.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead of putting
// them in a scrollable box; _should_scroll is a private front-end hook, so confirm it
// still exists in the notebook front-end this is run with.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

# Imports

In [2]:
import sys
# Make the project-local `toolbox` package importable.
# NOTE(review): absolute, user-specific path; the notebook only runs where this directory exists.
sys.path.append('/home/rcendre/classification')
import os
# Both variables are set before the toolbox imports below.
# NOTE(review): presumably this restricts the deep-learning backend to the GPU with
# PCI bus index 1; confirm the intended device.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"]="1";  
from numpy import array, logspace
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import f1_score
from toolbox.models.builtin import Applications
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.transforms.common import FlattenTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings.
warnings.filterwarnings('ignore') 

# Parameters

In [3]:
# Advanced parameters
# Passed to Folds.build_group_folds below; presumably the number of cross-validation folds (confirm).
validation = 4
# Project default settings for dermatology; not referenced again in the visible cells.
settings = Settings.get_default_dermatology()
# Passed to Dermatology.multiple_resolution and iterated over when fitting the
# per-coefficient models; presumably image scale factors (confirm).
coefficients = [1, 0.75, 0.5, 0.25]

# Inputs

In [4]:
# File the extracted feature table is saved to and reloaded from
features_file = 'Multiresolution_Transfert.pickle'

In [5]:
# Load the 'Microscopy' modality for every entry of `coefficients`.
inputs = Dermatology.multiple_resolution(coefficients, modality='Microscopy')
# Transform groups
# Fit an integer encoder on the patient identifiers, then apply it to the 'ID_Patient'
# column; the result is read back further down from the 'GroupEncode' column.
group_encoder = LabelEncoder().fit(array(inputs['ID_Patient'].tolist()))
Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')
# Transform labels
# Encoder fitted on the three classes in this order; applied to 'Label', with the result
# read back further down from the 'LabelEncode' column.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')
# Make folds
# Build folds from the encoded labels and groups.
# NOTE(review): presumably grouping on 'GroupEncode' keeps one patient inside a single fold; confirm in Folds.
Folds.build_group_folds(inputs, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)

Progress: |███████████████████████████████████████████████████████████-----------------------------------------| 59.5 

KeyboardInterrupt: 

In [None]:
# Transfer-learning model from the project toolbox, configured with the ResNet architecture,
# 'avg' pooling and a batch size of 1.
method = Applications.get_transfer_learning(architecture='ResNet', pooling='avg', batch_size=1, additional={})
# Expose predict_proba under the name `transform`.
# NOTE(review): presumably Tools.transform calls `.transform` on the object it is given; confirm.
method.transform = method.predict_proba
# Apply the model to the 'Datum' column; later cells read the result from the 'Transfert' column.
Tools.transform(inputs, {'datum': 'Datum'}, method, 'Transfert')
# Persist the table so that later cells can start from IO.load(features_file).
IO.save(inputs, features_file)

# Fusion features

In [None]:
# File receiving the predictions of the feature-level fusion
prediction_features_file = 'Multiresolution_Prediction_LowTransfert.pickle'

In [None]:
# Linear SVM: flatten, min-max scale, then a class-balanced SVC with probability outputs
clf = Pipeline(steps=[
    ('flat', FlattenTransform()),
    ('scale', MinMaxScaler()),
    ('clf', SVC(kernel='linear', class_weight='balanced', probability=True)),
])
# Candidate values for C: 10^-2 ... 10^3, one per decade
clf_params = {'clf__C': logspace(-2, 3, 6).tolist()}

# Candidate classifiers as (name, estimator, hyper-parameter grid) triples
features_clfs = [('SVML', clf, clf_params)]

In [None]:
# Reload the feature table saved by the extraction cell.
inputs = IO.load(features_file)
    
for clf_name, clf, clf_params in features_clfs:

    # Combine the 'Transfert' values of the 'Instance' rows with the 'Full' rows, using the
    # 'Source' and 'Reference' columns (presumably as the link between them; confirm in Data.build_bags).
    # NOTE(review): this call does not use the loop variables, yet it runs once per classifier.
    Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', 'Transfert')

    # Evaluate the classifier with its parameter grid, restricted by the mask to the 'Full' rows.
    # NOTE(review): the output name 'TransfertSVC' is fixed, so a second entry in
    # features_clfs would presumably overwrite the results of the first.
    Tools.evaluate(inputs, {'datum': 'Transfert', 'label_encode': 'LabelEncode'}, clf, 'TransfertSVC', distribution=clf_params, mask=inputs.Type == 'Full') 

    # The table is written after every classifier.
    IO.save(inputs, prediction_features_file)

# Fusion decisions

In [None]:
# File receiving the predictions of the decision-level fusion
prediction_decision_file = 'Multiresolution_Prediction_HighTransfert.pickle'

## Low level predictions

In [None]:
# Low-level model: min-max scaling followed by a class-balanced linear SVC with probability outputs
model_features = Pipeline(steps=[
    ('scale', MinMaxScaler()),
    ('clf', SVC(kernel='linear', class_weight='balanced', probability=True)),
])
# Candidate values for C: 10^-2 ... 10^3, one per decade
params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [None]:
# Fold pairs handed to Tools.fit_predict for the low-level models
# (presumably (training folds, test folds) -- confirm in Tools)
low_folds = [
    ([1, 2], [3]),
    ([2, 3], [4]),
    ([3, 4], [1]),
    ([4, 1], [2]),
]

In [None]:
# Start again from the saved transfer-learning features.
inputs = IO.load(features_file)

for coefficient in coefficients:
    # Run the low-level pipeline on the rows of this coefficient only (selected by the mask),
    # with the low_folds pairs and the C grid in `params`; outputs go under the 'SVC' name.
    Tools.fit_predict(inputs, {'datum': 'Transfert', 'label_encode': 'LabelEncode'}, model_features, 'SVC',
                      mask=inputs['Coefficient']==coefficient, folds=low_folds, distribution=params)

# Combine the 'Instance' rows with the 'Full' rows for the two SVC output columns,
# named 'SVC_' followed by Tools.PREDICTION and Tools.PROBABILITY respectively.
Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', f'SVC_{Tools.PREDICTION}')
Data.build_bags(inputs, inputs.Type == 'Full', 'Reference', inputs.Type == 'Instance', 'Source', f'SVC_{Tools.PROBABILITY}')

# Persist the low-level outputs for the high-level stage.
IO.save(inputs, prediction_decision_file)

## High level predictions

In [None]:
# Load the low-level predictions and keep only the rows whose Type is 'Full'.
inputs = IO.load(prediction_decision_file)
# drop=True discards the old index instead of inserting it as a column. The filtered frame
# is later saved back to prediction_decision_file, so without it every reload of that file
# would add another index column ('index', then 'level_0') and eventually fail with
# "cannot insert level_0, already exists".
inputs = inputs[inputs.Type == 'Full'].reset_index(drop=True)

In [None]:
# Voting classifiers from the project toolbox, all scored with f1_score
model_decisions = DecisionVotingClassifier(mode='max', metric=f1_score)
model_scores_mean = ScoreVotingClassifier(low='mean', high='max', metric=f1_score)
model_scores_max = ScoreVotingClassifier(low='max', high='max', metric=f1_score)

# Linear SVM on the flattened inputs (this pipeline has no scaling step)
svc_scores = Pipeline(steps=[
    ('flat', FlattenTransform()),
    ('clf', SVC(kernel='linear', class_weight='balanced', probability=True)),
])
# Candidate values for C: 10^-2 ... 10^3, one per decade
svc_params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [None]:
# Fold pairs handed to Tools.evaluate for the high-level models
# (presumably (training folds, test folds) -- confirm in Tools)
high_folds = [
    ([4], [1]),
    ([1], [2]),
    ([2], [3]),
    ([3], [4]),
]

In [None]:
# Decision voting on the low-level predicted labels; results go under the 'Decision' name.
Tools.evaluate(inputs, {'datum': f'SVC_{Tools.PREDICTION}', 'label_encode': 'LabelEncode'}, 
               model_decisions, 'Decision', folds=high_folds) 


# Score voting on the low-level probabilities, configured with low='mean'.
Tools.evaluate(inputs, {'datum': f'SVC_{Tools.PROBABILITY}', 'label_encode': 'LabelEncode'}, 
               model_scores_mean, 'ScoreMean', folds=high_folds)     

# Score voting on the low-level probabilities, configured with low='max'.
Tools.evaluate(inputs, {'datum': f'SVC_{Tools.PROBABILITY}', 'label_encode': 'LabelEncode'}, 
               model_scores_max, 'ScoreMax', folds=high_folds)     

# Linear SVM on the low-level probabilities, with the C grid in svc_params.
Tools.evaluate(inputs, {'datum': f'SVC_{Tools.PROBABILITY}', 'label_encode': 'LabelEncode'},
               svc_scores, 'HSVC', distribution=svc_params, folds=high_folds)

# NOTE(review): this overwrites the file written by the low-level stage with the frame
# that was filtered to the 'Full' rows when it was loaded.
IO.save(inputs, prediction_decision_file)    

## Analysis

In [None]:
from IPython.display import HTML
from IPython.display import display

In [None]:
# Reload the feature-fusion predictions and keep the rows whose Type is 'Full'
inputs = IO.load(prediction_features_file)
inputs = inputs[inputs.Type == 'Full']

# Encoder handed to the report, fitted on the three classes in this order
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size setting from the project views helper
ViewsTools.plot_size((8, 8))

for clf_name, clf, clf_params in features_clfs:
    # Every classifier was evaluated under this single output name
    name = 'TransfertSVC'
    display(HTML(ViewsTools.dataframe_renderer(
        [Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': name}, label_encoder)],
        title=[f'Test - {name}'],
    )))

In [None]:
# Reload the decision-fusion predictions and keep the rows whose Type is 'Full'
inputs = IO.load(prediction_decision_file)
inputs = inputs[inputs.Type == 'Full']

# Encoder handed to the reports, fitted on the three classes in this order
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size setting from the project views helper
ViewsTools.plot_size((8, 8))

# One report per high-level evaluation name, in the order they were evaluated
for high_name in ('Decision', 'ScoreMean', 'ScoreMax', 'HSVC'):
    display(HTML(ViewsTools.dataframe_renderer(
        [Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': high_name}, label_encoder)],
        title=[f'Test - {high_name}'],
    )))