# Sheet properties

In [1]:
from IPython.display import HTML

# Markup for a button that shows/hides every notebook input cell ('div.input').
# The script calls code_toggle once when the document is ready, and code_show
# starts as true, so the inputs are hidden on load.
toggle_widget = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''

# Keep the HTML object as the last expression of the cell so that it is rendered.
HTML(toggle_widget)

In [2]:
%%javascript
// Replace the notebook's OutputArea._should_scroll hook with a function that
// always returns false, whatever the number of output lines.
// NOTE(review): presumably this stops long outputs from being collapsed into a
// scrolling box - confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
%matplotlib inline
# Ask the inline backend to emit figures in the 'pdf' and 'svg' formats (vector graphics).
# NOTE(review): IPython.display.set_matplotlib_formats is deprecated in recent IPython
# releases in favour of matplotlib_inline.backend_inline.set_matplotlib_formats -
# confirm against the installed version before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf', 'svg')
import matplotlib.pyplot as plt

# Local toolbox path

In [4]:
import sys
from IPython.display import HTML
# Add the local checkout to the import path (presumably where the `toolbox`
# package imported further down lives - confirm).
# The location can be overridden with the CLASSIFICATION_HOME environment variable;
# the historical hard-coded path remains the default, so existing setups are unaffected.
import os
classification_home = os.environ.get('CLASSIFICATION_HOME', '/home/rcendre/classification')
# Avoid stacking duplicate entries when the cell is re-run on a live kernel.
if classification_home not in sys.path:
    sys.path.append(classification_home)

# Imports

In [15]:
import os
import sys
import itertools
import webbrowser
from pathlib import Path
import matplotlib.pyplot as plt
from numpy import array, logspace
from scipy.stats import randint,uniform
from imblearn import combine as combine
from imblearn import over_sampling as overs
from imblearn import under_sampling as unders
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.manifold import TSNE
# from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import davies_bouldin_score
from toolbox.classification.common import Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.models.builtin import Applications
from toolbox.IO import dermatology
from toolbox.transforms.common import PredictorTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.transforms.images import DistributionImageTransform, DWTImageTransform, FourierImageTransform, HaralickImageTransform, SpatialImageTransform
from toolbox.views.common import Views, ViewsTools
from toolbox.views.images import ImagesViews
from IPython.display import display
from toolbox.transforms.at_most import LDAAtRatio

# Parameters

In [6]:
# Advanced parameters
# Tag used to build the name of the prediction file.
data_type = 'Full'
# Third argument given to Folds.build_group_folds further down
# (presumably the number of cross-validation folds - TODO confirm).
validation = 4
# Default dermatology settings object returned by the toolbox.
settings = Settings.get_default_dermatology()

# Inputs

In [7]:
# File in which the evaluation results are saved (IO.save) and later reloaded (IO.load).
prediction_file = f'DataBalancement_{data_type}.pickle'

In [8]:
# Load the microscopy image table and the three stored feature tables.
# The dataset variant now follows `data_type` (parameters cell) instead of a
# hard-coded 'Full', so the loaded data always matches the variant named in
# `prediction_file`. With data_type == 'Full' the loaded files are unchanged.
# NOTE(review): the '.pickle' extension suggests IO.load unpickles these files -
# only load files from a trusted source.
inputs = Dermatology.images(modality='Microscopy', data_type=data_type)
inputs_spatial = IO.load(f'Spatial_{data_type}.pickle')
inputs_freq = IO.load(f'Frequency_{data_type}.pickle')
inputs_transfer = IO.load(f'Transfer_{data_type}.pickle')

In [9]:
# Copy one column per descriptor into the main table.
# Each entry reads: (column created in `inputs`, source table, column read from the source).
feature_sources = (
    ('Spatial', inputs_spatial, 'Wiltgen'),
    ('Daubechies', inputs_freq, 'DWTDB4Sc1'),
    ('VGG16Avg', inputs_transfer, 'VGG16Avg'),
    ('ResNetAvg', inputs_transfer, 'ResNetAvg'),
)
for target_column, source_table, source_column in feature_sources:
    inputs[target_column] = source_table[source_column]

In [10]:
# Transform groups
group_encoder = LabelEncoder().fit(array(inputs['ID_Patient'].tolist()))
Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')
# Transform labels
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')
# Make folds
Folds.build_group_folds(inputs, {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}, validation)

# Models

In [11]:
# Preprocessing steps per descriptor: (column name in `inputs`, list of pipeline steps).
# Only 'VGG16Avg' gets a reduction step after scaling.
mains = [
    ('Spatial', [
        ('scale', MinMaxScaler()),
    ]),
    ('Daubechies', [
        ('scale', StandardScaler()),
    ]),
    ('ResNetAvg', [
        ('scale', MinMaxScaler()),
    ]),
    ('VGG16Avg', [
        ('scale', MinMaxScaler()),
        ('reduction', LDAAtRatio(n_components=0.95)),
    ]),
]

In [12]:
# Final pipeline step: a linear-kernel SVC with probability estimates enabled.
clf = ('clf', SVC(kernel='linear', probability=True))
# Candidate values for the SVC's C parameter: 6 log-spaced points from 1e-2 to 1e3.
# Passed to Tools.evaluate through its `distribution` argument.
clf_params = {'clf__C': logspace(-2, 3, 6).tolist()}

In [13]:
# Balancing strategies to compare: (suffix used in the pipeline name, pipeline step).
# 'NoStrategy' carries no sampler; the pipeline-building cell skips that step entirely.
#
# Every resampler is seeded: they all draw random samples, so without a fixed
# random_state two runs of the notebook would compare different resampled
# training sets and the reported scores would not be reproducible.
RESAMPLING_SEED = 0
strategies = [
    ('NoStrategy', ('strategy', None)),
    # Random over-sampling
    ('RandomOS', ('strategy', overs.RandomOverSampler(random_state=RESAMPLING_SEED))),
    # Random under-sampling
    ('RandomUS', ('strategy', unders.RandomUnderSampler(random_state=RESAMPLING_SEED))),
    # Combined over- and under-sampling
    ('SMOTEENN', ('strategy', combine.SMOTEENN(random_state=RESAMPLING_SEED))),
    ('SMOTETomek', ('strategy', combine.SMOTETomek(random_state=RESAMPLING_SEED))),
]

In [16]:
# Build one pipeline per (descriptor, balancing strategy) pair.
# Each entry of `processes` is (input column, pipeline name, pipeline).
from copy import deepcopy

processes = []
for datum, main in mains:
    for strategy_name, strategy in strategies:
        # list.copy() is shallow: the scaler, sampler and classifier objects would be
        # the very same instances in every pipeline, so fitting one pipeline would
        # overwrite the fitted state seen through the others. Deep-copy every step so
        # that each pipeline owns its estimators.
        process = deepcopy(main)
        if strategy_name != 'NoStrategy':
            process.append(deepcopy(strategy))
        process.append(deepcopy(clf))
        processes.append((datum, f'{datum}{strategy_name}', Pipeline(process)))

# Evaluation

In [None]:
# Evaluate every pipeline in turn on its own input column.
for datum, name, process in processes:
    evaluation_tags = {'datum': datum, 'label_encode': 'LabelEncode'}

    # Progress messages end with a carriage return, so each one overwrites the previous.
    print(f'{name} performed...', end='\r')
    Tools.evaluate(inputs, evaluation_tags, process, name, distribution=clf_params)
    print(f'{name} achieved !', end='\r')

    # Saved after every pipeline, so results obtained so far are kept on disk
    # if a later evaluation fails.
    IO.save(inputs, prediction_file)

# Scores and ROC

In [21]:
# Reload the saved evaluation results.
inputs = IO.load(prediction_file)

# Label encoder handed to the report builder.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size intended for the ROC curves.
# NOTE: the ROC / train-report calls were commented out in the original cell; they rely
# on a 'MalignantEncode' column and a `malignant_encoder` that are not defined in the
# visible part of this notebook, so only the test report is rendered here.
ViewsTools.plot_size((8,8))

for datum, name, process in processes:
    # Test report for this pipeline, rendered as an HTML table titled with its name.
    report_tags = {'label_encode': 'LabelEncode', 'eval': name}
    test_report = Views.report(ViewsTools.data_as(inputs, name), report_tags, label_encoder)
    rendered = ViewsTools.dataframe_renderer([test_report], title=[f'Test - {name}'])
    display(HTML(rendered))

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1-score,precision,recall,support
Normal,0.17±0.13,0.32±0.19,0.11±0.11,757.00±135.71
Benign,0.65±0.12,0.58±0.16,0.75±0.07,2188.00±115.87
Malignant,0.66±0.08,0.68±0.06,0.64±0.12,2486.00±162.23
accuracy,0.61±0.11,0.61±0.11,0.61±0.11,0.61±0.11
macro avg,0.49±0.09,0.53±0.12,0.50±0.06,5431.00±0.43
weighted avg,0.59±0.13,0.59±0.15,0.61±0.11,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.42±0.07,0.38±0.14,0.46±0.14,757.00±135.71
Benign,0.66±0.10,0.62±0.13,0.71±0.06,2188.00±115.87
Malignant,0.63±0.08,0.70±0.04,0.56±0.10,2486.00±162.23
accuracy,0.61±0.08,0.61±0.08,0.61±0.08,0.61±0.08
macro avg,0.57±0.04,0.57±0.05,0.58±0.03,5431.00±0.43
weighted avg,0.61±0.08,0.62±0.07,0.61±0.08,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.36±0.06,0.39±0.17,0.32±0.08,757.00±135.71
Benign,0.65±0.12,0.60±0.12,0.72±0.12,2188.00±115.87
Malignant,0.64±0.05,0.69±0.07,0.60±0.05,2486.00±162.23
accuracy,0.61±0.08,0.61±0.08,0.61±0.08,0.61±0.08
macro avg,0.55±0.05,0.56±0.08,0.55±0.04,5431.00±0.43
weighted avg,0.61±0.07,0.61±0.06,0.61±0.08,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.34±0.10,0.29±0.18,0.43±0.08,757.00±135.71
Benign,0.65±0.12,0.61±0.16,0.69±0.09,2188.00±115.87
Malignant,0.62±0.07,0.73±0.03,0.54±0.09,2486.00±162.23
accuracy,0.58±0.08,0.58±0.08,0.58±0.08,0.58±0.08
macro avg,0.54±0.06,0.54±0.07,0.55±0.04,5431.00±0.43
weighted avg,0.59±0.08,0.62±0.07,0.58±0.08,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.24±0.12,0.28±0.07,0.22±0.18,757.00±135.71
Benign,0.65±0.11,0.59±0.14,0.73±0.12,2188.00±115.87
Malignant,0.65±0.05,0.71±0.04,0.61±0.07,2486.00±162.23
accuracy,0.60±0.08,0.60±0.08,0.60±0.08,0.60±0.08
macro avg,0.52±0.06,0.52±0.05,0.52±0.05,5431.00±0.43
weighted avg,0.59±0.10,0.60±0.09,0.60±0.08,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.18±0.12,0.45±0.26,0.11±0.10,757.00±135.71
Benign,0.68±0.12,0.63±0.16,0.74±0.09,2188.00±115.87
Malignant,0.69±0.07,0.66±0.10,0.71±0.08,2486.00±162.23
accuracy,0.64±0.10,0.64±0.10,0.64±0.10,0.64±0.10
macro avg,0.51±0.08,0.58±0.05,0.52±0.06,5431.00±0.43
weighted avg,0.61±0.12,0.62±0.06,0.64±0.10,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.55±0.17,0.53±0.25,0.56±0.11,757.00±135.71
Benign,0.69±0.13,0.65±0.15,0.74±0.10,2188.00±115.87
Malignant,0.67±0.04,0.73±0.02,0.63±0.06,2486.00±162.23
accuracy,0.66±0.06,0.66±0.06,0.66±0.06,0.66±0.06
macro avg,0.64±0.06,0.64±0.07,0.64±0.05,5431.00±0.43
weighted avg,0.66±0.06,0.67±0.06,0.66±0.06,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.56±0.17,0.51±0.22,0.62±0.13,757.00±135.71
Benign,0.71±0.12,0.67±0.13,0.75±0.11,2188.00±115.87
Malignant,0.67±0.05,0.74±0.02,0.61±0.08,2486.00±162.23
accuracy,0.67±0.06,0.67±0.06,0.67±0.06,0.67±0.06
macro avg,0.65±0.06,0.64±0.07,0.66±0.04,5431.00±0.43
weighted avg,0.67±0.06,0.68±0.05,0.67±0.06,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.52±0.18,0.46±0.21,0.60±0.14,757.00±135.71
Benign,0.69±0.13,0.65±0.15,0.73±0.11,2188.00±115.87
Malignant,0.65±0.05,0.74±0.03,0.58±0.06,2486.00±162.23
accuracy,0.65±0.08,0.65±0.08,0.65±0.08,0.65±0.08
macro avg,0.62±0.08,0.62±0.09,0.64±0.07,5431.00±0.43
weighted avg,0.65±0.07,0.66±0.06,0.65±0.08,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.54±0.19,0.51±0.23,0.57±0.16,757.00±135.71
Benign,0.70±0.13,0.65±0.16,0.75±0.09,2188.00±115.87
Malignant,0.67±0.04,0.75±0.01,0.61±0.06,2486.00±162.23
accuracy,0.66±0.06,0.66±0.06,0.66±0.06,0.66±0.06
macro avg,0.64±0.07,0.64±0.07,0.65±0.07,5431.00±0.43
weighted avg,0.66±0.06,0.67±0.05,0.66±0.06,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.41±0.07,0.68±0.19,0.29±0.03,757.00±135.71
Benign,0.78±0.07,0.74±0.10,0.82±0.05,2188.00±115.87
Malignant,0.81±0.04,0.77±0.07,0.84±0.02,2486.00±162.23
accuracy,0.76±0.06,0.76±0.06,0.76±0.06,0.76±0.06
macro avg,0.66±0.02,0.73±0.03,0.65±0.02,5431.00±0.43
weighted avg,0.74±0.06,0.75±0.04,0.76±0.06,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.55±0.14,0.57±0.15,0.53±0.13,757.00±135.71
Benign,0.78±0.07,0.76±0.08,0.80±0.05,2188.00±115.87
Malignant,0.81±0.02,0.82±0.01,0.81±0.03,2486.00±162.23
accuracy,0.76±0.04,0.76±0.04,0.76±0.04,0.76±0.04
macro avg,0.71±0.03,0.72±0.04,0.71±0.03,5431.00±0.43
weighted avg,0.76±0.04,0.76±0.04,0.76±0.04,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.56±0.17,0.55±0.17,0.58±0.17,757.00±135.71
Benign,0.77±0.05,0.78±0.06,0.77±0.06,2188.00±115.87
Malignant,0.81±0.03,0.81±0.01,0.80±0.05,2486.00±162.23
accuracy,0.76±0.03,0.76±0.03,0.76±0.03,0.76±0.03
macro avg,0.71±0.04,0.71±0.05,0.72±0.04,5431.00±0.43
weighted avg,0.76±0.03,0.76±0.03,0.76±0.03,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.53±0.18,0.53±0.19,0.53±0.17,757.00±135.71
Benign,0.76±0.05,0.76±0.05,0.76±0.06,2188.00±115.87
Malignant,0.79±0.03,0.80±0.05,0.79±0.00,2486.00±162.23
accuracy,0.74±0.03,0.74±0.03,0.74±0.03,0.74±0.03
macro avg,0.69±0.05,0.69±0.05,0.69±0.05,5431.00±0.43
weighted avg,0.74±0.03,0.74±0.03,0.74±0.03,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.58±0.15,0.61±0.15,0.56±0.18,757.00±135.71
Benign,0.78±0.06,0.77±0.08,0.80±0.06,2188.00±115.87
Malignant,0.82±0.02,0.83±0.02,0.81±0.04,2486.00±162.23
accuracy,0.77±0.04,0.77±0.04,0.77±0.04,0.77±0.04
macro avg,0.73±0.04,0.73±0.05,0.73±0.04,5431.00±0.43
weighted avg,0.77±0.04,0.77±0.04,0.77±0.04,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.60±0.21,0.72±0.21,0.51±0.21,757.00±135.71
Benign,0.73±0.09,0.70±0.13,0.78±0.04,2188.00±115.87
Malignant,0.77±0.04,0.77±0.04,0.77±0.06,2486.00±162.23
accuracy,0.73±0.06,0.73±0.06,0.73±0.06,0.73±0.06
macro avg,0.70±0.09,0.73±0.08,0.68±0.09,5431.00±0.43
weighted avg,0.73±0.06,0.74±0.05,0.73±0.06,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.57±0.17,0.57±0.17,0.57±0.17,757.00±135.71
Benign,0.72±0.08,0.69±0.11,0.74±0.04,2188.00±115.87
Malignant,0.76±0.04,0.79±0.05,0.74±0.05,2486.00±162.23
accuracy,0.72±0.05,0.72±0.05,0.72±0.05,0.72±0.05
macro avg,0.68±0.07,0.69±0.07,0.68±0.07,5431.00±0.43
weighted avg,0.72±0.05,0.72±0.05,0.72±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.59±0.17,0.63±0.18,0.56±0.18,757.00±135.71
Benign,0.73±0.08,0.70±0.12,0.76±0.04,2188.00±115.87
Malignant,0.77±0.04,0.79±0.05,0.75±0.05,2486.00±162.23
accuracy,0.73±0.05,0.73±0.05,0.73±0.05,0.73±0.05
macro avg,0.70±0.08,0.71±0.07,0.69±0.08,5431.00±0.43
weighted avg,0.73±0.05,0.73±0.05,0.73±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.57±0.17,0.56±0.17,0.57±0.17,757.00±135.71
Benign,0.72±0.08,0.70±0.12,0.74±0.05,2188.00±115.87
Malignant,0.77±0.04,0.79±0.05,0.75±0.04,2486.00±162.23
accuracy,0.72±0.05,0.72±0.05,0.72±0.05,0.72±0.05
macro avg,0.68±0.07,0.68±0.07,0.69±0.07,5431.00±0.43
weighted avg,0.72±0.05,0.72±0.05,0.72±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.57±0.17,0.56±0.17,0.57±0.17,757.00±135.71
Benign,0.72±0.08,0.70±0.12,0.74±0.04,2188.00±115.87
Malignant,0.76±0.04,0.79±0.05,0.74±0.04,2486.00±162.23
accuracy,0.72±0.05,0.72±0.05,0.72±0.05,0.72±0.05
macro avg,0.68±0.07,0.68±0.07,0.68±0.07,5431.00±0.43
weighted avg,0.72±0.05,0.72±0.05,0.72±0.05,5431.00±0.43
