# Sheet properties

In [2]:
from IPython.display import HTML

# Inject a small script that toggles the visibility of the notebook's code cells.
# `code_toggle` hides every `div.input` element while `code_show` is true and
# shows them otherwise, then flips the flag. It is registered on document-ready,
# so the inputs are hidden once when this output is rendered; the submit button
# of the form calls it again on every click.
# The script uses `$`, so it relies on jQuery being available on the page.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [3]:
%%javascript
// Replace OutputArea's `_should_scroll` hook with a version that always answers
// false, whatever the number of output lines.
// NOTE(review): presumably this stops long outputs from being placed in a
// scrollable box — confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [4]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# produce vector inline graphics
# NOTE(review): recent IPython releases deprecate `IPython.display.set_matplotlib_formats`
# in favour of `matplotlib_inline.backend_inline.set_matplotlib_formats` — confirm
# against the IPython version this notebook targets.
from IPython.display import set_matplotlib_formats
# Request both PDF and SVG representations of each inline figure.
set_matplotlib_formats('pdf', 'svg')
import matplotlib.pyplot as plt

# Start with the local toolbox path

In [5]:
import sys
from IPython.display import HTML
# Make the local project checkout importable (presumably where the `toolbox`
# package imported below lives — confirm).
# The location can be overridden with the CLASSIFICATION_ROOT environment
# variable; the original hard-coded path remains the default. The membership
# check keeps re-running this cell from appending the same entry again.
import os
toolbox_root = os.environ.get('CLASSIFICATION_ROOT', '/home/rcendre/classification')
if toolbox_root not in sys.path:
    sys.path.append(toolbox_root)

# Imports

In [6]:
import os
import sys
import itertools
import webbrowser
from pathlib import Path
import matplotlib.pyplot as plt
from numpy import array, logspace
from scipy.stats import randint,uniform
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.manifold import TSNE
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, RobustScaler, StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import davies_bouldin_score
from toolbox.classification.common import Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.models.builtin import Applications
from toolbox.IO import dermatology
from toolbox.transforms.at_most import LDAAtRatio
from toolbox.transforms.common import PredictorTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.transforms.images import DistributionImageTransform, DWTImageTransform, FourierImageTransform, HaralickImageTransform, SpatialImageTransform
from toolbox.views.common import Views, ViewsTools
from toolbox.views.images import ImagesViews
from IPython.display import display

Using TensorFlow backend.


# Parameters

In [7]:
# Advanced parameters
# data_type  : suffix inserted into the feature / prediction pickle file names.
# validation : not referenced by any other visible cell — presumably a fold
#              count for cross-validation; TODO confirm or remove.
# settings   : default dermatology settings object provided by the toolbox.
data_type='Full'
validation = 4
settings = Settings.get_default_dermatology()

# Inputs

In [8]:
# File holding the extracted features; loaded into `inputs` by the next cell.
features_file = f'Transfer_{data_type}.pickle'
# File holding the stored predictions; read back by the "Scores and ROC" cell.
prediction_file = f'PredictionTransferReduction_{data_type}.pickle'

In [9]:
# Load the previously extracted features for the selected data type.
# NOTE(review): the file name ends in `.pickle`, so `IO.load` presumably
# unpickles it — only load files that come from a trusted source.
inputs = IO.load(features_file)

# Features extraction

In [10]:
# Feature extractors to compare, as (name, parameters) pairs; every parameters
# dict is empty here. The name is the prefix of the result names looked up in
# the "Scores and ROC" cell.
extractors = [('VGG16Avg', {}), ('InceptionV3Avg', {}), ('InceptionResNetV2Avg', {}), ('ResNetAvg', {})]

# Classifier

In [11]:
# Candidate classifiers.
# Every entry of `clfs` is a triple:
#   (short name, ('clf', estimator) pipeline step, hyper-parameter search space)
# Search-space keys carry the 'clf__' prefix so that they address the pipeline
# step named 'clf'. Values are either explicit lists or scipy.stats
# distributions (presumably sampled by a randomized search — confirm in
# Tools.evaluate).
clfs = []

# SVM Linear
clf = ('clf', SVC(kernel='linear', class_weight='balanced', probability=True))
clf_params = {'clf__C': logspace(-2, 3, 6).tolist()}
clfs.append(('SVML', clf, clf_params))

# SVM RBF
clf = ('clf', SVC(kernel='rbf', class_weight='balanced', probability=True))
clf_params = {'clf__C': logspace(-2, 3, 6).tolist(),
             'clf__gamma': logspace(-2, 3, 6).tolist()}
clfs.append(('SVMR', clf, clf_params))

# CART
clf = ('clf', DecisionTreeClassifier(class_weight='balanced'))
clf_params = {'clf__max_depth': randint(4, 10),
              'clf__min_samples_leaf': randint(1, 9),
              'clf__criterion': ['gini', 'entropy']}
clfs.append(('CART', clf, clf_params))

# Extra
clf = ('clf', ExtraTreesClassifier(n_estimators=100, class_weight='balanced'))
clf_params = {'clf__max_depth': randint(4, 10),
              'clf__min_samples_leaf': randint(1, 9),
              'clf__criterion': ['gini', 'entropy']}
clfs.append(('EXTRA', clf, clf_params))

# GB
clf = ('clf', GradientBoostingClassifier(n_estimators=100))
clf_params = {'clf__max_depth': randint(4, 10),
              'clf__min_samples_leaf': randint(1, 9)}
clfs.append(('GB', clf, clf_params))

# MLP
# Two candidate architectures (two hidden layers / one hidden layer) whose
# widths are drawn once, here, from the integers 10..19.
# The third positional argument of `randint.rvs` is `loc`, not `size`; the
# former `randint.rvs(10,20,1)` therefore shifted the range to 11..20. The
# draw is converted to a plain int so that the layer sizes are ordinary
# Python integers.
# NOTE: no random_state is supplied, so the widths differ between kernel runs.
clf = ('clf', MLPClassifier())
mlp_layouts = [(int(randint.rvs(10, 20)), int(randint.rvs(10, 20))),
               (int(randint.rvs(10, 20)),)]
clf_params = {'clf__hidden_layer_sizes': mlp_layouts,
              'clf__activation': ['tanh', 'relu'],
              'clf__solver': ['sgd', 'adam'],
              'clf__alpha': uniform(0.0001, 0.9),
              'clf__learning_rate': ['constant', 'adaptive']}
clfs.append(('MLP', clf, clf_params))

# Models

In [12]:
# Scaling candidates, as (short name, ('scale', transformer)) pairs.
scalers = [
    ('MMS', ('scale', MinMaxScaler())),
    ('RS', ('scale', RobustScaler())),
    ('SS', ('scale', StandardScaler())),
]

# Dimensionality-reduction candidates, as (short name, ('reduction', transformer)) pairs.
reductions = [
    ('PCA95', ('reduction', PCA(n_components=0.95))),
    ('PCA975', ('reduction', PCA(n_components=0.975))),
    ('PCA99', ('reduction', PCA(n_components=0.99))),
    ('LDA95', ('reduction', LDAAtRatio(n_components=0.95))),
    ('LDA975', ('reduction', LDAAtRatio(n_components=0.975))),
    ('LDA99', ('reduction', LDAAtRatio(n_components=0.99))),
]

# One pipeline per scaler x reduction x classifier combination, stored as
# (combined name, pipeline, classifier search space). `itertools.product`
# varies the classifier fastest and the scaler slowest.
processes = []
for (scaler_name, scaler), (reduction_name, reduction), (clf_name, clf, clf_params) in itertools.product(scalers, reductions, clfs):
    process = Pipeline([scaler, reduction, clf])
    processes.append((f'{scaler_name}_{reduction_name}_{clf_name}', process, clf_params))
            

# Evaluation

In [13]:
# NOTE: this evaluation loop is disabled — every line below is commented out.
# The "Scores and ROC" cell reads stored results from `prediction_file` instead.
# for extractor in extractors: 
#     # Decompose model
#     extractor_name, extractor_process = extractor
    
#     for process in processes:   
#         # Decompose model       
#         process_name, process_process, process_params = process
        
#         # Start evaluation
#         name = f'{extractor_name}{process_name}Label'        
#         print(f'{name} performed...', end='\r')
#         Tools.evaluate(inputs, {'datum': extractor_name, 'label_encode': 'LabelEncode'}, process_process, name, distribution=process_params)
#         print(f'{name} achieved !', end='\r')  
        
#         # Save
#         IO.save(inputs, prediction_file)

# Scores and ROC

In [15]:
# Reload the stored predictions; this rebinds `inputs`, which held the
# extracted features until now.
inputs = IO.load(prediction_file)

# Transform labels
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# Figure size for the views below (the ROC curves themselves are commented out)
ViewsTools.plot_size((8,8))
for extractor_name, extractor_process in extractors:

    for model_name, model_process, model_params in processes:
        # Name under which the results of this extractor/model pair are looked up.
        # Built outside the `try` so the handler can always refer to it.
        name = f'{extractor_name}{model_name}Label'

        # Label
        try:
            report = Views.report(ViewsTools.data_as(inputs, name), {'label_encode': 'LabelEncode', 'eval': name}, label_encoder)
            display(HTML(ViewsTools.dataframe_renderer([report], title=[f'Test - {name}'])))
        except Exception:
            # Report the combination and move on (presumably no results were
            # stored for it). Catching `Exception` instead of using a bare
            # `except` lets KeyboardInterrupt stop the loop.
            print(f'{name} not available')
#         display(Views.details(ViewsTools.data_as(inputs, name), {'eval': name}))
#         display(HTML(ViewsTools.dataframe_renderer([Views.report(ViewsTools.data_as(inputs, name), {'label_encode': 'MalignantEncode', 'eval': name}, malignant_encoder),
#                                     Views.report(ViewsTools.data_as(inputs, name, as_train=True), {'label_encode': 'MalignantEncode', 'eval': name}, malignant_encoder)],
#                                     title=[f'Test - {name}', f'Train - {name}'])))   
#         Views.receiver_operator_curves(ViewsTools.data_as(inputs, name), malignant_encoder, {'label_encode': 'MalignantEncode', 'eval': name}, settings);

Unnamed: 0,f1-score,precision,recall,support
Normal,0.54±0.18,0.53±0.21,0.55±0.14,757.00±135.71
Benign,0.76±0.05,0.75±0.07,0.77±0.04,2188.00±115.87
Malignant,0.78±0.03,0.80±0.04,0.76±0.03,2486.00±162.23
accuracy,0.74±0.03,0.74±0.03,0.74±0.03,0.74±0.03
macro avg,0.69±0.06,0.69±0.06,0.70±0.06,5431.00±0.43
weighted avg,0.74±0.03,0.74±0.04,0.74±0.03,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.56±0.19,0.59±0.20,0.53±0.18,757.00±135.71
Benign,0.76±0.07,0.72±0.10,0.79±0.04,2188.00±115.87
Malignant,0.79±0.04,0.82±0.05,0.77±0.03,2486.00±162.23
accuracy,0.75±0.03,0.75±0.03,0.75±0.03,0.75±0.03
macro avg,0.70±0.07,0.71±0.07,0.70±0.07,5431.00±0.43
weighted avg,0.74±0.04,0.75±0.04,0.75±0.03,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.51±0.18,0.55±0.22,0.48±0.14,757.00±135.71
Benign,0.61±0.09,0.60±0.11,0.63±0.08,2188.00±115.87
Malignant,0.66±0.07,0.66±0.08,0.66±0.08,2486.00±162.23
accuracy,0.62±0.04,0.62±0.04,0.62±0.04,0.62±0.04
macro avg,0.60±0.06,0.60±0.07,0.59±0.06,5431.00±0.43
weighted avg,0.62±0.04,0.62±0.04,0.62±0.04,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.26±0.08,0.69±0.20,0.16±0.06,757.00±135.71
Benign,0.67±0.10,0.61±0.13,0.75±0.05,2188.00±115.87
Malignant,0.70±0.07,0.69±0.12,0.71±0.03,2486.00±162.23
accuracy,0.65±0.07,0.65±0.07,0.65±0.07,0.65±0.07
macro avg,0.55±0.04,0.66±0.02,0.54±0.03,5431.00±0.43
weighted avg,0.63±0.08,0.66±0.03,0.65±0.07,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.41±0.13,0.72±0.20,0.28±0.09,757.00±135.71
Benign,0.74±0.07,0.70±0.10,0.78±0.03,2188.00±115.87
Malignant,0.77±0.04,0.74±0.07,0.80±0.04,2486.00±162.23
accuracy,0.72±0.04,0.72±0.04,0.72±0.04,0.72±0.04
macro avg,0.64±0.03,0.72±0.04,0.62±0.04,5431.00±0.43
weighted avg,0.71±0.04,0.72±0.03,0.72±0.04,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.33±0.12,0.54±0.24,0.24±0.09,757.00±135.71
Benign,0.77±0.05,0.74±0.06,0.80±0.04,2188.00±115.87
Malignant,0.76±0.08,0.72±0.12,0.79±0.03,2486.00±162.23
accuracy,0.72±0.06,0.72±0.06,0.72±0.06,0.72±0.06
macro avg,0.62±0.03,0.67±0.05,0.61±0.03,5431.00±0.43
weighted avg,0.70±0.06,0.70±0.04,0.72±0.06,5431.00±0.43


{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available
{name} not available


Unnamed: 0,f1-score,precision,recall,support
Normal,0.58±0.16,0.58±0.15,0.57±0.17,757.00±135.71
Benign,0.72±0.08,0.70±0.12,0.75±0.04,2188.00±115.87
Malignant,0.77±0.04,0.79±0.05,0.75±0.05,2486.00±162.23
accuracy,0.72±0.05,0.72±0.05,0.72±0.05,0.72±0.05
macro avg,0.69±0.07,0.69±0.06,0.69±0.07,5431.00±0.43
weighted avg,0.72±0.05,0.72±0.05,0.72±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.25±0.09,0.43±0.10,0.17±0.09,757.00±135.71
Benign,0.72±0.08,0.68±0.12,0.76±0.04,2188.00±115.87
Malignant,0.72±0.10,0.69±0.16,0.74±0.05,2486.00±162.23
accuracy,0.67±0.09,0.67±0.09,0.67±0.09,0.67±0.09
macro avg,0.56±0.07,0.60±0.07,0.56±0.05,5431.00±0.43
weighted avg,0.65±0.11,0.65±0.10,0.67±0.09,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.45±0.10,0.47±0.16,0.43±0.08,757.00±135.71
Benign,0.69±0.10,0.66±0.13,0.74±0.06,2188.00±115.87
Malignant,0.75±0.04,0.78±0.04,0.72±0.05,2486.00±162.23
accuracy,0.68±0.05,0.68±0.05,0.68±0.05,0.68±0.05
macro avg,0.63±0.05,0.63±0.05,0.63±0.05,5431.00±0.43
weighted avg,0.68±0.05,0.69±0.06,0.68±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.59±0.18,0.64±0.19,0.55±0.18,757.00±135.71
Benign,0.73±0.08,0.69±0.11,0.78±0.05,2188.00±115.87
Malignant,0.76±0.03,0.79±0.05,0.73±0.03,2486.00±162.23
accuracy,0.73±0.05,0.73±0.05,0.73±0.05,0.73±0.05
macro avg,0.69±0.08,0.71±0.08,0.69±0.08,5431.00±0.43
weighted avg,0.73±0.05,0.73±0.05,0.73±0.05,5431.00±0.43


Unnamed: 0,f1-score,precision,recall,support
Normal,0.59±0.20,0.66±0.21,0.52±0.20,757.00±135.71
Benign,0.72±0.09,0.68±0.12,0.75±0.04,2188.00±115.87
Malignant,0.75±0.04,0.76±0.04,0.75±0.05,2486.00±162.23
accuracy,0.72±0.05,0.72±0.05,0.72±0.05,0.72±0.05
macro avg,0.69±0.08,0.70±0.08,0.67±0.08,5431.00±0.43
weighted avg,0.72±0.05,0.72±0.05,0.72±0.05,5431.00±0.43


{name} not available


KeyboardInterrupt: 