# Sheet properties

In [1]:
%%javascript
// Replace the output area's `_should_scroll` hook with one that always
// answers false, whatever line count it is given.
// NOTE(review): presumably this stops long outputs from being collapsed into
// a scroll box — confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

# Imports

In [2]:
import sys
# NOTE(review): hard-coded absolute path to one user's home directory. The
# `toolbox` imports below presumably resolve through it, so the notebook will
# not run on another machine without editing this line.
sys.path.append('/home/rcendre/classification')
from numpy import array
from sklearn.preprocessing import LabelEncoder
from toolbox.classification.common import Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.transforms.labels import OrderedEncoder
from toolbox.transforms.images import DWTGGDImageTransform, DWTImageTransform, FourierImageTransform
from numpy import logspace
from scipy.stats import randint,uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
# Duplicate of the `Settings` import above (harmless re-import).
from toolbox.classification.parameters import Settings
# Duplicate of the `OrderedEncoder` import above (harmless re-import).
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views, ViewsTools
import warnings
# Installs a blanket 'ignore' filter: every warning raised afterwards in this
# kernel session is suppressed, including convergence warnings from estimators.
warnings.filterwarnings('ignore') 

# Parameters

In [3]:
# Advanced parameters

# -- Data selection
use_unknown = False  # forwarded to Dermatology.images(use_unknown=...)

# -- Evaluation protocol
validation = 4  # third argument of Folds.build_group_folds (presumably the fold count — confirm)
max_iter = -1  # handed to SVC(max_iter=...); scikit-learn reads -1 as "no iteration limit"

# -- Project defaults and output location
settings = Settings.get_default_dermatology()
prediction_file = 'Multiresolution_Prediction_Frequency.pickle'  # written by IO.save, re-read by IO.load

# Inputs

In [4]:
# Load the table of full microscopy images; `use_unknown` comes from the
# parameters cell.
inputs = Dermatology.images(
    modality='Microscopy',
    data_type='Full',
    use_unknown=use_unknown,
)

In [5]:
# Groups: fit an encoder on the patient identifiers and apply it through
# Tools.transform under the name 'GroupEncode' (presumably a new column of
# `inputs` — the fold builder below reads it back as the group key).
patient_ids = array(inputs['ID_Patient'].tolist())
group_encoder = LabelEncoder()
group_encoder.fit(patient_ids)
Tools.transform(inputs, {'datum': 'ID_Patient'}, group_encoder, 'GroupEncode')

# Labels: same mechanism with the project's ordered encoder, fitted on the
# three diagnosis classes in this fixed order.
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])
Tools.transform(inputs, {'datum': 'Label'}, label_encoder, 'LabelEncode')

# Folds: built from the data, encoded labels and patient groups, with
# `validation` as the last argument (presumably the number of folds — confirm).
fold_tags = {'datum': 'Datum', 'label_encode': 'LabelEncode', 'group': 'GroupEncode'}
Folds.build_group_folds(inputs, fold_tags, validation)

# Features extraction

In [6]:
# File the feature-extraction loop passes to IO.save.
# Plain string literal: the name has no placeholders, so no f-string is needed.
features_file = 'Multiresolution_Frequency.pickle'

In [7]:
# Wavelet-based feature extractors as (name, transform) pairs. The name is
# reused later as the output name in Tools.transform and as the 'datum' key
# during evaluation.
extractors = [
    ('DWTHaarSc5', DWTImageTransform(wavelets='haar', scale=5)),
    ('WiltgenDWT', DWTImageTransform(wavelets='db4', scale=5)),
    ('HalimiDWT', DWTGGDImageTransform(wavelets='db4', scale=4)),
]

In [8]:
# Run every extractor over the 'Datum' entries of `inputs`, storing each
# result under the extractor's own name.
for extractor_name, extractor_process in extractors:
    # Transient status line: the carriage return lets the completion message
    # below take its place once the extractor has finished.
    started = f'{extractor_name} performed...'
    print(started, end='\r')
    Tools.transform(inputs, {'datum': 'Datum'}, extractor_process, extractor_name)

    # The completion text is shorter than the status line, so pad it to the
    # same width; otherwise the tail of "performed..." stays visible. Ending
    # with a newline keeps one completed line per extractor.
    print(f'{extractor_name} achieved !'.ljust(len(started)))

    # Saved inside the loop: the features file is rewritten after each extractor.
    IO.save(inputs, features_file)

HalimiDWT achieved !...

# Classifier

In [9]:
# Candidate classifiers as (short name, pipeline step, hyper-parameter grid)
# triples. The step is named 'clf', hence the 'clf__' prefix on the grid keys.

# SVM Linear
svm_linear = SVC(kernel='linear', class_weight='balanced', max_iter=max_iter, probability=True)
svm_linear_grid = {'clf__C': logspace(-2, 3, 6).tolist()}  # six values of C, from 10^-2 to 10^3

clfs = [('SVML', ('clf', svm_linear), svm_linear_grid)]

# Model

In [10]:
# Scalers as (short name, pipeline step) pairs. A step of None means
# "no scaling" and yields a classifier-only pipeline.
scalers = [
    ('MMS', ('scale', MinMaxScaler())),
    # ('SS', ('scale', StandardScaler())),  # alternative scaler, currently disabled
]

# One pipeline per scaler/classifier combination, named '<scaler>_<classifier>'
# and paired with the classifier's hyper-parameter grid.
processes = [
    (
        f'{scaler_name}_{clf_name}',
        Pipeline([clf] if scaler is None else [scaler, clf]),
        clf_params,
    )
    for scaler_name, scaler in scalers
    for clf_name, clf, clf_params in clfs
]

# Evaluation

In [11]:
# Evaluate every pipeline on every extracted feature set. Results are stored
# under '<extractor><process>Label', the name the scores cell looks up.
for extractor_name, extractor_process in extractors:
    for process_name, process_process, process_params in processes:
        name = f'{extractor_name}{process_name}Label'

        # Transient status line: the carriage return lets the completion
        # message below take its place once the evaluation has finished.
        started = f'{name} performed...'
        print(started, end='\r')
        Tools.evaluate(inputs, {'datum': extractor_name, 'label_encode': 'LabelEncode'}, process_process, name, distribution=process_params)

        # The completion text is shorter than the status line, so pad it to
        # the same width; otherwise the tail of "performed..." stays visible.
        # Ending with a newline keeps one completed line per evaluation.
        print(f'{name} achieved !'.ljust(len(started)))

        # Saved inside the loop: the prediction file is rewritten after each
        # evaluation.
        IO.save(inputs, prediction_file)

HalimiDWTMMS_SVMLLabel achieved !...

# Scores

In [12]:
from IPython.display import HTML, display

# Reload the table saved by the evaluation loop
inputs = IO.load(prediction_file)

# Refit the label encoder handed to the report view
label_encoder = OrderedEncoder().fit(['Normal', 'Benign', 'Malignant'])

# NOTE(review): presumably sets the default figure size; no figure is drawn
# in this cell, which only renders report tables.
ViewsTools.plot_size((8,8))

# One classification report per extractor/model combination
for extractor_name, extractor_process in extractors:
    for model_name, model_process, model_params in processes:
        # Name under which the evaluation loop stored this combination
        name = f'{extractor_name}{model_name}Label'

        report = Views.report(inputs, {'label_encode': 'LabelEncode', 'eval': name}, label_encoder)
        rendered = ViewsTools.dataframe_renderer([report], title=[f'Test - {name}'])
        display(HTML(rendered))

Unnamed: 0,precision,recall,f1-score,support
Normal,0.41±0.18,0.44±0.11,0.42±0.14,757.00±115.20
Benign,0.65±0.13,0.71±0.05,0.68±0.08,2204.00±115.21
Malignant,0.74±0.05,0.66±0.09,0.70±0.06,2470.00±125.50
accuracy,0.65±0.07,0.65±0.07,0.65±0.07,0.65±0.07
macro avg,0.60±0.03,0.60±0.06,0.60±0.04,5431.00±0.43
weighted avg,0.66±0.08,0.65±0.07,0.65±0.08,5431.00±0.43


Unnamed: 0,precision,recall,f1-score,support
Normal,0.45±0.17,0.38±0.09,0.41±0.08,757.00±115.20
Benign,0.63±0.11,0.73±0.06,0.68±0.07,2204.00±115.21
Malignant,0.75±0.03,0.68±0.09,0.71±0.06,2470.00±125.50
accuracy,0.66±0.05,0.66±0.05,0.66±0.05,0.66±0.05
macro avg,0.61±0.02,0.60±0.04,0.60±0.01,5431.00±0.43
weighted avg,0.66±0.06,0.66±0.05,0.66±0.06,5431.00±0.43


Unnamed: 0,precision,recall,f1-score,support
Normal,0.16±0.10,0.51±0.02,0.25±0.12,757.00±115.20
Benign,0.44±0.10,0.34±0.06,0.38±0.06,2204.00±115.21
Malignant,0.57±0.08,0.32±0.03,0.41±0.04,2470.00±125.50
accuracy,0.35±0.03,0.35±0.03,0.35±0.03,0.35±0.03
macro avg,0.39±0.02,0.39±0.02,0.35±0.03,5431.00±0.43
weighted avg,0.46±0.05,0.35±0.03,0.38±0.02,5431.00±0.43
