# Imports

In [None]:
# Extend the module search path before the `toolbox` imports below are resolved.
# NOTE(review): absolute, machine-specific path — presumably the checkout that
# contains the `toolbox` package; confirm before running on another machine.
import sys
sys.path.append('/home/rcendre/classification')
import os
# NOTE(review): `sys` is already imported above; this second import is redundant but harmless.
import sys
import itertools
import webbrowser
from pathlib import Path
import matplotlib.pyplot as plt
from numpy import array, logspace
from scipy.stats import randint as randint
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.manifold import TSNE
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import davies_bouldin_score
from toolbox.classification.common import Data, Folds, IO, Tools
from toolbox.classification.parameters import Dermatology, Settings
from toolbox.IO import dermatology
from toolbox.models.builtin import Applications
from toolbox.models.models import DecisionVotingClassifier, ScoreVotingClassifier
from toolbox.transforms.common import PredictorTransform
from toolbox.transforms.labels import OrderedEncoder
from toolbox.transforms.images import DistributionImageTransform, DWTImageTransform, FourierImageTransform, HaralickImageTransform, SpatialImageTransform
from toolbox.views.common import Views, ViewsTools
from toolbox.views.images import ImagesViews
from IPython.display import display
import warnings
# Install a global 'ignore' filter: every Python warning is silenced from here on.
warnings.filterwarnings('ignore') 

# Parameters

In [None]:
# Advanced parameters
# NOTE(review): `data_type` is not referenced in the visible part of this notebook — confirm it is still needed.
data_type = 'Full'
# Passed as the 'datum' tag when the low-level model is fitted.
extractor = 'Frequency'
# NOTE(review): `validation` is not referenced in the visible part of this notebook — confirm it is still needed.
validation = 4
# Default dermatology settings provided by the project's `Settings` helper.
settings = Settings.get_default_dermatology()

# Inputs

In [None]:
# Load the previously saved features. The file name is a plain literal:
# it has no placeholders, so an f-string prefix is not needed.
inputs = IO.load('Features.pickle')

In [None]:
# Keep only rows whose binary diagnosis is one of the two known classes,
# then renumber the rows from zero (the old index is discarded).
has_binary_label = inputs['Binary_Diagnosis'].isin(['Benign', 'Malignant'])
inputs = inputs[has_binary_label].reset_index(drop=True)

# Low level predictions

In [None]:
# Pickle that stores the low-level results. Plain literal: the name has no
# placeholders, so an f-string prefix is not needed.
# NOTE(review): the name hard-codes 'Frequency', the current value of
# `extractor`; confirm whether it is meant to follow that parameter.
low_file = 'Low_Supervised_Frequency.pickle'

In [None]:
# Ten rotating 5/5 splits over the fold ids 1..10 (wrapping from 10 back to 1).
# Split k (0-based) pairs the five consecutive ids starting at k + 1 with the
# five ids that follow them — presumably (fit ids, predict ids); confirm
# against Tools.fit_predict.
low_folds = [
    ([(start + step) % 10 + 1 for step in range(5)],
     [(start + 5 + step) % 10 + 1 for step in range(5)])
    for start in range(10)
]

In [None]:
# Low-level model: min-max scaling followed by a linear, class-balanced SVM
# with probability estimates enabled.
supervised = Pipeline(
    steps=[
        ('scale', MinMaxScaler()),
        ('clf', SVC(kernel='linear', class_weight='balanced', probability=True)),
    ]
)
# Candidate values for the SVM's C: six log-spaced points from 1e-2 to 1e3.
supervised_params = dict(clf__C=logspace(-2, 3, 6).tolist())

In [None]:
# Run the supervised pipeline over `low_folds`, reading the `extractor`
# column as data and 'LesionEncode' as labels. Results are presumably
# written back into `inputs` under the 'Supervised' prefix — confirm
# against Tools.fit_predict.
Tools.fit_predict(
    inputs,
    {'datum': extractor, 'label_encode': 'LesionEncode'},
    supervised,
    'Supervised',
    folds=low_folds,
    distribution=supervised_params,
)

In [None]:
# Row selectors: every row, and the rows whose image id is '0M'.
all_image = [True] * len(inputs.index)
single_image = inputs['ID_Image'] == '0M'

# Bag the low-level prediction column first, then the probability column,
# both keyed on 'ID_Lesion'.
for output_kind in (Tools.PREDICTION, Tools.PROBABILITY):
    Data.build_bags(inputs, single_image, 'ID_Lesion', all_image, 'ID_Lesion', f'Supervised_{output_kind}')

# Keep only the '0M' rows. reset_index() is called without drop=True, so the
# previous index is retained as a regular column.
inputs = inputs[single_image].reset_index()

In [None]:
# Save the low-level results to `low_file` so the high-level stage can reload them.
IO.save(inputs, low_file)

# High level predictions

In [None]:
# Reload the low-level results written to `low_file`, replacing the in-memory `inputs`.
inputs = IO.load(low_file)

In [None]:
# Pickle that stores the high-level results. Plain literal: the name has no
# placeholders, so an f-string prefix is not needed.
# NOTE(review): the name hard-codes 'Frequency', the current value of
# `extractor`; confirm whether it is meant to follow that parameter.
high_file = 'High_Supervised_Frequency.pickle'

In [None]:
# Ten rotating 4/1 splits over the fold ids 1..10 (wrapping from 10 back to 1).
# Split k (0-based) pairs the four consecutive ids starting at k + 6 with the
# single id that follows them — presumably (fit ids, predict ids); confirm
# against Tools.evaluate.
high_folds = [
    ([(start + 5 + step) % 10 + 1 for step in range(4)],
     [(start + 9) % 10 + 1])
    for start in range(10)
]

In [None]:
# Decision-level voting: evaluated on the 'Supervised_<PREDICTION>' column,
# one result name per voting mode.
for vote_mode, result_name in (('at_least_one', 'D_ALO'), ('dynamic_thresh', 'D_DYN')):
    Tools.evaluate(
        inputs,
        {'datum': f'Supervised_{Tools.PREDICTION}', 'label_encode': 'LesionEncode'},
        DecisionVotingClassifier(mode=vote_mode),
        result_name,
        folds=high_folds,
    )

# Score-level voting: evaluated on the 'Supervised_<PROBABILITY>' column,
# one result name per low-level aggregation rule.
for low_rule, result_name in (('max', 'S_MaxD'), ('mean', 'S_MeanD')):
    Tools.evaluate(
        inputs,
        {'datum': f'Supervised_{Tools.PROBABILITY}', 'label_encode': 'LesionEncode'},
        ScoreVotingClassifier(low=low_rule, high='dynamic'),
        result_name,
        folds=high_folds,
    )

# Save the high-level results to `high_file`.
IO.save(inputs, high_file)

# Analysis

## Scores and ROC

In [None]:
from IPython.display import HTML, display

# Start from the saved high-level results.
inputs = IO.load(high_file)

# Encoder fitted on the two diagnosis classes, handed to every report.
diagnosis_encoder = OrderedEncoder().fit(['Benign', 'Malignant'])

# NOTE(review): presumably sets the figure size; no curve is drawn in this cell.
ViewsTools.plot_size((8,8))

# One rendered report per high-level model.
high_models = ['D_ALO', 'D_DYN', 'S_MaxD', 'S_MeanD']
for name in high_models:
    model_report = Views.report(inputs, {'label_encode': 'LesionEncode', 'eval': name}, diagnosis_encoder)
    display(HTML(ViewsTools.dataframe_renderer([model_report], title=[f'Test - {name}'])))

In [None]:
# Imported here so the cell does not depend on another cell having imported HTML.
from IPython.display import HTML

# Start from the saved high-level results.
inputs = IO.load(high_file)

# Encoder fitted on the two diagnosis classes, handed to every report.
diagnosis_encoder = OrderedEncoder().fit(['Benign', 'Malignant'])

# NOTE(review): presumably sets the figure size; no curve is drawn in this cell.
ViewsTools.plot_size((8,8))
high_models = ['D_ALO', 'D_DYN', 'S_MaxD', 'S_MeanD']

# Rows diagnosed 'LM/LMM' plus all benign rows. The selection does not depend
# on the model, so it is computed once instead of on every loop iteration.
inputs_lm = inputs[(inputs['Diagnosis'] == 'LM/LMM') | (inputs['Binary_Diagnosis'] == 'Benign')]

# One rendered report per high-level model, restricted to that subset.
for name in high_models:
    display(HTML(ViewsTools.dataframe_renderer([Views.report(inputs_lm, {'label_encode': 'LesionEncode', 'eval': name}, diagnosis_encoder)],
                                               title=[f'Test LM - {name}'])))