In [None]:
import sys
sys.path.append('/home/rcendre/classification')
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from toolbox.classification.common import IO
from toolbox.classification.parameters import Settings
from toolbox.transforms.labels import OrderedEncoder
from toolbox.views.common import Views
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from numpy import array

In [None]:
# Notebook-wide configuration.
# Seed value kept for reproducibility; not referenced by the cells visible here.
RANDOM_STATE = 42
# (width, height) in inches, passed as `figsize` to the multi-panel figures below.
FIG_SIZE = 16, 7

In [None]:
# Load the previously extracted features from disk.
# NOTE(review): presumably IO.load unpickles the file -- only load trusted files.
inputs = IO.load('Extraction_Transfer.pickle')

In [None]:
# Default plotting/processing settings for the dermatology use case.
settings = Settings.get_default_dermatology()

# Class names listed in the order the encoder is fitted on.
# NOTE(review): presumably this order defines the integer codes (0, 1, 2) -- confirm in OrderedEncoder.
class_order = ['Normal', 'Benign', 'Malignant']
label_encoder = OrderedEncoder().fit(class_order)

In [None]:
# Mapping handed to the fold view: keyword expected by the view -> column name in `inputs`.
fold_columns = {
    'label_encode': 'LabelEncode',
    'group_encode': 'GroupEncode',
}

# Draw the fold overview and export it as a vector image.
figure = Views.fold_visualization(inputs, fold_columns, label_encoder, settings)
figure.savefig('folds_image_classification.svg')

In [None]:
# Compare a 2-component PCA projection with and without min-max scaling,
# on a training subset (used for fitting) and an evaluation subset.

# Build the feature matrix and label vector once, then slice them:
# rows 0-499 are used for fitting, rows 500-999 for evaluation.
features = array(inputs['VGG16Avg'].to_list())
labels = array(inputs['LabelEncode'].to_list())

X_train, y_train = features[0:500, :], labels[0:500]
X_test, y_test = features[500:1000, :], labels[500:1000]

# PCA alone on the raw features (PCA is unsupervised; y is accepted but unused).
unscaled = make_pipeline(PCA(n_components=2))
unscaled.fit(X_train, y_train)

# Min-max scaling followed by PCA.
scaled = make_pipeline(MinMaxScaler(), PCA(n_components=2))
scaled.fit(X_train, y_train)

# Project both subsets with the pipelines fitted on the training subset only.
X_train_unscaled = unscaled.transform(X_train)
X_test_unscaled = unscaled.transform(X_test)
X_train_scaled = scaled.transform(X_train)
X_test_scaled = scaled.transform(X_test)

# One panel per (subset, pipeline) combination.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=FIG_SIZE)

# Each panel pairs the projected points with the labels of the SAME subset.
# The evaluation panels must be masked with y_test: masking test points with
# y_train colours them with unrelated labels (and fails if the sizes differ).
panels = (
    (ax1, X_train_unscaled, y_train, 'Entrainement: ACP'),
    (ax2, X_train_scaled, y_train, 'Entrainement: Normalisation + ACP'),
    (ax3, X_test_unscaled, y_test, 'Evaluation: ACP'),
    (ax4, X_test_scaled, y_test, 'Evaluation: Normalisation + ACP'),
)

# (encoded label, colour, legend name).
# NOTE(review): assumes codes 0/1/2 map to Normal/Benign/Malignant -- confirm against the encoder.
classes = tuple(zip(range(0, 3), ('green', 'yellow', 'orange'), ('Sain', 'Bénin', 'Malin')))

for ax, points, targets, title in panels:
    for l, c, name in classes:
        mask = targets == l
        ax.scatter(points[mask, 0],
                   points[mask, 1],
                   color=c,
                   label=name,
                   alpha=0.5
                   )
    ax.set_title(title)
    ax.set_xlabel('Première composante principale')
    ax.set_ylabel('Seconde composante principale')
    ax.legend(loc='upper right')
    ax.grid()

plt.tight_layout()

# Save before showing so the export does not depend on how the backend
# treats the figure once it has been displayed.
fig.savefig('exemple_PCA.svg')
plt.show()

In [None]:
# Compare a 2-component LDA projection with and without min-max scaling,
# reusing X_train / y_train / X_test / y_test defined in the PCA cell above.

# LDA alone on the raw features (supervised: the labels are used for fitting).
unscaled = make_pipeline(LinearDiscriminantAnalysis(n_components=2))
unscaled.fit(X_train, y_train)

# Min-max scaling followed by LDA.
scaled = make_pipeline(MinMaxScaler(), LinearDiscriminantAnalysis(n_components=2))
scaled.fit(X_train, y_train)

# Project both subsets with the pipelines fitted on the training subset only.
X_train_unscaled = unscaled.transform(X_train)
X_test_unscaled = unscaled.transform(X_test)
X_train_scaled = scaled.transform(X_train)
X_test_scaled = scaled.transform(X_test)

# One panel per (subset, pipeline) combination.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=FIG_SIZE)

# Each panel pairs the projected points with the labels of the SAME subset.
# The evaluation panels must be masked with y_test: masking test points with
# y_train colours them with unrelated labels (and fails if the sizes differ).
panels = (
    (ax1, X_train_unscaled, y_train, 'Entrainement : ADL'),
    (ax2, X_train_scaled, y_train, 'Entrainement : Normalisation + ADL'),
    (ax3, X_test_unscaled, y_test, 'Evaluation : ADL'),
    (ax4, X_test_scaled, y_test, 'Evaluation : Normalisation + ADL'),
)

# (encoded label, colour, legend name).
# NOTE(review): assumes codes 0/1/2 map to Normal/Benign/Malignant -- confirm against the encoder.
classes = tuple(zip(range(0, 3), ('green', 'yellow', 'orange'), ('Sain', 'Bénin', 'Malin')))

for ax, points, targets, title in panels:
    for l, c, name in classes:
        mask = targets == l
        ax.scatter(points[mask, 0],
                   points[mask, 1],
                   color=c,
                   label=name,
                   alpha=0.5
                   )
    ax.set_title(title)
    # NOTE(review): LDA axes are discriminant axes rather than principal
    # components; the labels are kept as in the original figure.
    ax.set_xlabel('Première composante principale')
    ax.set_ylabel('Seconde composante principale')
    ax.legend(loc='upper right')
    ax.grid()

plt.tight_layout()

# Save before showing so the export does not depend on how the backend
# treats the figure once it has been displayed.
fig.savefig('exemple_LDA.svg')
plt.show()