In [None]:
from scripts import preprocess as pp
from scripts import segmentation as seg
from scripts import feature_extraction_segmentation as fex
from scripts import split_data
import os
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.metrics import confusion_matrix, f1_score, precision_score, \
                            recall_score, accuracy_score, classification_report

from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# K nearest neighbor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

# Decision tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn import tree

In [None]:
def plot_confusion_matrix(true_labels, predicted_labels, class_labels):
    """Draw the confusion matrix as an annotated heatmap.

    True labels run along the x axis and predicted labels along the y axis.

    Args:
        true_labels: Ground-truth label of every sample.
        predicted_labels: Predicted label of every sample, in the same order.
        class_labels: Every label value that may occur. Their sorted order is
            used both for the matrix rows/columns and for the tick labels.
    """
    ordered_labels = sorted(class_labels)
    # Pin the matrix to the full label set. Without `labels=`, scikit-learn only
    # creates rows/columns for labels that actually occur in the data, so a class
    # missing from this sample would shrink the matrix and misalign the ticks.
    conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=ordered_labels)

    plt.figure()
    plt.title('Confusion matrix')
    # confusion_matrix puts true labels on rows; transpose so they end up on x.
    sns.heatmap(conf_matrix.T, square=True, annot=True, fmt='d', cbar=False,
                xticklabels=ordered_labels, yticklabels=ordered_labels)
    plt.xlabel('true label')
    plt.ylabel('predicted label')
    plt.tight_layout()
    plt.show()

In [None]:
# Run options
do_preprocess = True   # gates the preprocessing cell further down
sample_number = 100    # forwarded to split_data.prepare_dataset

# Everything is resolved relative to the directory the notebook is started from
cwd_path = os.getcwd()

# Image folders (raw and preprocessed)
train_set_path = cwd_path + "/data/images/training_set/"
val_set_path = cwd_path + "/data/images/val_set/"
preprocessed_train_set_path = cwd_path + "/data/images/preprocessed_train/"
preprocessed_val_set_path = cwd_path + "/data/images/preprocessed_val/"

# Source dataset folder and ground-truth CSV files
dataset_path = cwd_path + '/temp/dataset/'
train_csv = cwd_path + '/data/groundtruth_train.csv'
train_reduced_csv = cwd_path + '/data/reduced_groundtruth_train.csv'
val_csv = cwd_path + '/data/groundtruth_val.csv'
val_reduced_csv = cwd_path + '/data/reduced_groundtruth_val.csv'

# Make sure the four image folders exist (no error if they already do)
for required_dir in (train_set_path, val_set_path,
                     preprocessed_train_set_path, preprocessed_val_set_path):
    os.makedirs(required_dir, exist_ok=True)

In [None]:
# Data split (optional): leave this cell out to keep working with the last split.
split_data.prepare_dataset(
    dataset_path,
    train_set_path,
    val_set_path,
    train_csv,
    val_csv,
    train_reduced_csv,
    val_reduced_csv,
    sample_number,
)

In [None]:
# Black-border removal for the training and validation images (optional):
# leave this cell out to keep working with the last split.
pp.crop_dataset(
    train_set_path,
    val_set_path,
    train_reduced_csv,
    val_reduced_csv,
)

In [None]:
# Mandatory cell: load the output classes of the training and validation data
# from the reduced ground-truth CSV files.
train_classes, val_classes = split_data.read_csv_files(
    train_reduced_csv,
    val_reduced_csv,
)

In [None]:
# Preprocessing (optional): leave this cell out to keep working with the last split.
# When enabled, the preprocessed images are written to the preprocessed_* folders.
if do_preprocess:
    pp.preprocess_dataset(
        train_set_path,
        val_set_path,
        train_reduced_csv,
        val_reduced_csv,
        preprocessed_train_set_path,
        preprocessed_val_set_path,
    )

In [None]:
# Lesion segmentation on the preprocessed images. The raw results may contain
# None values, which the following cell filters out.
tmp_segmented_lesion_train_set, tmp_segmented_lesion_val_set = seg.get_lesion_region(
    train_reduced_csv,
    val_reduced_csv,
    preprocessed_train_set_path,
    preprocessed_val_set_path,
)

In [None]:
# Training set: keep only the entries whose value is not None
segmented_lesion_train_set = {
    img_name: lesion
    for img_name, lesion in tmp_segmented_lesion_train_set.items()
    if lesion is not None
}

# Validation set: same filtering
segmented_lesion_val_set = {
    img_name: lesion
    for img_name, lesion in tmp_segmented_lesion_val_set.items()
    if lesion is not None
}

print('Done')

In [None]:
# Extract features from the segmented lesions of both sets
features_train, features_test = fex.features_extraction(
    segmented_lesion_train_set,
    segmented_lesion_val_set,
    train_set_path,
    val_set_path,
)

In [None]:
# Replace every NaN feature value with 0: round-trip each feature dict through
# a DataFrame, fill the gaps, and convert back to a {key: list} dict.
tmp_features_train = pd.DataFrame.from_dict(features_train).fillna(0)
features_train_input = tmp_features_train.to_dict('list')

tmp_features_val = pd.DataFrame.from_dict(features_test).fillna(0)
features_test_input = tmp_features_val.to_dict('list')

In [None]:
# Image names, in the order in which their feature vectors are stacked below
train_names = list(features_train_input)
test_names = list(features_test_input)

# One feature vector per image
X_train = [features_train_input[name] for name in train_names]
X_test = [features_test_input[name] for name in test_names]

# Scale every feature to [0, 1]; the scaler is fitted on the training data only
# and then reused unchanged on the test data.
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Target class of every image, aligned with the rows of X_train / X_test
y_train = [train_classes[name] for name in train_names]
y_test = [val_classes[name] for name in test_names]
    

In [None]:
print("Training the SVM classifier...")

# Hyper-parameter grid explored for the RBF-kernel SVM
param_grid = {'C': [1, 1e1, 1e2, 1e3, 5e3, 1e4],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
              'class_weight': [None, 'balanced']}

# The `iid` argument was deprecated in scikit-learn 0.22 and removed in 0.24,
# so passing it raises TypeError on current releases. From 0.22 on, omitting it
# gives the behaviour `iid=False` asked for (plain mean of the per-fold scores).
clf = GridSearchCV(svm.SVC(kernel='rbf'), param_grid)
clf = clf.fit(X_train, y_train)

print("Best estimator found by Grid Search:")
print(clf.best_estimator_)

# Predictions of the refitted best estimator on the held-out set
y_pred = clf.predict(X_test)







In [None]:
class_labels = ['MEL','NV','BCC','AK','BKL','DF','VASC','SCC']
print('*** TEST SET PERFORMANCE EVALUATION - Segmentation + Feature Extraction + SVM ***')

# Overall accuracy, then F1 / recall / precision averaged with 'weighted'
accuracy = accuracy_score(y_test, y_pred)
val_f1, val_recall, val_precision = (
    metric(y_test, y_pred, average='weighted')
    for metric in (f1_score, recall_score, precision_score)
)

score_lines = (
    ('Accuracy: {:.3f}', accuracy),
    ('F1-score: {:.3f}', val_f1),
    ('Recall: {:.3f}', val_recall),
    ('Precision: {:.3f}', val_precision),
)
for template, score in score_lines:
    print(template.format(score))

# NOTE(review): target_names is passed in the order listed above, whereas
# plot_confusion_matrix sorts class_labels for its ticks. Confirm that this
# order matches the label order classification_report uses for y_test / y_pred.
print('\nClassification report:')
print(classification_report(y_test, y_pred, target_names=class_labels))

plot_confusion_matrix(y_test, y_pred, class_labels)

In [None]:
# KNN with k = 15 (default weights) -> "Best" accuracy

clf2 = KNeighborsClassifier(15)
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

# This used to call clf2.predict(X_testing), but X_testing is not defined
# anywhere in the notebook, so the cell stopped with a NameError. The name is
# kept and bound to the predictions on X_test.
predicted_testing = predicted


In [None]:
# KNN with k = 15, neighbours weighted by distance

clf2 = KNeighborsClassifier(15, weights='distance')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

In [None]:
# KNN with k = 50, neighbours weighted by distance

clf2 = KNeighborsClassifier(50, weights='distance')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

In [None]:
# KNN with k = 15, uniform neighbour weights

clf2 = KNeighborsClassifier(15, weights='uniform')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

In [None]:
# KNN with k = 50, uniform neighbour weights

clf2 = KNeighborsClassifier(50, weights='uniform')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

In [None]:
# Decision tree, split quality measured with Gini impurity

clf2 = DecisionTreeClassifier(criterion='gini')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))

In [None]:
# Decision tree, split quality measured with entropy (information gain)

# This cell previously passed criterion='gini', which made it a duplicate of
# the Gini experiment instead of the entropy run its title announces.
clf2 = DecisionTreeClassifier(criterion='entropy')
clf2 = clf2.fit(X_train, y_train)

print("Accuracy:")
predicted = clf2.predict(X_test)
# accuracy_score is the fraction of predictions that equal the true label
print(accuracy_score(y_test, predicted))