In [37]:
import os 
import re
import glob

import numpy as np 
import pandas as pd 
from PIL import Image 
from matplotlib import pyplot as plt 
import seaborn as sns 

from skimage import morphology 
from skimage.transform import rotate
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.preprocessing import StandardScaler

from time import time 

In [38]:
# Creating the main data set
features = pd.read_csv("../features/feature_set.csv", sep = ";")
data = pd.read_csv("../data/ISIC-2017_Training_Part3_GroundTruth.csv")

image_data = pd.merge(features, data, on = "image_id")

image_data = image_data.drop(["image_id", "seborrheic_keratosis"], axis = 1)

feature_list = image_data.columns.tolist()
feature_list.remove('melanoma')


In [42]:
# Separating the data into train and test sets

df, df2 = image_data, image_data

noise = pd.DataFrame(data = np.random.RandomState(23).uniform(0, 0.1, size=(df2.shape[0], 20)), columns = [i for i in range(20)])

X = df2[feature_list]
X_noisy = pd.merge(X, noise, left_index = True, right_index = True)

x_scaled = StandardScaler().fit(X.values)
x_scaled_df = pd.DataFrame(x_scaled, index=X.index, columns=X.columns)
x_noisy_scaled = StandardScaler().fit(X_noisy.values)
x_noisy_scaled_df = pd.DataFrame(x_noisy_scaled, index=X_noisy.index, columns=X_noisy.columns)
y = df2['melanoma']


X_dev, X_test, y_dev, y_test = train_test_split(x_scaled_df, y, stratify=y, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_dev, y_dev, stratify=y_dev)




In [43]:
# training the classifiers 
classifiers_name = ["KNN_1", "KNN_3", "KNN_5", "KNN_10", "KNN_50", "Tree", "Gaussian"]
classifiers = []
classifiers.append(KNeighborsClassifier(n_neighbors = 1))
classifiers.append(KNeighborsClassifier(n_neighbors = 3))
classifiers.append(KNeighborsClassifier(n_neighbors = 5))
classifiers.append(KNeighborsClassifier(n_neighbors = 10))
classifiers.append(KNeighborsClassifier(n_neighbors = 50))
classifiers.append(DecisionTreeClassifier())
classifiers.append(GaussianProcessClassifier())

trained_classifiers = [classifier.fit(X_train, y_train) for classifier in classifiers]                   


TypeError: float() argument must be a string or a number, not 'StandardScaler'

In [41]:
#Evaluating the features
predictions = [trained.predict(X_val) for trained in trained_classifiers]

accuracy_scores = [accuracy_score(y_val, prediction) for prediction in predictions]
auc_scores = [roc_auc_score(y_val, prediction) for prediction in predictions]
f1_scores = [f1_score(y_val, prediction) for prediction in predictions]

columns = ["Classifier", "Accuracy score", "Roc Auc score", "F1 score"]
data = np.array([classifiers_name, accuracy_scores, auc_scores, f1_scores]).T
Classifier_evaluation = pd.DataFrame(data = data, columns = columns)
Classifier_evaluation

Unnamed: 0,Classifier,Accuracy score,Roc Auc score,F1 score
0,KNN_1,0.7386666666666667,0.5476580796252928,0.2575757575757575
1,KNN_3,0.784,0.5259953161592505,0.1649484536082474
2,KNN_5,0.7893333333333333,0.5072599531615926,0.0919540229885057
3,KNN_10,0.8106666666666666,0.503864168618267,0.0273972602739726
4,KNN_50,0.8133333333333334,0.5,0.0
5,Tree,0.7173333333333334,0.5675644028103044,0.3026315789473685
6,Gaussian,0.8133333333333334,0.5,0.0
