# I. Classification Metrics

In [14]:
import warnings
from warnings import filterwarnings
from pandas import read_csv
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression # Linear
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA# Linear
from sklearn.neighbors import KNeighborsClassifier as KNN # Non-Linear
from sklearn.naive_bayes import GaussianNB # Non-linear
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.svm import SVC # Non-linear

filename = 'GLAUCOMA_ACRIMA.csv'
dataframe = read_csv(filename)
#print(dataframe)

#print(dataframe)
values = dataframe.values
#print(values)
# Split inputs (X) and outputs (Y)
X = values[  :  , 0: -1] # Toutes les lignes et toutes les colonnes (sauf la derniere)
Y =  values[ : , -1] #Toutes les lignes et (de) la derniere colonne
seed, num_fold = 7, 7
kfold = KFold(n_splits=num_fold, random_state=seed, shuffle=True)
# List of algorithms
algorithms = []
algorithms.append(('Logistic Regression', LogisticRegression(solver='newton-cg')))
algorithms.append(('LDA', LDA()))
algorithms.append(('K-Nearest Neighbors', KNN()))
algorithms.append(('Naive Bayes', GaussianNB()))
algorithms.append(('Decision Tree', DTC()))
algorithms.append(('SVM', SVC()))
# List of Metrics
metrics = []
metrics.append(('Accuracy', 'accuracy'))
metrics.append(('Rceall', 'recall'))
metrics.append(('Precision', 'precision'))
metrics.append(('F1-Score', 'f1'))
metrics.append(('ROC', 'roc_auc'))
for name, metric in metrics:
    print('%s\n--------' % name)
    for algo_name, algorithm in algorithms:
        model = algorithm
        results = cross_val_score(model, X, Y, cv=kfold, scoring=metric)    
        print("%s : %.3f (%.3f)" %(algo_name,results.mean(), results.std()))

Accuracy
--------
Logistic Regression : 0.850 (0.048)
LDA : 0.897 (0.035)
K-Nearest Neighbors : 0.830 (0.044)
Naive Bayes : 0.873 (0.035)
Decision Tree : 0.864 (0.046)
SVM : 0.850 (0.039)
Rceall
--------
Logistic Regression : 0.875 (0.051)
LDA : 0.842 (0.059)
K-Nearest Neighbors : 0.824 (0.064)
Naive Bayes : 0.863 (0.055)
Decision Tree : 0.881 (0.050)
SVM : 0.807 (0.059)
Precision
--------
Logistic Regression : 0.866 (0.075)
LDA : 0.975 (0.011)
K-Nearest Neighbors : 0.880 (0.063)
Naive Bayes : 0.912 (0.028)
Decision Tree : 0.874 (0.069)
SVM : 0.923 (0.060)
F1-Score
--------
Logistic Regression : 0.869 (0.054)
LDA : 0.903 (0.037)
K-Nearest Neighbors : 0.848 (0.043)
Naive Bayes : 0.886 (0.037)
Decision Tree : 0.881 (0.056)
SVM : 0.860 (0.046)
ROC
--------
Logistic Regression : 0.920 (0.036)
LDA : 0.970 (0.019)
K-Nearest Neighbors : 0.912 (0.034)
Naive Bayes : 0.915 (0.042)
Decision Tree : 0.855 (0.044)
SVM : 0.915 (0.033)


# Data Transform

In [5]:
# Rescale
from sklearn.preprocessing import MinMaxScaler
#Standardization
from sklearn.preprocessing import StandardScaler
# Normalization
from sklearn.preprocessing import Normalizer

# List de transformation
transforms = []
transforms.append(('Rescale', MinMaxScaler(feature_range=(0,1))))
transforms.append(('Standardization', StandardScaler()))
transforms.append(('Normalization', Normalizer()))

filename = 'GLAUCOMA_ACRIMA.csv'
dataframe = read_csv(filename)
values = dataframe.values
X = values[  :  , 0: -1] # Toutes les lignes et toutes les colonnes (sauf la derniere)
Y =  values[ : , -1] #Toutes les lignes et (de) la derniere colonne
seed, num_fold = 7, 5
kfold = KFold(n_splits=num_fold, random_state=seed, shuffle=True)
metric = 'accuracy'
algo_name = 'Regression Logistic'
model = LogisticRegression(solver='newton-cg')
results = cross_val_score(model, X, Y, cv=kfold, scoring=metric)    
print("%s : %.3f (%.3f)" %(algo_name,results.mean(), results.std()))

for transf_name, transform in transforms:
    scaler = transform
    transformedX = scaler.fit_transform(X)
    results = cross_val_score(model, transformedX, Y, cv=kfold, scoring=metric)    
    print("%s : %.3f (%.3f)" %(transf_name,results.mean(), results.std()))
    #print(transformedX)
    #print('____________________________')

Regression Logistic : 0.842 (0.044)
Rescale : 0.877 (0.028)
Standardization : 0.889 (0.021)
Normalization : 0.846 (0.044)


In [13]:
for name, metric in metrics:
    print('%s\n--------' % name)
    for algo_name, algorithm in algorithms:
        model = algorithm
        results = cross_val_score(model, X, Y, cv=kfold, scoring=metric)    
        print("\t %s : %.3f (%.3f)" %(algo_name,results.mean(), results.std()))
        for transf_name, transform in transforms:
            scaler = transform
            transformedX = scaler.fit_transform(X)
            results = cross_val_score(model, transformedX, Y, cv=kfold, scoring=metric)    
            print("\t \t %s : %.3f (%.3f)" %(transf_name,results.mean(), results.std()))

Accuracy
--------
	 Logistic Regression : 0.842 (0.044)
	 	 Rescale : 0.877 (0.028)
	 	 Standardization : 0.889 (0.021)
	 	 Normalization : 0.846 (0.044)
	 LDA : 0.901 (0.025)
	 	 Rescale : 0.901 (0.025)
	 	 Standardization : 0.901 (0.025)
	 	 Normalization : 0.885 (0.045)
	 K-Nearest Neighbors : 0.834 (0.044)
	 	 Rescale : 0.869 (0.021)
	 	 Standardization : 0.869 (0.026)
	 	 Normalization : 0.869 (0.027)
	 Naive Bayes : 0.877 (0.032)
	 	 Rescale : 0.877 (0.032)
	 	 Standardization : 0.877 (0.032)
	 	 Normalization : 0.836 (0.038)
	 Decision Tree : 0.848 (0.022)
	 	 Rescale : 0.848 (0.033)
	 	 Standardization : 0.842 (0.044)
	 	 Normalization : 0.840 (0.056)
	 SVM : 0.853 (0.042)
	 	 Rescale : 0.883 (0.025)
	 	 Standardization : 0.883 (0.028)
	 	 Normalization : 0.850 (0.040)
Rceall
--------
	 Logistic Regression : 0.870 (0.052)
	 	 Rescale : 0.877 (0.049)
	 	 Standardization : 0.887 (0.036)
	 	 Normalization : 0.837 (0.062)
	 LDA : 0.853 (0.034)
	 	 Rescale : 0.853 (0.034)
	 	 Standa