In [1]:
from code.neuron import *
from code.encodingsource import *
from code.hsgs import *
from code.classical_neuron import *
from code.classical_pso import *
from code.sf import *
simulator = Aer.get_backend('qasm_simulator')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from scipy import stats


from code.experiment_functions import *

  warn_package('aqua', 'qiskit-terra')


In [2]:
diabetes = pd.read_csv('data_and_results/diabetes/diabetes.csv')

In [3]:
# normalization 0-1
for i in diabetes.columns:
    diabetes[i] = (diabetes[i] - diabetes[i].min()) / (diabetes[i].max() - diabetes[i].min())

In [4]:
# split in test and train data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(diabetes.drop('Outcome',axis=1), diabetes['Outcome'], test_size=0.2)

### Train and Test MLP for Comparison

In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

In [24]:
parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

In [25]:
mlp = MLPClassifier(max_iter=100)
clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)



GridSearchCV(cv=3, estimator=MLPClassifier(max_iter=100), n_jobs=-1,
             param_grid={'activation': ['tanh', 'relu'],
                         'alpha': [0.0001, 0.05],
                         'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                                (100,)],
                         'learning_rate': ['constant', 'adaptive'],
                         'solver': ['sgd', 'adam']})

In [26]:
print('Best parameters found:\n', clf.best_params_)

Best parameters found:
 {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (50, 50, 50), 'learning_rate': 'adaptive', 'solver': 'adam'}


In [27]:
predicted = clf.predict(X_test)

In [28]:
print(metrics.precision_score(y_test, predicted))
print(metrics.accuracy_score(y_test, predicted))
print(metrics.recall_score(y_test, predicted))
print(metrics.f1_score(y_test, predicted))
print(metrics.roc_auc_score(y_test, predicted))

0.7288135593220338
0.8051948051948052
0.7543859649122807
0.7413793103448276
0.7947187556520168


In [29]:
predicted = clf.predict_proba(X_test)

In [30]:
predicted = [i[1] for i in predicted]

In [31]:
indicesa = [i for i, x in enumerate(y_test) if x == 0]
a = list(map(predicted.__getitem__, indicesa))
indicesb = [i for i, x in enumerate(y_test) if x == 1]
b = list(map(predicted.__getitem__, indicesb))
#stats.ks_2samp(a, b)[1]
stats.ks_2samp(a, b)


KstestResult(statistic=0.5997467896545488, pvalue=1.36590738719633e-12)

In [32]:
np.min(clf.cv_results_['mean_test_score'])

0.656352622349753

### Train and Test Classical Neuron for Comparison

In [None]:
from sklearn.linear_model import Perceptron

In [None]:
parameter_space = {
    'penalty': ['l2', 'l1', 'elasticnet']
}

In [None]:
percep = Perceptron()
clf = GridSearchCV(percep, parameter_space, n_jobs=-1, cv=3)
clf.fit(X_train, y_train)

In [None]:
print('Best parameters found:\n', clf.best_params_)

In [None]:
predicted = clf.predict(X_test)

In [None]:
print(metrics.precision_score(y_test, predicted))
print(metrics.accuracy_score(y_test, predicted))
print(metrics.recall_score(y_test, predicted))
print(metrics.f1_score(y_test, predicted))
print(metrics.roc_auc_score(y_test, predicted))

In [None]:
predicted

In [None]:
predicted = clf.predict(X_test)
#predicted = [i[1] for i in predicted]

In [18]:
def KS(predicted, y_test):
    indicesa = [i for i, x in enumerate(y_test) if x == 0]
    a = list(map(predicted.__getitem__, indicesa))
    indicesb = [i for i, x in enumerate(y_test) if x == 1]
    b = list(map(predicted.__getitem__, indicesb))
    #stats.ks_2samp(a, b)[1]
    return stats.ks_2samp(a, b)


In [None]:
p_precision_score=[]
p_accuracy_score=[]
p_recall_score=[]
p_f1_score=[]

In [None]:
for pen in ['l2', 'l1', 'elasticnet']:
    for i in range(10):
        percep = Perceptron(penalty=pen)
        percep.fit(X_train, y_train)
        predicted = clf.predict(X_test)
        p_precision_score.append(metrics.precision_score(y_test, predicted))
        p_accuracy_score.append(metrics.accuracy_score(y_test, predicted))
        p_recall_score.append(metrics.recall_score(y_test, predicted))
        p_f1_score.append(metrics.f1_score(y_test, predicted))

In [None]:
print(np.max(p_precision_score))
print(np.max(p_accuracy_score))
print(np.mean(p_accuracy_score))
print(np.max(p_recall_score))
print(np.max(p_f1_score))

## Preprocessing for classical

In [5]:
def angleTransformation(inputVector):
    inputVector = [math.atan(inputVector[i]/inputVector[i+1]) for i in range(0, len(inputVector), 2)] + [np.sqrt(sum([i*i for i in inputVector])), math.asin(inputVector[-1]/np.sqrt(sum([i*i for i in inputVector])))] + [0]*(int(len(inputVector)/2) -2)
    return inputVector

def radiusTransformation(inputVector):
    inputVector = [math.sqrt(inputVector[i]**2 + inputVector[i+1]**2) for i in range(0, len(inputVector), 2)] + [np.sqrt(sum([i*i for i in inputVector])), math.asin(inputVector[-1]/np.sqrt(sum([i*i for i in inputVector])))] + [0]*(int(len(inputVector)/2) - 2)
    return inputVector

def angleRadiusTransformation(inputVector):
    inputVector = [math.sqrt(inputVector[i]**2 + inputVector[i+1]**2) for i in range(0, len(inputVector), 2)] + [math.atan(inputVector[i]/inputVector[i+1]) for i in range(0, len(inputVector), 2)] + [np.sqrt(sum([i*i for i in inputVector])), math.asin(inputVector[-1]/np.sqrt(sum([i*i for i in inputVector])))] 
    return inputVector

In [6]:
X_train_angle = X_train.apply(angleTransformation, axis=1).apply(pd.Series).fillna(0)
X_train_radius = X_train.apply(radiusTransformation, axis=1).apply(pd.Series).fillna(0)
X_train_angle_radius = X_train.apply(angleRadiusTransformation, axis=1).apply(pd.Series).fillna(0)

  
  
  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


In [7]:
X_test_angle = X_test.apply(angleTransformation, axis=1).apply(pd.Series).fillna(0)
X_test_radius = X_test.apply(radiusTransformation, axis=1).apply(pd.Series).fillna(0)
X_test_angle_radius = X_test.apply(angleRadiusTransformation, axis=1).apply(pd.Series).fillna(0)

  
  
  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


### MLP with preprocessing strategies

In [37]:
def runMLP(X_train, y_train, X_test, y_test):
    from sklearn.model_selection import GridSearchCV
    from sklearn.neural_network import MLPClassifier
    from sklearn import metrics

    parameter_space = {
        'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'alpha': [0.0001, 0.05],
        'learning_rate': ['constant','adaptive'],
    }

    mlp = MLPClassifier(max_iter=100)
    clf = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
    clf.fit(X_train, y_train)

    predicted = clf.predict(X_test)
    
    print('accuracy_score: ', metrics.accuracy_score(y_test, predicted))
    print('min accuracy_score : ', np.min(clf.cv_results_['mean_test_score']))
    print('precision_score: ', metrics.precision_score(y_test, predicted))
    print('recall_score: ', metrics.recall_score(y_test, predicted))
    print('f1_score: ', metrics.f1_score(y_test, predicted))
    print('roc_auc_score: ', metrics.roc_auc_score(y_test, predicted))
    print('Kolmogorov-Smirnov : ', KS(predicted, y_test))
    
    

In [34]:
runMLP(X_train_angle, y_train, X_test_angle, y_test)

accuracy_score:  0.7337662337662337
min accuracy_score :  0.656344651681811
precision_score:  0.6538461538461539
recall_score:  0.5964912280701754
f1_score:  0.6238532110091742
roc_auc_score:  0.7054621088804486
Kolmogorov-Smirnov :  KstestResult(statistic=0.4109242177608971, pvalue=5.800445875614457e-06)




In [35]:
runMLP(X_train_radius, y_train, X_test_radius, y_test)

accuracy_score:  0.7792207792207793
min accuracy_score :  0.656344651681811
precision_score:  0.7804878048780488
recall_score:  0.5614035087719298
f1_score:  0.653061224489796
roc_auc_score:  0.7343100018086453
Kolmogorov-Smirnov :  KstestResult(statistic=0.46862000361729067, pvalue=1.1206235561722622e-07)


In [36]:
runMLP(X_train_angle_radius, y_train, X_test_angle_radius, y_test)

accuracy_score:  0.7792207792207793
min accuracy_score :  0.656336681013869
precision_score:  0.7090909090909091
recall_score:  0.6842105263157895
f1_score:  0.6964285714285715
roc_auc_score:  0.759631036353771
Kolmogorov-Smirnov :  KstestResult(statistic=0.5192620727075421, pvalue=2.1363334434809644e-09)




## CP with preprocessing strategies

In [38]:
def runCP(X_train, y_train, X_test, y_test):
    from sklearn.linear_model import Perceptron

    parameter_space = {
    'penalty': ['l2', 'l1', 'elasticnet']
    }

    percep = Perceptron()
    clf = GridSearchCV(percep, parameter_space, n_jobs=-1, cv=3)
    clf.fit(X_train, y_train)

    predicted = clf.predict(X_test)
    
    print('accuracy_score: ', metrics.accuracy_score(y_test, predicted))
    print('min accuracy_score : ', np.min(clf.cv_results_['mean_test_score']))
    print('precision_score: ', metrics.precision_score(y_test, predicted))
    print('recall_score: ', metrics.recall_score(y_test, predicted))
    print('f1_score: ', metrics.f1_score(y_test, predicted))
    print('roc_auc_score: ', metrics.roc_auc_score(y_test, predicted))
    print('Kolmogorov-Smirnov : ', KS(predicted, y_test))
    

In [39]:
runCP(X_train_angle, y_train, X_test_angle, y_test)

accuracy_score:  0.5194805194805194
min accuracy_score :  0.6058982942770604
precision_score:  0.43410852713178294
recall_score:  0.9824561403508771
f1_score:  0.6021505376344086
roc_auc_score:  0.6149394103816241
Kolmogorov-Smirnov :  KstestResult(statistic=0.22987882076324834, pvalue=0.03640309894586102)


In [40]:
runCP(X_train_radius, y_train, X_test_radius, y_test)

accuracy_score:  0.7272727272727273
min accuracy_score :  0.7133269567989796
precision_score:  0.8571428571428571
recall_score:  0.3157894736842105
f1_score:  0.46153846153846156
roc_auc_score:  0.6424308193163322
Kolmogorov-Smirnov :  KstestResult(statistic=0.2848616386326641, pvalue=0.004443827418292412)


In [41]:
runCP(X_train_angle_radius, y_train, X_test_angle_radius, y_test)

accuracy_score:  0.7597402597402597
min accuracy_score :  0.6109676390881557
precision_score:  0.6428571428571429
recall_score:  0.7894736842105263
f1_score:  0.7086614173228346
roc_auc_score:  0.7658708627238199
Kolmogorov-Smirnov :  KstestResult(statistic=0.5317417254476398, pvalue=7.559796122436069e-10)
