In [1]:
# Import modules
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import model_selection, preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

# Import PySwarms
import pyswarms as ps

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Load parkinsons.csv (comma-separated) into a DataFrame.
data = pd.read_csv('parkinsons.csv', sep=',')

In [3]:
# Target vector: the 'status' column as a NumPy array.
y = np.array(data['status'])
# Feature matrix: every column except 'name' and 'status'.
X = np.array(data.drop(columns=['name', 'status']))
# Display the target dtype as the cell output.
y.dtype

dtype('int64')

In [4]:
# num_features = X.shape[1]

In [5]:
# Base classifier that the PSO objective (f_per_particle) refits for every
# candidate feature mask. Hyper-parameters are fixed by hand here;
# random_state=0 is passed so repeated fits start from the same initial state.
classifier = MLPClassifier(
    random_state=0,
    activation='logistic',
    alpha=0.001,
    learning_rate='constant',
    solver='adam',
    max_iter=4000,
)

In [6]:
def f_per_particle(m, alpha):
    """Score a single particle's feature mask.

    The mask picks columns of the notebook-level ``X``; a mask with no
    non-zero entry is evaluated on all columns instead. The chosen columns
    are passed through ``preprocessing.scale``, the notebook-level
    ``classifier`` is refit on them, and its accuracy on those same samples
    is used as the performance ``P``.

    Inputs
    ------
    m : numpy.ndarray
        Binary mask that can be obtained from BinaryPSO; entries equal
        to 1 mark the features that are kept.
    alpha : float
        Constant weight for trading off classifier performance against
        the number of features. No default is defined on this function;
        the caller has to pass it.

    Returns
    -------
    float
        ``alpha * (1 - P) + (1 - alpha) * (1 - n_selected / n_total)``
    """
    n_total = X.shape[1]
    # An empty selection falls back to the complete feature set
    chosen = X if np.count_nonzero(m) == 0 else X[:, m == 1]
    X_scaled = preprocessing.scale(chosen)
    # Fit and evaluate on the same samples (training accuracy)
    classifier.fit(X_scaled, y)
    hits = classifier.predict(X_scaled) == y
    P = hits.mean()
    # NOTE(review): the second term gets smaller as more features are
    # selected -- confirm this is the intended direction for a cost.
    error_term = alpha * (1.0 - P)
    size_term = (1.0 - alpha) * (1 - (X_scaled.shape[1] / n_total))
    return error_term + size_term

In [7]:
def f(x, alpha=0.88):
    """Evaluate the objective for every particle in the swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search; each row is one
        particle's mask and is handed to ``f_per_particle``.
    alpha: float (default is 0.88)
        Trade-off weight forwarded unchanged to ``f_per_particle``.

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle, in row order
    """
    losses = []
    for particle in x:
        losses.append(f_per_particle(particle, alpha))
    return np.array(losses)

In [1]:
# This cell needs X (feature matrix) and f (swarm objective) from the cells
# above; on a fresh kernel run those first, otherwise it fails with a NameError.

# Seed NumPy's global RNG so the stochastic search gives the same cost/pos on
# every Restart & Run All.
# NOTE(review): assumes PySwarms samples through numpy.random's global state --
# confirm for the installed version.
np.random.seed(0)

# Initialize swarm, arbitrary
options = {'c1': 0.5, 'c2': 0.5, 'w':0.9, 'k': 30, 'p':2}

# Call instance of PSO
dimensions = X.shape[1] # dimensions should be the number of features

optimizer = ps.discrete.BinaryPSO(n_particles=30, dimensions=dimensions, options=options)

# Perform optimization
cost, pos = optimizer.optimize(f, iters=500)

NameError: name 'X' is not defined

In [None]:
# Get the selected features from the final positions
# Get the selected features from the final positions.
# f_per_particle scores an all-zero mask as "use every feature", so mirror that
# here; otherwise an all-zero `pos` would produce a matrix with zero columns.
selected_mask = np.asarray(pos) == 1
if not selected_mask.any():
    selected_mask = np.ones(X.shape[1], dtype=bool)
X_selected_features = X[:, selected_mask]  # subset

In [None]:
# First split: 70% train / 30% hold-out. Second split: the hold-out is halved
# into validation and test sets.
# The hold-out gets its own names so that re-running this cell reproduces the
# same partition instead of splitting an already-halved x_test again.
x_train, x_holdout, y_train, y_holdout = model_selection.train_test_split(
    X_selected_features, y, test_size=0.3, random_state=0)
x_val, x_test, y_val, y_test = model_selection.train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=0)

In [None]:
# Hyper-parameter grid explored by the grid search.
parameters = {
    'activation': ('identity', 'logistic', 'tanh', 'relu'),
    'solver': ('sgd', 'adam'),
    'alpha': (0.0001, 0.001, 0.01, 0.1),
}
# Wrap the base MLP in GridSearchCV; every grid combination is tried when
# clf.fit is called.
clf = GridSearchCV(
    MLPClassifier(random_state=0, max_iter=2000, learning_rate='constant'),
    parameters,
)

In [None]:
# Display the shape of the PSO-selected feature matrix.
X_selected_features.shape

In [None]:
# Standardise the training features with sklearn's preprocessing.scale
# (preprocessing is already imported in the first cell).
x_train = preprocessing.scale(x_train)

In [None]:
# Display the shape of the training matrix.
x_train.shape

In [None]:
# Run the grid search on the training split.
clf.fit(x_train, y_train)

In [None]:
# Display the parameter combination selected by the grid search.
clf.best_params_

In [None]:
# Scale the validation features and display clf's score on them.
# NOTE(review): x_val is scaled independently of x_train here, i.e. with its
# own statistics rather than those of the training split -- confirm this is
# intended.
x_val = preprocessing.scale(x_val)
clf.score(x_val, y_val)

In [None]:
# Scale the test features and derive specificity, accuracy and sensitivity
# from the confusion matrix of clf's predictions.
x_test = preprocessing.scale(x_test)
y_true = y_test
y_pred = clf.predict(x_test)
# labels=[0, 1] pins the matrix to 2x2 even when the small test split (or the
# predictions) contain only one class; without it ravel() would not unpack
# into four values.
# NOTE(review): assumes `status` is coded 0 (negative) / 1 (positive) -- confirm.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
specificity = tn / (tn+fp)
print("Specificity is : " , specificity)
accuracy = (tp + tn) / (tp + tn + fp + fn)
print("Accuracy is : " , accuracy)
sensitivity = tp / (tp + fn)
print("Sensitivity is : " , sensitivity)