In [None]:
import numpy as np
# Fix the random seeds for reproducibility.
# `seed` is reused further down (e.g. as the StratifiedKFold random_state).
# NOTE(review): only NumPy's and Python's global RNGs are seeded here; the
# Keras backend may keep its own RNG state — seed it separately if exact
# run-to-run reproducibility of the autoencoder is required.
import random

seed = 7
np.random.seed(seed)
random.seed(seed)

In [None]:
from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras import regularizers
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K
from keras import metrics

def build_model(data, batch_size=20, latent_dim=64, epochs=300, epsilon_std=1.0):
    """Train a dense variational autoencoder on ``data`` and return its encoder.

    The encoder is a stack of tanh Dense layers (2048 -> 1024 -> 512 -> 256
    -> 128) feeding two linear heads, ``z_mean`` and ``z_log_var``. A latent
    sample ``z`` is drawn with the reparameterisation trick and decoded by the
    mirrored stack (128 -> ... -> 2048 -> ``original_dim``).

    Parameters
    ----------
    data : array with a ``shape`` attribute
        Training samples; ``data.shape[1]`` is used as the input dimension.
    batch_size : int, default 20
        Mini-batch size passed to ``fit``.
    latent_dim : int, default 64
        Size of the latent code.
    epochs : int, default 300
        Number of training epochs.
    epsilon_std : float, default 1.0
        Standard deviation of the noise used when sampling ``z``.

    Returns
    -------
    keras.models.Model
        Model mapping the input to the *sampled* latent code ``z``.
    """
    original_dim = data.shape[1]
    # Encoder layer widths; the decoder walks the same tuple in reverse.
    hidden_units = (2048, 1024, 512, 256, 128)
    # The original literal was `1*10e-5`, which evaluates to 1e-4.
    # NOTE(review): if 1e-5 was intended, change it here.
    activity_l1 = 1e-4

    # ---- encoder ----
    x = Input(shape=(original_dim,))
    h = x
    for units in hidden_units:
        h = Dense(units, activation='tanh',
                  activity_regularizer=regularizers.l1(activity_l1))(h)
    z_mean = Dense(latent_dim)(h)
    z_log_var = Dense(latent_dim)(h)

    def sampling(args):
        """Draw z = mean + exp(log_var / 2) * eps with eps ~ N(0, epsilon_std)."""
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,
                                  stddev=epsilon_std)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

    # ---- decoder ----
    h_decoded = z
    for units in reversed(hidden_units):
        h_decoded = Dense(units, activation='tanh')(h_decoded)
    x_decoded_mean = Dense(original_dim, activation='tanh')(h_decoded)

    # instantiate VAE model
    vae = Model(x, x_decoded_mean)

    # Compute VAE loss: reconstruction term + KL divergence to a unit Gaussian.
    # NOTE(review): binary cross-entropy is normally paired with targets and
    # predictions in [0, 1], while the output activation here is tanh
    # ([-1, 1]) — confirm the scaling of `data` or consider a different
    # reconstruction loss.
    xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(xent_loss + kl_loss)

    # The loss is attached to the model itself, so compile() gets no `loss`
    # argument and fit() gets no targets.
    vae.add_loss(vae_loss)
    from keras import optimizers
    rmsprop = optimizers.RMSprop(lr=0.0001, rho=0.9, epsilon=None, decay=0.0)
    vae.compile(optimizer=rmsprop)
    vae.summary()

    vae.fit(data,
            shuffle=True,
            epochs=epochs,
            batch_size=batch_size)

    # NOTE(review): the encoder outputs the sampled `z`, so its predictions
    # include the random noise drawn in `sampling`; use `z_mean` instead if a
    # deterministic embedding is wanted.
    encoder = Model(x, z)

    return encoder

In [None]:
from nilearn.decomposition import CanICA
def prepare_data(func_filenames,
                 mask='/home/share/TmpData/Qinglin/ADHD200_Athena_preproc_flirtfix/ADHD200_mask_152_4mm.nii.gz'):
    """Run ``CanICA.prepare_data`` on a list of functional images.

    Parameters
    ----------
    func_filenames
        Functional images, passed unchanged to ``canica.prepare_data``.
    mask
        Mask handed to ``CanICA``. Defaults to the ADHD200 4 mm mask path
        that was previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    Whatever ``canica.prepare_data`` returns.
    """
    canica = CanICA(memory="nilearn_cache", memory_level=2,
                    threshold=3., verbose=10, random_state=0,
                    mask=mask)
    # NOTE(review): `prepare_data` does not look like part of CanICA's
    # documented public API — confirm it exists in the installed nilearn.
    return canica.prepare_data(func_filenames)

In [None]:
from nilearn.connectome import ConnectivityMeasure

def corr(all_time_series):
    """Compute vectorized correlation connectivity features.

    Fits a ``ConnectivityMeasure(kind='correlation', vectorize=True)`` on
    ``all_time_series`` and returns the result of ``fit_transform``.
    """
    # The former `connectivity_biomarkers = {}` initialisation was a dead
    # store (overwritten immediately), so it has been removed.
    conn_measure = ConnectivityMeasure(kind='correlation', vectorize=True)
    return conn_measure.fit_transform(all_time_series)

In [None]:
from nilearn.connectome import ConnectivityMeasure

def corr_tan(all_time_series):
    """Compute vectorized tangent-space connectivity features.

    Fits a ``ConnectivityMeasure(kind='tangent', vectorize=True)`` on
    ``all_time_series`` and returns the result of ``fit_transform``.
    """
    # The former `connectivity_biomarkers = {}` initialisation was a dead
    # store (overwritten immediately), so it has been removed.
    tangent_measure = ConnectivityMeasure(kind='tangent', vectorize=True)
    return tangent_measure.fit_transform(all_time_series)


In [None]:
import numpy as np
X=np.load('/home/share/TmpData/Qinglin/ABIDE/X.npy')
Y=np.load('/home/share/TmpData/Qinglin/ABIDE/Y.npy')
model=build_model(X)
# build_model returns the trained encoder. The loop below used to reference
# `encoder` and `batch_size`, neither of which existed at notebook scope
# (NameError on a fresh kernel), so bind both explicitly.
encoder = model
batch_size = 20  # same value build_model uses for training by default

from nilearn.datasets import fetch_abide_pcp

# Fetch the preprocessed functional images of the ABIDE PCP dataset
# (no site or subject-count filter is applied here).
abide = fetch_abide_pcp(derivatives=['func_preproc'], data_dir='/home/share/TmpData/Qinglin/nilearn_data/')

# We look at the available data in this dataset
print(abide.keys())


func_filenames = abide.func_preproc  # list of 4D nifti files for each subject

# NOTE(review): `_resolve_globbing` and `_mask_and_reduce_single` are private
# nilearn helpers; they may change or disappear between nilearn versions.
from nilearn._utils.niimg_conversions import _resolve_globbing
imgs = _resolve_globbing(func_filenames)

mask_img ='/home/share/TmpData/Qinglin/ADHD200_Athena_preproc_flirtfix/ADHD200_mask_152_4mm.nii.gz'

from nilearn.input_data import NiftiMasker
masker = NiftiMasker(mask_img=mask_img, 
                     standardize=True,
                     detrend=1,
                     smoothing_fwhm=6.,
                     memory="/storage/nilearn_cache", 
                     memory_level=2)
# Only `masker` is used below; the name `fmri_masked` is kept from the
# original cell and holds whatever `masker.fit()` returns.
fmri_masked = masker.fit()

from nilearn.decomposition.base import _mask_and_reduce_single
all_time_series=[]
for img in imgs:
    print(img)
    # Mask one subject's image, then project it through the trained encoder.
    data = _mask_and_reduce_single(
        masker, img, confound=None,
        reduction_ratio=1,
        random_state=0,
        memory_level=3)
    time_series=encoder.predict(data,
        batch_size=batch_size)
    all_time_series.append(time_series)

# One encoded array per subject; consumed by the classification cell as `D`.
D=all_time_series

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Display names and estimators are parallel lists: names[i] labels classifiers[i].
names = ["Nearest Neighbors", "RBF SVM", 
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA","XGBoost","Bagging","GTBoosting"]

classifiers = [
    KNeighborsClassifier(3),
#         SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
#         GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=10),
    RandomForestClassifier(max_depth=10, n_estimators=100, max_features=1),
    MLPClassifier(hidden_layer_sizes=(500,400,300,200,100,50)),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    XGBClassifier(),
    BaggingClassifier(KNeighborsClassifier(),max_samples=0.5, max_features=0.5),
    GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,max_depth=1, random_state=0)]

# zip() would silently truncate if the two lists drifted apart.
assert len(names) == len(classifiers), "names and classifiers must stay aligned"

from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

from sklearn.model_selection import cross_val_score

# The connectivity features do not depend on the classifier, so compute them
# once instead of once per classifier.
# NOTE(review): corr_tan fits the tangent-space measure on all subjects
# before the cross-validation split, so test-fold subjects influence the
# features — consider moving it into a Pipeline if that matters.
connectivity_features = corr_tan(D)

mean_scores = []
for name, clf in zip(names, classifiers):
    # `groups=Y` was dropped: StratifiedKFold ignores the groups argument.
    cv_scores = cross_val_score(clf,
                                connectivity_features,
                                y=Y,
                                cv=cv,
                                scoring='accuracy',
                                )
    mean_scores.append(cv_scores.mean())

In [None]:
from nilearn.plotting import show
import matplotlib.pylab as plt

# Horizontal bar chart of the mean cross-validated accuracy per classifier.
fig, ax = plt.subplots(figsize=(6, 4))

# One bar every 0.1 units, starting at 0.1.
bar_positions = np.arange(len(names)) * .1 + .1
ax.barh(bar_positions, mean_scores, align='center', height=.05)

# Put each word of a classifier name on its own line so the labels stay narrow.
tick_labels = ['\n'.join(label.split(' ')) for label in names]
ax.set_yticks(bar_positions)
ax.set_yticklabels(tick_labels)

ax.set_xlabel('Classification accuracy')
ax.grid(True)
fig.tight_layout()

show()