Cleaning up code to simplify applications

BlueBrain · May 14, 2018 · 2f8a80b · 2f8a80b
1 parent ab560ae
commit 2f8a80b
Show file tree

Hide file tree

Showing 11 changed files with 382 additions and 303 deletions.
diff --git a/doc/Installation.txt b/doc/Installation.txt
@@ -7,7 +7,7 @@ virtualenv tmd_test
 # Install tmd module
 pip install -e TMD
 
-# If you see the error "Command "python setup.py egg_info" failed with error code 1 in /TMD" try:
-pip install pystan
-
 # Now your module is ready to use
+ipython
+# Import module
+import tmd
diff --git a/examples/Advanced/classification.py b/examples/Advanced/classification.py
@@ -0,0 +1,205 @@
+# Names of sklearn submodules and of classifier classes.
+# NOTE(review): the two lists look paired by index (entry i of
+# list_of_classifiers lives in entry i of list_of_modules) -- confirm.
+list_of_modules = [
+    'discriminant_analysis',
+    'discriminant_analysis',
+    'tree',
+]
+
+list_of_classifiers = [
+    'LinearDiscriminantAnalysis',
+    'QuadraticDiscriminantAnalysis',
+    'DecisionTreeClassifier',
+]
+
+
+def train(mod, classifier, data, labels, **kwargs):
+    '''Builds and fits a classifier of sklearn.
+       mod is the name of the sklearn submodule and classifier
+       the name of the estimator class defined inside it.
+       Any kwargs are applied to the estimator through
+       set_params before it is fitted to data and labels.
+       Returns the fitted classifier.
+    '''
+    from importlib import import_module
+
+    estimator_class = getattr(import_module('sklearn.' + mod), classifier)
+
+    fitted = estimator_class()
+    fitted.set_params(**kwargs)
+    fitted.fit(data, labels)
+
+    return fitted
+
+def predict(clf, data):
+    '''Predicts the label of a single datapoint
+       with the trained classifier clf.
+       data is wrapped into a one-element batch for clf.predict
+       and the first (only) predicted label is returned.
+    '''
+    return clf.predict([data])[0]
+
+def leave_one_out(mod, classifier, data, labels, **kwargs):
+    '''Leave-one-out validation of a sklearn classifier.
+       Each individual is left out in turn, the classifier is
+       trained with the rest of the data and used to predict
+       the label of the individual that was left out.
+       data and labels are indexed with numpy index arrays,
+       so both must support that kind of indexing.
+       kwargs are forwarded to train.
+       Returns the fraction (between 0 and 1) of individuals whose
+       predicted label matches the proposed label.
+    '''
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    sample_size = len(labels)
+    all_inds = np.arange(sample_size)
+    scores = np.zeros(sample_size)
+
+    for ed in range(sample_size):
+
+        # Every index except the individual under test.
+        kept = np.delete(all_inds, ed)
+
+        clf = train(mod, classifier, data[kept], labels[kept], **kwargs)
+        predict_label = predict(clf, data[ed])
+
+        scores[ed] = predict_label == labels[ed]
+
+    # Builtin float: the np.float alias no longer exists in recent numpy.
+    return float(np.count_nonzero(scores)) / sample_size
+
+
+def leave_one_out_statistics(mod, classifier, data, labels, N=10, **kwargs):
+    '''Leave-one-out validation that also prints repeated predictions.
+       Each individual is left out in turn and the classifier is
+       trained once with the rest of the data. The left-out
+       individual is then predicted N times with that same fitted
+       classifier and the list of predictions is printed.
+       kwargs are forwarded to train.
+       Returns the fraction (between 0 and 1) of individuals whose
+       predicted label matches the proposed label.
+    '''
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    sample_size = len(labels)
+    all_inds = np.arange(sample_size)
+    scores = np.zeros(sample_size)
+
+    for ed in range(sample_size):
+
+        # Every index except the individual under test.
+        kept = np.delete(all_inds, ed)
+
+        clf = train(mod, classifier, data[kept], labels[kept], **kwargs)
+
+        all_results = [predict(clf, data[ed]) for _ in range(N)]
+
+        # The score uses one further prediction, not the printed ones.
+        predict_label = predict(clf, data[ed])
+        print 'The individual ', str(ed), ' is of type ', all_results
+
+        scores[ed] = predict_label == labels[ed]
+
+    # Builtin float: the np.float alias no longer exists in recent numpy.
+    return float(np.count_nonzero(scores)) / sample_size
+
+
+
+def leave_perc_out(mod, classifier, data, labels, iterations=10, percent=10, **kwargs):
+    '''Repeated hold-out validation of a sklearn classifier.
+       In every iteration a random test set is drawn without
+       replacement, the classifier is trained with the rest of
+       the data and used to predict the labels of the test set.
+       Iterations defines the number of trials.
+       Percent defines the percentage of the data that will
+       define the test set of the classifier; the test set holds
+       int(len(labels) * percent / 100.) individuals.
+       kwargs are forwarded to train.
+       Returns an array with one score per iteration, each score
+       being the fraction of the test set that was predicted
+       correctly. An empty test set makes the score division fail.
+    '''
+    import random
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    sample_size = len(labels)
+    test_size = int(sample_size*percent/100.)
+    all_inds = np.arange(sample_size)
+    scores = np.zeros(iterations)
+
+    for i in range(iterations):
+
+        random_inds = random.sample(range(0, sample_size), test_size)
+        # Training set: everything that was not drawn for testing.
+        kept = np.delete(all_inds, random_inds)
+
+        clf = train(mod, classifier, data[kept], labels[kept], **kwargs)
+
+        sc = 0.0
+        for ed in random_inds:
+            sc += float(predict(clf, data[ed]) == labels[ed])
+
+        scores[i] = sc / float(test_size)
+
+    return scores
+
+
+def leave_one_out_mixing(mod, classifier, data, labels, **kwargs):
+    '''Leave-one-out validation that also reports class mixing.
+       Each individual is left out in turn, the classifier is
+       trained with the rest of the data and used to predict
+       the label of the individual that was left out.
+       kwargs are forwarded to train.
+       Returns a tuple (score, separation):
+       score is the fraction (between 0 and 1) of individuals whose
+       predicted label matches the proposed label;
+       separation[i][j] is the fraction of the individuals with
+       proposed label i + 1 that were predicted as label j + 1.
+       NOTE(review): labels are used as 1-based indices (label - 1),
+       so they are presumably expected to be 1, 2, ..., K -- confirm.
+    '''
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    sample_size = len(labels)
+    all_inds = np.arange(sample_size)
+    scores = np.zeros(sample_size)
+
+    classes = np.unique(labels)
+    separation = np.zeros([len(classes), len(classes)])
+
+    # Number of individuals per class, stored at position label - 1.
+    sizes = np.zeros(len(classes))
+    for i in classes:
+        sizes[int(i - 1)] = np.count_nonzero(labels == i)
+
+    for ed in range(sample_size):
+
+        # Every index except the individual under test.
+        kept = np.delete(all_inds, ed)
+
+        clf = train(mod, classifier, data[kept], labels[kept], **kwargs)
+        predict_label = predict(clf, data[ed])
+
+        true_row = int(labels[ed] - 1)
+        predicted_col = int(predict_label - 1)
+        # Each individual contributes 1 / (size of its proposed class).
+        separation[true_row][predicted_col] += 1. / sizes[true_row]
+
+        scores[ed] = predict_label == labels[ed]
+
+    # Builtin float: the np.float alias no longer exists in recent numpy.
+    return float(np.count_nonzero(scores)) / sample_size, separation
+
+
+def leave_one_out_multiple(mod, classifier, data, labels, n=10, **kwargs):
+    '''Leave-one-out validation with n retrainings per individual.
+       Each individual is left out in turn. The classifier is
+       trained n times with the rest of the data and every
+       prediction for the left-out individual is printed.
+       Only the prediction of the last retraining is scored,
+       so n has to be at least 1.
+       kwargs are forwarded to train.
+       Returns the fraction (between 0 and 1) of individuals whose
+       last predicted label matches the proposed label.
+    '''
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    sample_size = len(labels)
+    all_inds = np.arange(sample_size)
+    scores = np.zeros(sample_size)
+
+    for ed in range(sample_size):
+
+        print 'The individual ' + str(ed) + ' is of type ',
+
+        # The training set does not change between the n retrainings.
+        kept = np.delete(all_inds, ed)
+        train_data = data[kept]
+        train_labels = labels[kept]
+
+        for ni in range(n):
+
+            clf = train(mod, classifier, train_data, train_labels, **kwargs)
+
+            predict_label = predict(clf, data[ed])
+
+            print str(predict_label),
+
+        print ' !'
+
+        scores[ed] = predict_label == labels[ed]
+
+    # Builtin float: the np.float alias no longer exists in recent numpy.
+    return float(np.count_nonzero(scores)) / sample_size
+
+def multi(dat, tar, m='tree', cl='DecisionTreeClassifier', n=10, randomize=False):
+    '''Repeats leave_one_out n times and summarizes the scores.
+       dat and tar are the data and the labels, m and cl the
+       sklearn submodule and classifier names given to leave_one_out.
+       If randomize is set, every repetition replaces the labels
+       with random integers drawn between min(tar) and max(tar),
+       both included, instead of using tar.
+       Returns the mean and the standard deviation of the n scores.
+    '''
+    # Imported locally, like the other helper imports of this file,
+    # because the module has no top-level numpy import.
+    import numpy as np
+
+    score = np.zeros(n)
+
+    for i in range(n):
+        if randomize:
+            trial_labels = np.random.randint(min(tar), max(tar)+1, size=len(tar))
+        else:
+            trial_labels = tar
+        score[i] = leave_one_out(m, cl, dat, trial_labels)
+
+    # mean / std were unbound bare names; use the numpy functions.
+    return np.mean(score), np.std(score)
diff --git a/examples/Advanced/Variability.py → examples/Advanced/variability.py b/examples/Advanced/Variability.py → examples/Advanced/variability.py
diff --git a/examples/analysis.py b/examples/analysis.py
diff --git a/examples/distances_example.py b/examples/distances_example.py
@@ -3,24 +3,26 @@
 
 pop1 = tmd.io.load_population(directory1)
 pop2 = tmd.io.load_population(directory2)
+
 phs1 = [tmd.methods.get_ph_neuron(n, neurite_type='basal') for n in pop1.neurons]
 phs2 = [tmd.methods.get_ph_neuron(n, neurite_type='basal') for n in pop2.neurons]
 
 # Normalize the limits
-xlims, ylims = define_limits(phs1 + phs2)
+xlims, ylims = tmd.analysis.define_limits(phs1 + phs2)
 # Create average images for populations
-imgs1 = [persistence_image(p, xlims=xlims, ylims=ylims) for p in phs1]
-IMG1 = average_ph_image(imgs1)
-imgs2 = [persistence_image(p, xlims=xlims, ylims=ylims) for p in phs2]
-IMG2 = average_ph_image(imgs2)
+imgs1 = [tmd.analysis.persistence_image_data(p, xlims=xlims, ylims=ylims) for p in phs1]
+IMG1 = tmd.analysis.average_image(phs1, xlims=xlims, ylims=ylims)
+imgs2 = [tmd.analysis.persistence_image_data(p, xlims=xlims, ylims=ylims) for p in phs2]
+IMG2 = tmd.analysis.average_image(phs2, xlims=xlims, ylims=ylims)
 
 # You can plot the images if you want to create pretty figures
-average_figure1 = plot_imgs(IMG1, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
-average_figure2 = plot_imgs(IMG2, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
+average_figure1 = view.plot.plot_img_basic(IMG1, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
+average_figure2 = view.plot.plot_img_basic(IMG2, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
 
 # Create the difference between the two images
-DIMG = img_diff(IMG1, IMG2) # subtracts IMG2 from IMG1 so anything red IMG1 has more of it and anything blue IMG2 has more of it - or that's how it is supposed to be :)
+DIMG = tmd.analysis.img_diff_data(IMG1, IMG2) # subtracts IMG2 from IMG1 so anything red IMG1 has more of it and anything blue IMG2 has more of it - or that's how it is supposed to be :)
+
 # Plot the difference between them
-diff_image = plot_imgs(DIMG, vmin=-1.0, vmax=1.0) # vmin, vmax important to see changes
+diff_image = view.plot.plot_img_basic(DIMG, vmin=-1.0, vmax=1.0) # vmin, vmax important to see changes
 # Quantify the absolute distance between the two averages
 dist = np.sum(np.abs(DIMG))
diff --git a/requirements.txt b/requirements.txt
diff --git a/requirements_dev.txt b/requirements_dev.txt