Cleaning up code to simplify applications
lidakanari committed May 14, 2018
1 parent ab560ae commit 2f8a80b
Showing 11 changed files with 382 additions and 303 deletions.
6 changes: 3 additions & 3 deletions doc/Installation.txt
@@ -7,7 +7,7 @@ virtualenv tmd_test
# Install tmd module
pip install -e TMD

# If you see the error "Command "python setup.py egg_info" failed with error code 1 in /TMD" try:
pip install pystan

# Now your module is ready to use
ipython
# Import module
import tmd
205 changes: 205 additions & 0 deletions examples/Advanced/classification.py
@@ -0,0 +1,205 @@
# Helpers for training and evaluating scikit-learn classifiers.
import importlib
import random

import numpy as np

list_of_modules = ['discriminant_analysis', 'discriminant_analysis', 'tree']

list_of_classifiers = ['LinearDiscriminantAnalysis',
                       'QuadraticDiscriminantAnalysis',
                       'DecisionTreeClassifier']


def train(mod, classifier, data, labels, **kwargs):
    '''Trains the named classifier from the sklearn module mod
    with data and labels.
    Returns a fitted classifier.
    '''
    clas_mod = importlib.import_module('sklearn.' + mod)
    clf = getattr(clas_mod, classifier)()
    clf.set_params(**kwargs)

    clf.fit(data, labels)

    return clf

def predict(clf, data):
    '''Predicts the label of a single datapoint
    using the trained classifier clf.
    Returns the predicted class.
    '''
    predict_label = clf.predict([data])

    return predict_label[0]

def leave_one_out(mod, classifier, data, labels, **kwargs):
    '''Leaves one individual out, trains the classifier
    with the rest of the data and checks whether the
    proposed and predicted labels match.
    Returns the fraction of successful trials over
    the total number of trials.
    '''
    sample_size = len(labels)
    scores = np.zeros(sample_size)

    for ed in range(sample_size):
        # Train on every datapoint except the one that is left out.
        train_data = data[np.delete(np.arange(sample_size), ed)]
        train_labels = labels[np.delete(np.arange(sample_size), ed)]

        clf = train(mod, classifier, train_data, train_labels, **kwargs)
        predict_label = predict(clf, data[ed])

        scores[ed] = predict_label == labels[ed]

    return float(np.count_nonzero(scores)) / sample_size


def leave_one_out_statistics(mod, classifier, data, labels, N=10, **kwargs):
    '''Same as leave_one_out, but repeats the prediction
    N times for each left-out individual and prints all
    predicted labels, to expose the variability of
    stochastic classifiers.
    Returns the fraction of successful trials.
    '''
    sample_size = len(labels)
    scores = np.zeros(sample_size)

    for ed in range(sample_size):
        train_data = data[np.delete(np.arange(sample_size), ed)]
        train_labels = labels[np.delete(np.arange(sample_size), ed)]

        clf = train(mod, classifier, train_data, train_labels, **kwargs)

        # Repeat the prediction; results differ only for stochastic classifiers.
        all_results = []
        for i in range(N):
            all_results.append(predict(clf, data[ed]))

        predict_label = predict(clf, data[ed])
        print('The individual', ed, 'is of type', all_results)

        scores[ed] = predict_label == labels[ed]

    return float(np.count_nonzero(scores)) / sample_size



def leave_perc_out(mod, classifier, data, labels, iterations=10, percent=10, **kwargs):
    '''Leaves a random percentage of the data out,
    trains the classifier with the rest and scores
    the matching ids between proposed and predicted
    labels on the held-out set.
    iterations defines the number of trials.
    percent defines the percentage of the data that
    forms the test set of the classifier.
    Returns one score per iteration.
    '''
    sample_size = len(labels)
    test_size = int(sample_size * percent / 100.)
    scores = np.zeros(iterations)

    for i in range(iterations):
        # Pick a random test set and train on the remaining data.
        random_inds = random.sample(range(sample_size), test_size)
        kept = np.delete(np.arange(sample_size), random_inds)

        clf = train(mod, classifier, data[kept], labels[kept], **kwargs)

        sc = 0.0
        for ed in random_inds:
            predict_label = predict(clf, data[ed])
            sc = sc + float(predict_label == labels[ed])

        scores[i] = sc / test_size

    return scores


def leave_one_out_mixing(mod, classifier, data, labels, **kwargs):
    '''Leaves one individual out, trains the classifier
    with the rest of the data and returns the score
    of matching ids between proposed and predicted labels,
    together with a mixing (confusion) matrix in which
    each row is normalized by the size of its class.
    Labels are assumed to be integers starting at 1.
    '''
    sample_size = len(labels)
    scores = np.zeros(sample_size)
    n_classes = len(np.unique(labels))

    separation = np.zeros([n_classes, n_classes])
    sizes = np.zeros(n_classes)

    for i in np.unique(labels):
        sizes[int(i - 1)] = len(np.where(labels == i)[0])

    for ed in range(sample_size):
        train_data = data[np.delete(np.arange(sample_size), ed)]
        train_labels = labels[np.delete(np.arange(sample_size), ed)]

        clf = train(mod, classifier, train_data, train_labels, **kwargs)
        predict_label = predict(clf, data[ed])

        # Accumulate normalized true-vs-predicted counts.
        separation[int(labels[ed] - 1)][int(predict_label - 1)] += 1. / sizes[int(labels[ed] - 1)]

        scores[ed] = predict_label == labels[ed]

    return float(np.count_nonzero(scores)) / sample_size, separation


def leave_one_out_multiple(mod, classifier, data, labels, n=10, **kwargs):
    '''Leaves one individual out and repeats the full
    train-predict cycle n times for each left-out
    individual, printing every predicted label.
    Returns the fraction of successful trials, based
    on the last prediction for each individual.
    '''
    sample_size = len(labels)
    scores = np.zeros(sample_size)

    for ed in range(sample_size):
        print('The individual ' + str(ed) + ' is of type ', end='')

        for ni in range(n):
            train_data = data[np.delete(np.arange(sample_size), ed)]
            train_labels = labels[np.delete(np.arange(sample_size), ed)]

            clf = train(mod, classifier, train_data, train_labels, **kwargs)

            predict_label = predict(clf, data[ed])

            print(predict_label, end=' ')

        print('!')

        scores[ed] = predict_label == labels[ed]

    return float(np.count_nonzero(scores)) / sample_size

def multi(dat, tar, m='tree', cl='DecisionTreeClassifier', n=10, randomize=False):
    '''Runs leave_one_out n times and returns the mean
    and standard deviation of the scores. If randomize
    is True, the labels are replaced by uniformly random
    labels to provide a chance-level baseline.
    '''
    score = np.zeros(n)
    if not randomize:
        for i in range(n):
            score[i] = leave_one_out(m, cl, dat, tar)
    else:
        for i in range(n):
            score[i] = leave_one_out(m, cl, dat,
                                     np.random.randint(min(tar), max(tar) + 1, size=len(tar)))

    return np.mean(score), np.std(score)
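
For reference, a minimal usage sketch of the helpers above, assuming they are importable and using scikit-learn's bundled iris dataset; the variable names are illustrative and not part of the commit:

import numpy as np
from sklearn.datasets import load_iris

# Hypothetical smoke test for the leave-one-out helpers defined above.
iris = load_iris()
data = iris.data                # (150, 4) feature matrix
labels = iris.target + 1        # shift to 1-based labels, as leave_one_out_mixing assumes

score = leave_one_out('tree', 'DecisionTreeClassifier', data, labels)
print('Leave-one-out accuracy:', score)

accuracy, mixing = leave_one_out_mixing('tree', 'DecisionTreeClassifier', data, labels)
print('Row-normalized mixing matrix:')
print(np.round(mixing, 2))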
File renamed without changes.
99 changes: 0 additions & 99 deletions examples/analysis.py

This file was deleted.

20 changes: 11 additions & 9 deletions examples/distances_example.py
@@ -3,24 +3,26 @@

 pop1 = tmd.io.load_population(directory1)
 pop2 = tmd.io.load_population(directory2)
 
 phs1 = [tmd.methods.get_ph_neuron(n, neurite_type='basal') for n in pop1.neurons]
 phs2 = [tmd.methods.get_ph_neuron(n, neurite_type='basal') for n in pop2.neurons]
 
 # Normalize the limits
-xlims, ylims = define_limits(phs1 + phs2)
+xlims, ylims = tmd.analysis.define_limits(phs1 + phs2)
 # Create average images for populations
-imgs1 = [persistence_image(p, xlims=xlims, ylims=ylims) for p in phs1]
-IMG1 = average_ph_image(imgs1)
-imgs2 = [persistence_image(p, xlims=xlims, ylims=ylims) for p in phs2]
-IMG2 = average_ph_image(imgs2)
+imgs1 = [tmd.analysis.persistence_image_data(p, xlims=xlims, ylims=ylims) for p in phs1]
+IMG1 = tmd.analysis.average_image(phs1, xlims=xlims, ylims=ylims)
+imgs2 = [tmd.analysis.persistence_image_data(p, xlims=xlims, ylims=ylims) for p in phs2]
+IMG2 = tmd.analysis.average_image(phs2, xlims=xlims, ylims=ylims)
 
 # You can plot the images if you want to create pretty figures
-average_figure1 = plot_imgs(IMG1, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
-average_figure2 = plot_imgs(IMG2, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
+average_figure1 = view.plot.plot_img_basic(IMG1, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
+average_figure2 = view.plot.plot_img_basic(IMG2, title='', xlims=xlims, ylims=ylims, cmap=cm.jet)
 
 # Create the difference between the two images
-DIMG = img_diff(IMG1, IMG2)  # subtracts IMG2 from IMG1: red regions have more mass in IMG1, blue regions in IMG2
+DIMG = tmd.analysis.img_diff_data(IMG1, IMG2)  # subtracts IMG2 from IMG1: red regions have more mass in IMG1, blue regions in IMG2
 
 # Plot the difference between them
-diff_image = plot_imgs(DIMG, vmin=-1.0, vmax=1.0)  # vmin, vmax are important to see the changes
+diff_image = view.plot.plot_img_basic(DIMG, vmin=-1.0, vmax=1.0)  # vmin, vmax are important to see the changes
 # Quantify the absolute distance between the two averages
 dist = np.sum(np.abs(DIMG))
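
The first lines of this example are collapsed in the diff above; a plausible preamble, with assumed import names and placeholder paths (not confirmed by the commit), would be:

import numpy as np
from matplotlib import cm

import tmd
from tmd import view  # assumed location of plot_img_basic; the actual example may differ

directory1 = './population_1/'  # placeholder paths; the real ones are hidden in the collapsed lines
directory2 = './population_2/'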
8 changes: 0 additions & 8 deletions requirements.txt

This file was deleted.

6 changes: 0 additions & 6 deletions requirements_dev.txt

This file was deleted.
