# Speed testing of frequency counts

In [18]:
import Orange
from scipy.stats import itemfreq
import numpy as np
import timeit

def wrapper(func, *args, **kwargs):
    """Bind ``func`` to the given arguments and return a zero-argument callable.

    ``timeit.timeit`` accepts a callable that takes no arguments, so this
    freezes the positional and keyword arguments up front. Each call of the
    returned object invokes ``func(*args, **kwargs)`` and returns its result.
    """
    return lambda: func(*args, **kwargs)

In [54]:
# Number of rows generated for every benchmark array below.
no_samples = 10 ** 5
# Random values are drawn from [0, no_unique_values), so this bounds the
# number of distinct values per column.
no_unique_values = 10 ** 2

## Testing for an array with 1 column

In [50]:
# One column of random integer-valued data, stored as float32.
x = np.random.randint(no_unique_values, size=no_samples).astype(np.float32)

# Each candidate is timed over the same number of runs and the mean time per
# run is reported.
repeats = 3
benchmarks = [
    ("Orange binount:", wrapper(Orange.statistics.util.bincount, x)),
    ("Numpy unique:", wrapper(np.unique, x, return_counts=True)),
    ("Scipy itemfreq:", wrapper(itemfreq, x)),
]
for label, wrapped in benchmarks:
    print(label, timeit.timeit(wrapped, number=repeats) / repeats)

Orange binount: 0.008274422999875242
Numpy unique: 0.05659247166674201
Scipy itemfreq: 0.1377524246666629


## Testing for an array with 2 columns

In [51]:
# Draw two independent columns of random integers, then arrange them as a
# (no_samples, 2) float32 array.
columns = [np.random.randint(no_unique_values, size=no_samples) for _ in range(2)]
x = np.array(columns).T.astype(np.float32)

def unique_count(*a):
    """Count distinct rows formed by stacking the given arrays as columns.

    Parameters
    ----------
    *a : array-like
        Columns to combine; they are passed to ``np.column_stack``, so 1-D
        arrays become single columns and 2-D arrays are stacked as-is.

    Returns
    -------
    tuple
        The result of ``np.unique(..., return_counts=True)``: the unique rows
        as opaque ``np.void`` records (one record per distinct row) and the
        number of occurrences of each.
    """
    M = np.ascontiguousarray(np.column_stack(a))
    # Reinterpret every row as a single fixed-size void record so np.unique
    # compares whole rows. The record width is taken from the stacked matrix
    # rather than from the number of arguments, so 2-D inputs (which
    # contribute several columns each) are also collapsed row by row.
    row_dtype = np.dtype((np.void, M.dtype.itemsize * M.shape[1]))
    M_cont = M.view(row_dtype)
    return np.unique(M_cont, return_counts=True)

# Time both candidates on the two columns of x and report the mean time per
# run.
repeats = 3
benchmarks = [
    ("Orange contingency:", wrapper(Orange.statistics.util.contingency, x[:, 0], x[:, 1])),
    ("Numpy unique:", wrapper(unique_count, x[:, 0], x[:, 1])),
]
for label, wrapped in benchmarks:
    print(label, timeit.timeit(wrapped, number=repeats) / repeats)

Orange contingency: 0.12143596499997027
Numpy unique: 0.36810736699999325


## Testing for an array with 3 or more columns

In [52]:
# Three independent random columns arranged as a (no_samples, 3) float32 array.
columns = [np.random.randint(no_unique_values, size=no_samples) for _ in range(3)]
x = np.array(columns).T.astype(np.float32)

# Iterating over x.T yields the columns of x one by one, so unpacking it
# passes each column as a separate argument.
repeats = 3
wrapped = wrapper(unique_count, *x.T)
print("Numpy unique:", timeit.timeit(wrapped, number=repeats) / repeats)

Numpy unique: 0.48144968199994764


In [53]:
# Ten independent random columns arranged as a (no_samples, 10) float32 array.
columns = [np.random.randint(no_unique_values, size=no_samples) for _ in range(10)]
x = np.array(columns).T.astype(np.float32)

# Unpacking x.T hands all ten columns to unique_count as separate arguments.
repeats = 3
wrapped = wrapper(unique_count, *x.T)
print("Numpy unique:", timeit.timeit(wrapped, number=repeats) / repeats)

Numpy unique: 0.627787733666537
