### Calc unique values-counts in list/ndarray

Let's see different versions of doing that task and measure the speed of each approach.

In [75]:
import numpy as np
from collections import defaultdict,Counter

In [76]:
def get_values_counts_as_dict_1(atribute_list):
    """values_counts counter with python dictionary"""    
    values_counts = defaultdict(int)    
    for val in atribute_list:
        values_counts[val] += 1

    return values_counts    

In [77]:
def get_values_counts_as_dict_2(attribute_list):
    """values_counts counter with python list.count() method"""
    values_counts = {}
    
    for val in attribute_list:
        values_counts[val] = attribute_list.count(val)

    return values_counts

In [78]:
def get_values_counts_as_dict_3(attribute_list):
    """values_counts counter with python Counter dict subclass"""    
    return Counter(attribute_list)

In [79]:
def get_values_countes_as_2d_array(attribute_list):
    """values_counts counter with np.unique method"""
    values, counts = np.unique(attribute_list, return_counts=True)
    
    return [values, counts]

### Speed profiling

In [80]:
def timing_values_countes(arr):
    the_list = list(arr)    
    print('Testing on Python list with len: {}'.format(len(the_list)))
    
    print('\n{}:'.format(get_values_counts_as_dict_1.__doc__))
    %timeit -n1 get_values_counts_as_dict_1(the_list)
    
    print('\n{}:'.format(get_values_counts_as_dict_2.__doc__))
    %timeit -n1 get_values_counts_as_dict_2(the_list)        
    
    print('\n{}:'.format(get_values_counts_as_dict_3.__doc__))
    %timeit -n1 get_values_counts_as_dict_3(the_list)
       

#     print('\nTiming get_values_countes_as_2d_array')
#     %timeit -n1 get_values_countes_as_2d_array(the_list)

In [81]:
length_factor = 1000

arr = np.repeat(np.array(['red','green','blue']),length_factor)
np.random.shuffle(arr) 

timing_values_countes(arr)

Testing on Python list with len: 3000

values_counts counter with python dictionary:
562 µs ± 114 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

values_counts counter with python list.count() method:
127 ms ± 4.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

values_counts counter with python Counter dict subclass:
251 µs ± 4.1 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
