### Counting unique values in a list/ndarray

Let's look at several ways of counting how often each unique value occurs, and measure the speed of each approach.

In [12]:
import numpy as np
from collections import defaultdict,Counter

In [21]:
def get_values_counts_with_dict_1(atribute_list):
    """with python dictionary (defaultdict)"""
    # The docstring doubles as the label printed by the timing helper,
    # so it is kept short; the details live in these comments.
    #
    # Returns a defaultdict(int) mapping each distinct value to the number
    # of times it occurs in `atribute_list`. Unseen keys start at int() == 0,
    # so every value can be incremented without a membership check.
    tally = defaultdict(int)
    for item in atribute_list:
        tally[item] = tally[item] + 1
    return tally

In [23]:
def get_values_counts_with_list_count(attribute_list):
    """with python list.count() method"""
    # The docstring doubles as the label printed by the timing helper.
    #
    # Builds a plain dict with one entry per distinct value; each entry is
    # obtained by a separate attribute_list.count() call, so the input must
    # provide a .count() method (e.g. a Python list).
    return {
        distinct: attribute_list.count(distinct)
        for distinct in set(attribute_list)
    }

In [24]:
def get_values_counts_Counter(attribute_list):
    """with python Counter dict subclass"""
    # The docstring doubles as the label printed by the timing helper.
    #
    # Start from an empty Counter and feed it the values; the returned
    # Counter maps each distinct value to its number of occurrences.
    tally = Counter()
    tally.update(attribute_list)
    return tally

In [40]:
def get_values_countes_with_np_unique(attribute_list):
    """with np.unique method"""
    # The docstring doubles as the label printed by the timing helper,
    # so it is kept short; the details live in these comments.
    #
    # Returns a plain dict mapping each distinct value to its count.
    # np.unique returns the sorted distinct values together with how often
    # each one occurs when return_counts=True.
    values, counts = np.unique(attribute_list, return_counts=True)

    # .tolist() converts the NumPy scalars (e.g. np.str_, np.int64) into
    # native Python objects, so the counts are built-in ints like those from
    # the other implementations and the result displays and serialises the
    # same way on every NumPy version.
    return dict(zip(values.tolist(), counts.tolist()))

### Speed timing

In [41]:
def timing_values_counts(arr):
    """Time every values-counts implementation on the same data.

    Prints the input length, then for each implementation prints its
    docstring as a label followed by the ``%timeit`` report.

    ``arr`` is copied into a Python list with ``list(arr)`` for the first
    three implementations; the ``np.unique`` implementation receives ``arr``
    itself, unchanged.

    ``%timeit`` is an IPython line magic, so this function only works when
    the cell is run by IPython/Jupyter. Nothing is returned.
    """
    # The list conversion happens once, outside the timed statements.
    the_list = list(arr)    
    print('Testing on Python list with len: {}'.format(len(the_list)))
    
    # -n1: a single loop per timing run (see "1 loop each" in the report).
    print('\n{}:'.format(get_values_counts_with_dict_1.__doc__))
    %timeit -n1 get_values_counts_with_dict_1(the_list)
    
    print('\n{}:'.format(get_values_counts_with_list_count.__doc__))
    %timeit -n1 get_values_counts_with_list_count(the_list)        
    
    print('\n{}:'.format(get_values_counts_Counter.__doc__))
    %timeit -n1 get_values_counts_Counter(the_list)
       
    # Unlike the three calls above, this one is timed on `arr`, not the list.
    print('\n{}:'.format(get_values_countes_with_np_unique.__doc__))    
    %timeit -n1 get_values_countes_with_np_unique(arr)

In [48]:
# Number of copies of each colour; the benchmark input has 3 * length_factor items.
length_factor = 100000

# Seed NumPy's global RNG so the shuffled input is the same on every
# Restart & Run All.
np.random.seed(42)

# Three distinct values repeated length_factor times each, then shuffled
# in place so equal values are not stored contiguously.
arr = np.repeat(np.array(['red', 'green', 'blue']), length_factor)
np.random.shuffle(arr)

timing_values_counts(arr)

Testing on Python list with len: 300000

with python dictionary (defaultdict):
41.1 ms ± 5.36 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

with python list.count() method:
25.8 ms ± 271 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

with python Counter dict subclass:
28.5 ms ± 2.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

with np.unique method:
33.1 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [49]:
# Sanity check: every implementation must report the same counts.
# dict(...) normalises the defaultdict / Counter results to plain dicts so
# the comparison does not depend on mapping-subclass equality rules.
arr_as_list = list(arr)
counts_np_unique = get_values_countes_with_np_unique(arr)

assert dict(get_values_counts_with_dict_1(arr_as_list)) == counts_np_unique
assert get_values_counts_with_list_count(arr_as_list) == counts_np_unique
assert dict(get_values_counts_Counter(arr_as_list)) == counts_np_unique

# Display the agreed result as the cell output.
counts_np_unique

{'blue': 100000, 'green': 100000, 'red': 100000}