### Calc unique values-counts in list/ndarray

Let's see different versions of doing that task and measure the speed of each approach.

In [1]:
import numpy as np
from collections import defaultdict,Counter

In [2]:
def get_values_counts_as_dict_1(atribute_list):
    """values_counts counter with python dictionary (defaultdict)"""    
    values_counts = defaultdict(int)
    
    for val in atribute_list:
        values_counts[val] += 1

    return values_counts    

In [3]:
def get_values_counts_as_dict_2(attribute_list):
    """values_counts counter with python list.count() method"""
    values_counts = {}
    
    for val in set(attribute_list):
        values_counts[val] = attribute_list.count(val)

    return values_counts

In [4]:
def get_values_counts_as_dict_3(attribute_list):
    """values_counts counter with python Counter dict subclass"""    
    return Counter(attribute_list)

In [5]:
def get_values_countes_as_2d_array(attribute_list):
    """values_counts counter with np.unique method"""
    values, counts = np.unique(attribute_list, return_counts=True)
    
    return [values, counts]

### Speed timing

In [6]:
def timing_values_counts(arr):
    the_list = list(arr)    
    print('Testing on Python list with len: {}'.format(len(the_list)))
    
    print('\n{}:'.format(get_values_counts_as_dict_1.__doc__))
    %timeit -n1 get_values_counts_as_dict_1(the_list)
    
    print('\n{}:'.format(get_values_counts_as_dict_2.__doc__))
    %timeit -n1 get_values_counts_as_dict_2(the_list)        
    
    print('\n{}:'.format(get_values_counts_as_dict_3.__doc__))
    %timeit -n1 get_values_counts_as_dict_3(the_list)
       

    print('\nTiming get_values_counts_as_2d_array')
    %timeit -n1 get_values_counts_as_2d_array(arr)

In [7]:
length_factor = 100000

arr = np.repeat(np.array(['red','green','blue']),length_factor)
np.random.shuffle(arr) 

timing_values_counts(arr)

NameError: name 'timing_values_countes' is not defined