# Indexing performance

In [30]:
import pandas
import numpy

## Performance tests for full-indexing

In [31]:
# Two independent 1000 x 10 frames of uniform random floats. Only their
# (default integer) indexes matter for the index-building functions below.
df_a = pandas.DataFrame(numpy.random.random(size=(1000, 10)))
df_b = pandas.DataFrame(numpy.random.random(size=(1000, 10)))

# The index names become the level names of the resulting MultiIndex.
df_a.index.name, df_b.index.name = 'a', 'b'

df_a.head()

a,0,1,2,3,4,5,6,7,8,9
0,0.035515,0.246739,0.047663,0.087562,0.076042,0.432226,0.805492,0.742564,0.555524,0.870628
1,0.361707,0.850721,0.868866,0.091685,0.291206,0.190513,0.123822,0.212684,0.625863,0.53953
2,0.316949,0.616324,0.450659,0.344823,0.838458,0.096856,0.87824,0.064914,0.03578,0.839569
3,0.859921,0.406412,0.577638,0.025337,0.53865,0.808297,0.892857,0.287159,0.903277,0.784371
4,0.033833,0.352959,0.945257,0.222103,0.274195,0.728634,0.689249,0.303446,0.075378,0.922597


In [32]:
def _fullindex1(A, B):

    # merge_col is used to make a full index.
    A_merge = pandas.DataFrame({'merge_col':1, A.index.name: A.index.values})
    B_merge = pandas.DataFrame({'merge_col':1, B.index.name: B.index.values})

    pairs = A_merge.merge(B_merge, how='inner', on='merge_col').set_index([A.index.name, B.index.name])

    return pairs.index

In [33]:
def _fullindex2(A, B):

    pairs_array = [numpy.tile(A.index.values, len(B)), numpy.repeat(B.index.values, len(A))]
    
    return pandas.MultiIndex.from_arrays(pairs_array, names=[A.index.name, B.index.name])

In [34]:
def _fullindex3(A,B):
    
    return pandas.MultiIndex.from_product([A.index.values, B.index.values], names=[A.index.name, B.index.name])

In [35]:
# Sanity check: each implementation should yield len(df_a) * len(df_b) pairs.
for build_index in (_fullindex1, _fullindex2, _fullindex3):
    print (len(build_index(df_a, df_b)))

1000000
1000000
1000000


In [36]:
# Time the three implementations on the same pair of 1000-row frames.
%timeit _fullindex1(df_a, df_b)
%timeit _fullindex2(df_a, df_b)
%timeit _fullindex3(df_a, df_b)

10 loops, best of 3: 96.2 ms per loop
10 loops, best of 3: 40.4 ms per loop
10 loops, best of 3: 8.35 ms per loop
