# Test for string/enum substitution speed

In [1]:
import pandas as pd
import numpy as np
import time

In [26]:
size_index = 1000
size_cols = 5000
str_rename = {i:f"cat{i}" for i in range(1,10+1)}

def create_df():
    arr = np.random.randint(low=1, high=10, size=(size_index,size_cols))
    cols = [f'col{i}' for i in range(size_cols)]
    df = pd.DataFrame(data=arr, columns=cols)
    return df.astype('category', copy=False)


In [27]:
t_in = time.time()
df = create_df()
for v in df.columns.values:
    df[v] = df[v].cat.rename_categories(str_rename)
print(f"Elapsed: {time.time() - t_in:2.3f}s")
display(df)

Elapsed: 3.088s


Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col4990,col4991,col4992,col4993,col4994,col4995,col4996,col4997,col4998,col4999
0,cat7,cat5,cat6,cat9,cat8,cat1,cat4,cat6,cat8,cat4,...,cat7,cat9,cat4,cat8,cat4,cat2,cat6,cat3,cat6,cat4
1,cat4,cat9,cat2,cat5,cat9,cat4,cat3,cat1,cat5,cat6,...,cat9,cat2,cat5,cat2,cat5,cat9,cat7,cat4,cat1,cat1
2,cat2,cat3,cat3,cat5,cat9,cat5,cat4,cat2,cat4,cat1,...,cat8,cat1,cat8,cat3,cat5,cat9,cat2,cat2,cat8,cat6
3,cat6,cat6,cat6,cat9,cat9,cat5,cat7,cat1,cat7,cat6,...,cat6,cat8,cat8,cat9,cat3,cat5,cat4,cat2,cat8,cat3
4,cat7,cat4,cat2,cat9,cat2,cat4,cat3,cat7,cat9,cat5,...,cat9,cat2,cat2,cat1,cat8,cat7,cat7,cat2,cat8,cat6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,cat9,cat3,cat9,cat2,cat3,cat3,cat9,cat8,cat7,cat4,...,cat4,cat6,cat5,cat4,cat3,cat6,cat8,cat3,cat8,cat8
996,cat6,cat9,cat2,cat3,cat2,cat9,cat4,cat5,cat7,cat6,...,cat3,cat2,cat6,cat2,cat8,cat2,cat8,cat2,cat8,cat8
997,cat5,cat4,cat3,cat3,cat6,cat1,cat7,cat3,cat4,cat7,...,cat4,cat8,cat5,cat2,cat5,cat3,cat7,cat4,cat3,cat1
998,cat2,cat2,cat8,cat1,cat5,cat8,cat2,cat6,cat8,cat1,...,cat2,cat5,cat9,cat2,cat6,cat7,cat3,cat8,cat9,cat8


In [4]:
df = create_df()
t_in = time.time()
df = df.apply(lambda s: s.cat.rename_categories(str_rename), axis=1)
print(f"Elapsed: {time.time() - t_in:2.3f}s")
display(df)

Elapsed: 114.979s


Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col4990,col4991,col4992,col4993,col4994,col4995,col4996,col4997,col4998,col4999
0,cat1,cat6,cat7,cat7,cat6,cat8,cat7,cat5,cat9,cat6,...,cat7,cat4,cat1,cat5,cat4,cat9,cat4,cat9,cat9,cat5
1,cat3,cat2,cat5,cat9,cat3,cat8,cat6,cat9,cat8,cat3,...,cat9,cat4,cat1,cat1,cat6,cat6,cat2,cat4,cat5,cat8
2,cat8,cat3,cat1,cat6,cat4,cat8,cat2,cat1,cat2,cat1,...,cat3,cat5,cat7,cat2,cat5,cat7,cat7,cat6,cat9,cat3
3,cat3,cat5,cat5,cat8,cat3,cat5,cat8,cat6,cat1,cat1,...,cat3,cat2,cat3,cat9,cat4,cat5,cat6,cat4,cat7,cat4
4,cat9,cat6,cat7,cat2,cat8,cat4,cat8,cat7,cat1,cat2,...,cat2,cat8,cat4,cat2,cat1,cat5,cat5,cat1,cat6,cat8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,cat7,cat2,cat3,cat5,cat9,cat9,cat8,cat4,cat1,cat3,...,cat5,cat5,cat1,cat4,cat2,cat4,cat3,cat7,cat6,cat9
996,cat3,cat7,cat1,cat2,cat8,cat8,cat2,cat7,cat1,cat1,...,cat4,cat4,cat9,cat6,cat8,cat9,cat6,cat4,cat5,cat2
997,cat9,cat1,cat7,cat4,cat3,cat7,cat8,cat1,cat7,cat6,...,cat8,cat3,cat4,cat3,cat7,cat6,cat8,cat1,cat9,cat9
998,cat4,cat5,cat1,cat5,cat9,cat1,cat4,cat1,cat1,cat4,...,cat8,cat4,cat9,cat5,cat2,cat2,cat8,cat5,cat7,cat7


In [21]:
df = create_df()
t_in = time.time()
cols = df.columns
df.update({i:df[i].cat.rename_categories(str_rename) for i in list(cols)})
print(f"Elapsed: {time.time() - t_in:2.3f}s")
display(df)

Elapsed: 2.912s


Unnamed: 0,col0,col1,col2,col3,col4,col5,col6,col7,col8,col9,...,col4990,col4991,col4992,col4993,col4994,col4995,col4996,col4997,col4998,col4999
0,cat4,cat4,cat4,cat4,cat9,cat1,cat3,cat8,cat1,cat7,...,cat1,cat2,cat7,cat6,cat1,cat1,cat6,cat1,cat5,cat2
1,cat3,cat7,cat9,cat8,cat7,cat5,cat6,cat6,cat5,cat3,...,cat6,cat5,cat7,cat3,cat8,cat3,cat8,cat5,cat7,cat9
2,cat5,cat8,cat7,cat4,cat7,cat5,cat8,cat2,cat7,cat7,...,cat1,cat1,cat9,cat2,cat1,cat4,cat8,cat5,cat7,cat5
3,cat8,cat1,cat2,cat1,cat6,cat8,cat2,cat8,cat5,cat6,...,cat6,cat9,cat9,cat5,cat9,cat1,cat1,cat3,cat1,cat5
4,cat6,cat6,cat1,cat8,cat8,cat3,cat2,cat2,cat1,cat8,...,cat2,cat5,cat6,cat7,cat8,cat9,cat9,cat9,cat3,cat7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,cat3,cat6,cat2,cat2,cat1,cat8,cat7,cat4,cat3,cat8,...,cat5,cat3,cat6,cat4,cat5,cat3,cat5,cat3,cat1,cat3
996,cat3,cat7,cat7,cat2,cat8,cat8,cat3,cat2,cat3,cat9,...,cat3,cat3,cat5,cat4,cat9,cat2,cat2,cat4,cat2,cat9
997,cat3,cat2,cat6,cat2,cat9,cat3,cat7,cat3,cat9,cat3,...,cat3,cat7,cat8,cat4,cat6,cat3,cat3,cat9,cat2,cat6
998,cat8,cat4,cat4,cat3,cat1,cat7,cat1,cat7,cat9,cat1,...,cat3,cat5,cat8,cat9,cat4,cat7,cat9,cat3,cat6,cat3
