In [1]:
import pandas as pd
import numpy as np
import time, random
from tqdm import tqdm

In [2]:
# Number of rows generated by every benchmark loop that iterates over range(iter_num).
iter_num = 10 ** 4

# List Append

In [3]:
# Benchmark: accumulate rows in a plain Python list, then build the DataFrame once.
# perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring short intervals.
# Note: the timed region also covers the random draws and the tqdm wrapper.
time_start = time.perf_counter()

list_temp = []
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    list_temp.append([a, b])

# Single conversion of the collected [a, b] pairs into a two-column frame.
df = pd.DataFrame(list_temp, columns=['input_a', 'input_b'])
time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:00<00:00, 629425.69it/s]

0.028s
(10000, 2)





# Numpy Append

In [4]:
# Benchmark: grow a NumPy array one row at a time with np.append, then wrap it
# in a DataFrame. perf_counter() is a monotonic, high-resolution clock suited
# to timing short intervals.
time_start = time.perf_counter()

# Start from an empty (0, 2) integer array so every iteration can append
# unconditionally, and so the DataFrame construction below still receives a
# two-column array when iter_num == 0.
list_temp = np.empty((0, 2), dtype=int)
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    data_temp = np.array([[a, b]])
    # np.append returns a new array rather than modifying its input, so the
    # result has to be rebound on every iteration.
    list_temp = np.append(list_temp, data_temp, axis=0)

df = pd.DataFrame(list_temp, columns=['input_a', 'input_b'])
time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:00<00:00, 162354.71it/s]

0.063s
(10000, 2)





# Pandas Append

In [5]:
# Benchmark: grow a DataFrame one row at a time via DataFrame.append.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# detect it up front and fall back to the documented pd.concat replacement
# when it is missing; otherwise this cell raises AttributeError.
has_df_append = hasattr(pd.DataFrame, "append")

# perf_counter() is a monotonic, high-resolution clock suited to timing.
time_start = time.perf_counter()

df = pd.DataFrame(columns=['input_a', 'input_b'])
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    # One row expressed as a Series labelled with the frame's column names.
    df_temp = pd.Series([a, b], index=df.columns)
    if has_df_append:
        df = df.append(df_temp, ignore_index=True)
    else:
        # Turn the Series into a one-row frame and concatenate it.
        df = pd.concat([df, df_temp.to_frame().T], ignore_index=True)

time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:05<00:00, 1870.42it/s]

5.349s
(10000, 2)





# Pandas Concat

In [6]:
# Benchmark: grow a DataFrame one row at a time by concatenating a one-row
# frame onto it inside the loop. This per-iteration concat is the pattern
# being measured, so it is kept as is.
# perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring elapsed time.
time_start = time.perf_counter()

df = pd.DataFrame(columns=['input_a', 'input_b'])
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    # One-row frame whose index label is the loop counter.
    df_temp = pd.DataFrame([[a, b]], index=[i], columns=df.columns)
    df = pd.concat([df, df_temp], axis=0)

time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:03<00:00, 2971.48it/s]

3.368s
(10000, 2)





# Pandas Loc

In [7]:
# Benchmark: grow a DataFrame one row at a time by assigning to a new index
# label through .loc.
# perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring elapsed time.
time_start = time.perf_counter()

df = pd.DataFrame(columns=['input_a', 'input_b'])
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    # Label i is not in the index yet, so this assignment adds a new row.
    df.loc[i] = [a, b]

time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)
100%|██████████| 10000/10000 [00:07<00:00, 1341.04it/s]

7.459s
(10000, 2)





# Pandas from_dict

## orient='columns' (default)

In [8]:
# Benchmark: collect values column-wise in a dict of lists, then build the
# DataFrame once with from_dict(orient='columns').
# perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring short intervals.
time_start = time.perf_counter()

# Keys are the column names; each list receives one value per iteration.
dict_col = {'input_a': [], 'input_b': []}
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    dict_col['input_a'].append(a)
    dict_col['input_b'].append(b)

df = pd.DataFrame.from_dict(dict_col, orient='columns')
time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:00<00:00, 496050.33it/s]

0.026s
(10000, 2)





## orient='index'

In [9]:
# Benchmark: collect rows in a dict keyed by the loop counter, then build the
# DataFrame once with from_dict(orient='index').
# perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock meant for measuring short intervals.
time_start = time.perf_counter()

dict_idx = {}
for i in tqdm(range(iter_num)):
    a = random.randint(50, 100)
    b = random.randint(50, 100)
    # Each key becomes an index label; the [a, b] list becomes that row.
    dict_idx[i] = [a, b]

df = pd.DataFrame.from_dict(dict_idx, orient='index', columns=['input_a', 'input_b'])
time_delta = time.perf_counter() - time_start
print("%.3fs" % time_delta)
print(df.shape)

100%|██████████| 10000/10000 [00:00<00:00, 585453.23it/s]

0.023s
(10000, 2)



