In [1]:
import pymongo
import time
import threading
import random

In [2]:
# Connect to the MongoDB server on localhost and bind the database and
# collection handles that every later cell uses.
cl = pymongo.MongoClient("mongodb://localhost:27017")
db = cl["deep"]
col = db["flat"]  # collection that all benchmarks below read and update

In [3]:
# Show one document to see which fields the collection stores.
col.find_one()

{'_id': ObjectId('6866846b7fb3cacee8f2e148'),
 'number_of_records': 600,
 'activity_sec': 38,
 'application': 'floor',
 'device': 2346,
 'volume_total_bytes': 5855,
 'subscribers': 709}

In [27]:
# Count the documents matched by the device < 220 filter used in the
# benchmarks below.
col.count_documents({"device": {"$lt": 220}})

10850

In [13]:
def prepare():
    """Back up ``device`` into ``old_device`` and zero it where device < 220.

    Runs two bulk updates on the module-level ``col`` collection:

    1. Copy ``device`` into ``old_device`` for matching documents that do
       not yet carry a backup.
    2. Set ``device`` to 0 for every document with device < 220.

    Safe to call more than once: an existing ``old_device`` is never
    overwritten. Use ``undo_prepare()`` to restore the saved values.
    """
    # Only back up documents without a saved value. After a previous run (or
    # after a benchmark rewrote ``device``) the field is still < 220, so an
    # unconditional copy would replace the original value with the modified one.
    col.update_many(
        {"device": {"$lt": 220}, "old_device": {"$exists": False}},
        [{"$set": {"old_device": "$device"}}],
    )
    col.update_many({"device": {"$lt": 220}}, [{"$set": {"device": 0}}])

In [14]:
def undo_prepare():
    """Restore ``device`` from ``old_device`` and remove the backup field.

    Applies to every document in ``col`` that has an ``old_device`` field.
    """
    has_backup = {"old_device": {"$exists": True}}
    restore_pipeline = [
        # Copy the saved value back first ...
        {"$set": {"device": "$old_device"}},
        # ... then drop the backup field.
        {"$unset": "old_device"},
    ]
    col.update_many(has_backup, restore_pipeline)

In [49]:
# Print every index currently defined on the collection.
indexes = col.list_indexes()
for i in indexes:
    print(i)

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])


In [48]:
# Create an ascending single-field index on "device".
col.create_index([("device", 1)])

'device_1'

In [48]:
# Drop the "device" index by the name create_index returned for it.
col.drop_index("device_1")

In [30]:
def calc_perf(old, new):
    """Print two timings and the relative improvement of ``new`` over ``old``.

    Args:
        old: Baseline duration (the notebook passes mean milliseconds).
        new: Duration to compare against the baseline, in the same unit.

    Prints one line ``"<old> <new> <pct>"``. Both timings are rounded to the
    nearest integer for display, and ``pct`` is ``(old - new) / old * 100``
    with one decimal (positive means ``new`` is faster). When ``old`` is 0
    the percentage is undefined and ``n/a`` is printed instead.
    """
    rounded_old = round(old)
    rounded_new = round(new)

    # A zero baseline has no meaningful relative change; report it instead
    # of raising ZeroDivisionError.
    if old == 0:
        print(f"{rounded_old} {rounded_new} n/a")
        return

    # Use the unrounded inputs so small timings do not collapse to 0 and the
    # percentage does not inherit the display rounding error.
    ratio = ((old - new) / old) * 100
    print(f"{rounded_old} {rounded_new} {ratio:.1f}")

In [145]:
# Update non-index
def measure_update_1(user_id, duration_list):
    """Time ten bulk updates of ``application`` and record the mean in ms.

    Each run sets ``application`` to "deepbench" on every document with
    device < 220. Only the ``update_many`` call is inside the timed window.

    Args:
        user_id: Slot in ``duration_list`` that receives this worker's result.
        duration_list: Shared list; ``duration_list[user_id]`` is set to the
            mean wall-clock time per update in milliseconds.
    """
    total_seconds = 0

    for _ in range(10):
        began = time.perf_counter()
        col.update_many({"device": {"$lt": 220}}, {"$set": {"application": "deepbench"}})
        finished = time.perf_counter()
        total_seconds += finished - began

    # Mean over the ten runs, converted from seconds to milliseconds.
    duration_list[user_id] = (total_seconds / 10) * 1000

In [146]:
def update_non_index(num_of_users):
    """Run ``measure_update_1`` in parallel threads and average the results.

    Note: a later cell defines another ``update_non_index`` that uses
    ``measure_update_3``; whichever definition ran last is the one in effect.

    Args:
        num_of_users: Number of concurrent worker threads to start.

    Returns:
        Mean of the per-thread results written by ``measure_update_1``.
    """
    duration_list = [None] * num_of_users

    # One thread per simulated user; each writes into its own list slot.
    workers = [
        threading.Thread(target=measure_update_1, args=(slot, duration_list))
        for slot in range(num_of_users)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    return sum(duration_list) / len(duration_list)

In [23]:
# Save device into old_device and set device to 0 for documents with device < 220.
prepare()

In [147]:
# Baseline runs with 1, 2 and 4 concurrent users, taken before this cell
# creates the index on "device".
one_user_old = update_non_index(1)
two_user_old = update_non_index(2)
four_user_old = update_non_index(4)

# Add the ascending index on "device", then repeat the same runs.
col.create_index([("device", 1)])

one_user_new = update_non_index(1)
two_user_new = update_non_index(2)
four_user_new = update_non_index(4)

In [161]:
# One line per user count: time before the index, time after, percent change.
calc_perf(one_user_old, one_user_new)
calc_perf(two_user_old, two_user_new)
calc_perf(four_user_old, four_user_new)

343 96 72.0
429 111 74.1
842 178 78.9


In [150]:
# Drop the "device" index created in the comparison above.
col.drop_index("device_1")

In [152]:
def measure_update_2(user_id, duration_list):
    """Time ten bulk updates of ``device`` itself and record the mean in ms.

    Each run picks a random value in 0..219 and writes it to ``device`` on
    every document with device < 220, so the field being filtered on is also
    the field being modified. The random draw is outside the timed window.

    Args:
        user_id: Slot in ``duration_list`` that receives this worker's result.
        duration_list: Shared list; ``duration_list[user_id]`` is set to the
            mean wall-clock time per update in milliseconds.
    """
    elapsed_total = 0

    for _ in range(10):
        new_device = random.randint(0, 219)
        t0 = time.perf_counter()
        col.update_many({"device": {"$lt": 220}}, {"$set": {"device": new_device}})
        t1 = time.perf_counter()
        elapsed_total += t1 - t0

    # Mean over the ten runs, converted from seconds to milliseconds.
    mean_ms = (elapsed_total / 10) * 1000
    duration_list[user_id] = mean_ms

In [153]:
def update_index(num_of_users):
    """Run ``measure_update_2`` in parallel threads and average the results.

    Args:
        num_of_users: Number of concurrent worker threads to start.

    Returns:
        Mean of the per-thread results written by ``measure_update_2``.
    """
    duration_list = [None] * num_of_users
    running = []

    # Launch one worker per simulated user, each with its own result slot.
    for slot in range(num_of_users):
        worker = threading.Thread(target=measure_update_2, args=(slot, duration_list))
        worker.start()
        running.append(worker)

    # Wait for every worker before reading the shared list.
    for worker in running:
        worker.join()

    return sum(duration_list) / len(duration_list)

In [154]:
# Baseline runs with 1, 2 and 4 concurrent users updating the "device" field.
one_res_old = update_index(1)
two_res_old = update_index(2)
four_res_old = update_index(4)

In [155]:
# Create the ascending index on "device" for the second half of the comparison.
col.create_index([("device", 1)])

'device_1'

In [156]:
# Same runs as the baseline, stored under the *_new names for comparison.
one_res_new = update_index(1)
two_res_new = update_index(2)
four_res_new = update_index(4)

In [162]:
# One line per user count: baseline time, new time, percent change.
calc_perf(one_res_old, one_res_new)
calc_perf(two_res_old, two_res_new)
calc_perf(four_res_old, four_res_new)

454 335 26.2
519 388 25.2
968 679 29.9


In [28]:
# Restore device from old_device and remove the backup field.
undo_prepare()

In [12]:
def prepare_3():
    """Back up ``device`` into ``old_device`` where number_of_records > 978.

    Only documents that do not yet have an ``old_device`` field are touched,
    so calling this again after ``device`` has been modified keeps the
    originally saved values. Use ``undo_prepare_3()`` to restore them.
    """
    # Skip documents that already carry a backup: an unconditional copy would
    # overwrite the saved original with whatever ``device`` holds now.
    col.update_many(
        {"number_of_records": {"$gt": 978}, "old_device": {"$exists": False}},
        [{"$set": {"old_device": "$device"}}],
    )

In [21]:
def undo_prepare_3():
    """Copy ``old_device`` back into ``device`` and drop the backup field.

    Affects every document in ``col`` where ``old_device`` exists.
    """
    restore_device = {"$set": {"device": "$old_device"}}
    drop_backup = {"$unset": "old_device"}
    # Stage order matters: restore the value before removing its source field.
    col.update_many(
        {"old_device": {"$exists": True}},
        [restore_device, drop_backup],
    )

In [36]:
# Save device into old_device for documents with number_of_records > 978.
prepare_3()

In [44]:
# Inspect one document matched by the number_of_records > 978 filter.
col.find_one({"number_of_records": {"$gt": 978}})

{'_id': ObjectId('6866846b7fb3cacee8f2e14a'),
 'number_of_records': 999,
 'activity_sec': 93,
 'application': 'effect',
 'device': 2257,
 'volume_total_bytes': 2606,
 'subscribers': 191,
 'old_device': 767}

In [50]:
# Same lookup as above, re-run to check the document's current state.
col.find_one({"number_of_records": {"$gt": 978}})

{'_id': ObjectId('6866846b7fb3cacee8f2e14a'),
 'number_of_records': 999,
 'activity_sec': 93,
 'application': 'effect',
 'device': 767,
 'volume_total_bytes': 2606,
 'subscribers': 191}

In [43]:
# Count the documents matched by the number_of_records > 978 filter.
col.count_documents({"number_of_records": {"$gt": 978}})

10578

In [27]:
def measure_update_3(user_id, duration_list):
    """Time ten bulk updates of ``device`` filtered by ``number_of_records``.

    Each run picks a random value in 0..9999 and writes it to ``device`` on
    every document with number_of_records > 978. The random draw is outside
    the timed window.

    Args:
        user_id: Slot in ``duration_list`` that receives this worker's result.
        duration_list: Shared list; ``duration_list[user_id]`` is set to the
            mean wall-clock time per update in milliseconds.
    """
    def timed_update(value):
        # Wall-clock seconds spent in a single update_many call.
        t0 = time.perf_counter()
        col.update_many({"number_of_records": {"$gt": 978}}, {"$set": {"device": value}})
        t1 = time.perf_counter()
        return t1 - t0

    spent = 0
    for _ in range(10):
        spent += timed_update(random.randint(0, 9999))

    # Mean over the ten runs, converted from seconds to milliseconds.
    duration_list[user_id] = (spent / 10) * 1000

In [28]:
def update_non_index(num_of_users):
    """Run ``measure_update_3`` in parallel threads and average the results.

    Note: this reuses the name of an earlier ``update_non_index`` (which
    used ``measure_update_1``) and replaces it once this cell is executed.

    Args:
        num_of_users: Number of concurrent worker threads to start.

    Returns:
        Mean of the per-thread results written by ``measure_update_3``.
    """
    duration_list = [None] * num_of_users
    started = []

    slot = 0
    while slot < num_of_users:
        # Each worker reports into its own index of the shared list.
        worker = threading.Thread(target=measure_update_3, args=(slot, duration_list))
        worker.start()
        started.append(worker)
        slot += 1

    # Block until all workers have stored their result.
    for worker in started:
        worker.join()

    mean_ms = sum(duration_list) / len(duration_list)
    return mean_ms

In [41]:
one_nind_old = update_non_index(1)
two_nind_old = update_non_index(1)
four_nind_old = update_non_index(1)

In [42]:
col.create_index([("device", 1)])

'device_1'

In [45]:
one_nind_new = update_non_index(1)
two_nind_new = update_non_index(1)
four_nind_new = update_non_index(1)


In [46]:
# One line per measurement pair: baseline time, new time, percent change
# (a negative percentage means the new run was slower).
calc_perf(one_nind_old, one_nind_new)
calc_perf(two_nind_old, two_nind_new)
calc_perf(four_nind_old, four_nind_new)

490 597 -21.8
464 582 -25.4
465 568 -22.2


In [47]:
# Restore device from old_device and remove the backup field.
undo_prepare_3()