In [1]:
import pymongo
import time
import threading
import random

In [2]:
# Connect to the MongoDB server on localhost:27017 and select the collection
# that the rest of the notebook works on.
cl = pymongo.MongoClient("mongodb://localhost:27017")
db = cl["deep"]  # database "deep"
col = db["flat"]  # collection "flat"; read as a global by the functions below

In [61]:
# Show one document with device == 2 to see which fields documents carry.
col.find_one({"device": 2})

{'_id': ObjectId('68667473f59ce28b2e27d010'),
 'number_of_records': 57,
 'activity_sec': 47,
 'application': 'education',
 'device': 2,
 'volume_total_bytes': 8310,
 'subscribers': 464}

In [62]:
# Total number of documents in the collection (empty filter matches everything).
col.count_documents({})

500000

In [63]:
# Number of documents that have an "old_device" field, i.e. the field that
# prepare() writes and undo_prepare() removes.
col.count_documents({"old_device": {"$exists": True}})

250211

In [67]:
# Print every index currently defined on the collection, one spec per line.
indexes = col.list_indexes()
for i in indexes:
    print(i)

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])


In [64]:
def calc_perf(old, new):
    """Print how much smaller `new` is than `old`, as a percentage.

    Both values are rounded to the nearest integer first. The printed line is
    ``"<old> <new> <change>"`` where ``change`` is
    ``(old - new) / old * 100`` with one decimal place. A positive change
    means `new` is smaller than `old`; a negative change means it is larger.

    Args:
        old: Baseline measurement.
        new: Measurement to compare against the baseline, in the same unit.

    Returns:
        None. The result is only printed.

    If the baseline rounds to zero the percentage is undefined, so ``n/a`` is
    printed in its place instead of raising ZeroDivisionError.
    """
    rounded_old = round(old)
    rounded_new = round(new)

    if rounded_old == 0:
        # No meaningful relative change against a zero baseline.
        print(f"{rounded_old} {rounded_new} n/a")
        return

    ratio = ((rounded_old - rounded_new) / rounded_old) * 100
    print(f"{rounded_old} {rounded_new} {ratio:.1f}")

In [54]:
def create_batch(batch_size):
    """Build a list of `batch_size` random mock documents.

    Every document gets the application tag "insert_batch_member" and a
    device id drawn from 5000..9999 (inclusive); the remaining numeric fields
    are filled with random integers.

    The record counter is stored under "number_of_records", the field name
    used by the documents already in the collection (the previous key
    "num_of_records" produced mock documents with a different schema).

    Args:
        batch_size: Number of documents to generate. Zero or a negative
            value yields an empty list.

    Returns:
        A list of dicts ready to be inserted; no "_id" is assigned here.
    """
    batch = []
    for _ in range(batch_size):
        el = {
            "number_of_records": random.randint(0, 999),
            "activity_sec": random.randint(0, 99),
            "application": "insert_batch_member",
            "device": random.randint(5000, 9999),
            "volume_total_bytes": random.randint(0, 5000),
            "subscribers": random.randint(0, 999),
        }
        batch.append(el)
    return batch

In [55]:
def insert_mocks(size):
    """Insert `size` freshly generated mock documents into `col`.

    The documents come from create_batch(size) and are written with a single
    insert_many call.

    Args:
        size: Number of mock documents to insert. For zero or a negative
            value nothing is inserted, because the generated batch would be
            empty and insert_many rejects an empty document list.

    Returns:
        None.
    """
    if size <= 0:
        return

    col.insert_many(create_batch(size))

In [56]:
def prepare():
    """Move existing device ids >= 5000 out of the way before benchmarking.

    For every document whose "device" is >= 5000, the current value is copied
    into "old_device" and "device" is then set to 0. The mock documents
    produced by create_batch use device ids from 5000 upwards and the
    benchmark deletes by that same range, so this keeps pre-existing
    documents out of the delete filter. undo_prepare() reverses it.

    Both steps run as two stages of one pipeline update, so the collection is
    scanned once and each document is changed in a single write. The stages
    execute in order, so "old_device" receives the original value before
    "device" is overwritten.
    """
    col.update_many(
        {"device": {"$gte": 5000}},
        [
            {"$set": {"old_device": "$device"}},
            {"$set": {"device": 0}},
        ],
    )

In [57]:
def undo_prepare():
    """Revert prepare(): drop leftover mock-range documents, restore device ids.

    First deletes every document whose "device" is >= 5000, then, for every
    document that has an "old_device" field, copies that value back into
    "device" and removes "old_device".
    """
    mock_range_filter = {"device": {"$gte": 5000}}
    backed_up_filter = {"old_device": {"$exists": True}}
    restore_pipeline = [
        {"$set": {"device": "$old_device"}},
        {"$unset": "old_device"},
    ]

    # The delete has to run before the restore: once the original ids are
    # back in "device", those documents match the >= 5000 filter too.
    col.delete_many(mock_range_filter)
    col.update_many(backed_up_filter, restore_pipeline)

In [58]:
def measure_delete(user_id, duration_list, rounds=10, batch_size=50000):
    """Time delete_many over freshly inserted mock documents.

    Each round inserts `batch_size` mock documents via insert_mocks and then
    times a delete_many of every document with "device" >= 5000. Only the
    delete is timed; the insert happens before the clock starts.

    Args:
        user_id: Index in `duration_list` where the result is stored.
        duration_list: Shared list that receives the result.
        rounds: Number of insert/delete rounds to average over (default 10,
            the previously hard-coded value).
        batch_size: Mock documents inserted per round (default 50000, the
            previously hard-coded value).

    Returns:
        None. The average delete duration in milliseconds is written to
        ``duration_list[user_id]``.

    Raises:
        ValueError: If `rounds` is not positive (no average can be computed).
    """
    if rounds <= 0:
        raise ValueError("rounds must be a positive integer")

    total_seconds = 0.0

    for _ in range(rounds):
        insert_mocks(batch_size)
        # NOTE(review): the filter below is not scoped to this worker's own
        # documents. When several workers run concurrently, one worker's
        # delete can remove documents another worker has just inserted, so
        # per-worker timings are not independent.
        start = time.perf_counter()
        col.delete_many({"device": {"$gte": 5000}})
        end = time.perf_counter()

        total_seconds += end - start

    # take average duration and convert to ms
    duration_list[user_id] = (total_seconds / rounds) * 1000

In [59]:
def delete_test(num_of_users):
    """Run measure_delete in `num_of_users` concurrent threads and average it.

    One thread is started per simulated user. Each thread writes its average
    delete duration (ms) into its own slot of a shared list; after all
    threads have been joined, the mean over all slots is returned.

    Args:
        num_of_users: Number of concurrent worker threads; must be >= 1.

    Returns:
        The mean of the per-thread average delete durations, in milliseconds.

    Raises:
        ValueError: If `num_of_users` is less than 1 (previously this ended
            in a ZeroDivisionError when averaging an empty list).
        RuntimeError: If a worker finished without storing a duration, which
            happens when measure_delete raised inside its thread
            (previously an opaque TypeError from sum()).
    """
    if num_of_users < 1:
        raise ValueError("num_of_users must be at least 1")

    duration_list = [None] * num_of_users
    threads_list = []

    for x in range(num_of_users):
        t = threading.Thread(target=measure_delete, args=(x, duration_list))
        t.start()
        threads_list.append(t)

    for t in threads_list:
        t.join()

    # An exception inside a thread does not propagate to join(); the only
    # trace it leaves here is a slot that is still None.
    failed_users = [i for i, d in enumerate(duration_list) if d is None]
    if failed_users:
        raise RuntimeError(
            f"measure_delete reported no duration for user(s) {failed_users}"
        )

    return sum(duration_list) / len(duration_list)

In [60]:
# Back up and zero out existing device ids >= 5000 so that only mock
# documents fall into the range the benchmark deletes.
prepare()

In [66]:
# Baseline: average delete time (ms) with 1, 2 and 4 concurrent workers,
# measured before the index on "device" is created.
one_old = delete_test(1)
two_old = delete_test(2)
four_old = delete_test(4)

In [68]:
# Add an ascending index on "device", the field the benchmark's delete
# filter uses; the value returned by create_index is kept in index_name.
index_name = col.create_index([("device", 1)])

In [69]:
# Same measurements as the baseline, now with the "device" index in place.
one_new = delete_test(1)
two_new = delete_test(2)
four_new = delete_test(4)

In [70]:
# Print "<baseline ms> <indexed ms> <change %>" for 1, 2 and 4 workers.
# A positive change means the indexed run was faster, a negative one slower.
calc_perf(one_old, one_new)
calc_perf(two_old, two_new)
calc_perf(four_old, four_new)

667 660 1.0
1246 1494 -19.9
2919 3990 -36.7


In [71]:
# Remove the benchmark index again. NOTE(review): "device_1" is presumably
# the name create_index generated above (also held in index_name) - confirm.
col.drop_index("device_1")

In [72]:
# Clean up: delete any remaining mock-range documents and restore the
# device ids that prepare() moved into "old_device".
undo_prepare()