In [1]:
import pymongo
import time
import threading
import random
from pymongo.collection import Collection

In [2]:
# Benchmark parameters: numbers of concurrent simulated users, and the labels
# printed by calculate_diffs for each collection (1, 2, 4, 8).
user_list = [1, 2, 4]
depth_list = [1, 2, 4, 8]

# Connection to the local MongoDB instance and the "deep" database.
cl = pymongo.MongoClient("mongodb://localhost:27017")
db = cl["deep"]

# One collection per label; the index paths used in createIndexes suggest
# that objN holds its `device` field nested N levels deep.
obj1, obj2, obj4, obj8 = (db[name] for name in ("obj1", "obj2", "obj4", "obj8"))

# Same order as depth_list: position i in coll_list pairs with depth_list[i].
coll_list = [obj1, obj2, obj4, obj8]

In [3]:
def create_batch(coll: Collection, size: int):
    """Build a batch of documents to insert by copying existing ones.

    Reads up to ``size`` documents from ``coll`` with the ``_id`` field
    projected away, and tags each one with ``"inserted": True`` so that the
    copies can later be found again (``delete_batch`` and ``count`` filter
    on that flag).

    Args:
        coll: Collection to read the template documents from.
        size: Value passed to the cursor's ``limit``.

    Returns:
        A list with the tagged documents.
    """
    cursor = coll.find({}, {"_id": 0}).limit(size)

    batch = []
    for document in cursor:
        # Marker used to tell benchmark copies apart from the original data.
        document["inserted"] = True
        batch.append(document)
    return batch

In [4]:
def delete_batch():
    """Remove benchmark-inserted documents from every collection.

    For each collection in ``coll_list``, deletes all documents carrying
    ``"inserted": True`` and prints the result object returned by
    ``delete_many``.
    """
    for collection in coll_list:
        outcome = collection.delete_many({"inserted": True})
        print(outcome)

In [5]:
def measure_insert(user_id: int, duration_list: list[int], coll: Collection, depth: int):
    """Time a single ``insert_many`` of 1000 documents and record the result.

    Intended to run as a thread target: instead of returning a value, the
    elapsed time is written into ``duration_list`` at index ``user_id``.

    Args:
        user_id: Slot in ``duration_list`` that this call writes to.
        duration_list: Shared output list; receives the elapsed time in
            milliseconds (a float, despite the ``int`` in the hint).
        coll: Collection used both as the source of the batch and as the
            insert target.
        depth: Not used inside this function.
    """
    # Batch preparation happens before the clock starts, so only the
    # insert_many call itself is measured.
    documents = create_batch(coll, 1000)

    t0 = time.perf_counter()
    coll.insert_many(documents)
    elapsed_ms = (time.perf_counter() - t0) * 1000

    duration_list[user_id] = elapsed_ms

In [6]:
def test_insert(num_of_users, depth):
    threads_list = []
    duration_list = [None] * num_of_users
    coll = coll_list[depth]

    for x in range (num_of_users):
        t = threading.Thread(target=measure_insert, args=(x, duration_list, coll, depth,))
        t.start()
        threads_list.append(t)

    for t in threads_list:
        t.join()
        
    result = sum(duration_list) / len(duration_list)
    return result

In [7]:
def execute_insert():
    """Run the insert benchmark for every user count and collection.

    For each entry of ``user_list`` and each position in ``depth_list``,
    calls ``test_insert`` once and stores its return value. Progress is
    printed as it goes; the number shown after ``depth:`` is the position
    in ``depth_list``, not the depth value itself.

    Returns:
        A list of rows, one per entry in ``user_list``; each row holds one
        ``test_insert`` result per entry in ``depth_list``.
    """
    depth_count = len(depth_list)
    result = []

    for users in user_list:
        print(f"user {users: }")
        print("  depth:", end=" ")

        row = []
        for depth_index in range(depth_count):
            row.append(test_insert(users, depth_index))
            print(depth_index, end=" ")
        print("---")

        result.append(row)

    return result

In [8]:
def calculate_diffs(old, new):
    """Print a per-user table comparing two benchmark result grids.

    Both arguments are indexed as ``grid[user_index][depth_index]``, matching
    the order of ``user_list`` and ``depth_list``. Each printed row shows the
    depth label, the old value, the new value and the relative change of new
    versus old in percent.

    Args:
        old: Baseline results.
        new: Results to compare against the baseline.
    """
    for row, users in enumerate(user_list):
        print(f"user {users}")
        for col, depth in enumerate(depth_list):
            before = old[row][col]
            after = new[row][col]
            # Relative change in percent; positive means `new` is larger.
            change = (after - before) / before * 100
            print(f"| {depth} |{before:10.0f}| {after:10.0f}| {change:10.1f} |")
        print("--------")

In [9]:
def createIndexes():
    """Create one ascending single-field index per collection.

    Each collection gets an index on its nested ``device`` field; the dotted
    path is longer for the collections with a higher number in their name.
    """
    targets = (
        (obj1, "l1.device"),
        (obj2, "l1.l2.device"),
        (obj4, "l1.l2.l3.l4.device"),
        (obj8, "l1.l2.l3.l4.l5.l6.l7.l8.device"),
    )
    for collection, field_path in targets:
        collection.create_index([(field_path, 1)])

In [10]:
def clearIndexes():
    """Call ``drop_indexes()`` on each of the four benchmark collections."""
    for collection in (obj1, obj2, obj4, obj8):
        collection.drop_indexes()

In [11]:
def showIndexes():
    """Print every index definition of the four benchmark collections.

    Collections are visited in the order obj1, obj2, obj4, obj8; each index
    document returned by ``list_indexes()`` is printed on its own line.
    """
    for collection in (obj1, obj2, obj4, obj8):
        for index_info in collection.list_indexes():
            print(index_info)

In [12]:
def count():
    """Print, per collection, how many documents carry ``"inserted": True``."""
    for collection in coll_list:
        flagged = collection.count_documents({"inserted": True})
        print(flagged)

In [17]:
# Clean-up: delete every document flagged {"inserted": True} from all
# benchmark collections and print one DeleteResult per collection.
delete_batch()

DeleteResult({'n': 7000, 'ok': 1.0}, acknowledged=True)
DeleteResult({'n': 7000, 'ok': 1.0}, acknowledged=True)
DeleteResult({'n': 7000, 'ok': 1.0}, acknowledged=True)
DeleteResult({'n': 7000, 'ok': 1.0}, acknowledged=True)


In [18]:
# Sanity check: print how many flagged (benchmark-inserted) documents each
# collection still contains.
count()

0
0
0
0


In [15]:
# List the indexes currently defined on each of the four collections.
showIndexes()

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])


In [14]:
# Call drop_indexes() on all four collections. The name `without_index` used
# for the next benchmark run suggests this is the baseline setup.
clearIndexes()

In [132]:
# Baseline benchmark run; the resulting grid of mean insert durations (ms) is
# later passed to calculate_diffs as the `old` argument.
without_index = execute_insert()

user  1
  depth: 0 1 2 3 ---
user  2
  depth: 0 1 2 3 ---
user  4
  depth: 0 1 2 3 ---


In [137]:
# Create the single-field index on the nested `device` path of each collection.
createIndexes()

In [138]:
# List the indexes again to confirm what createIndexes() added.
showIndexes()

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('l1.device', 1)])), ('name', 'l1.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('l1.l2.device', 1)])), ('name', 'l1.l2.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('l1.l2.l3.l4.device', 1)])), ('name', 'l1.l2.l3.l4.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('l1.l2.l3.l4.l5.l6.l7.l8.device', 1)])), ('name', 'l1.l2.l3.l4.l5.l6.l7.l8.device_1')])


In [139]:
# Second benchmark run, stored as `with_index`.
# NOTE(review): execute_insert() calls test_insert once per configuration, so
# every number in the table is a single un-repeated sample — confirm whether
# the reported differences are outside run-to-run noise.
# NOTE(review): nothing in this cell removes the documents inserted by the
# earlier run; verify delete_batch() was executed between the two runs if the
# collections are meant to have the same size for both.
with_index = execute_insert()

# Print old vs. new mean durations and the relative change in percent.
calculate_diffs(without_index, with_index)

user  1
  depth: 0 1 2 3 ---
user  2
  depth: 0 1 2 3 ---
user  4
  depth: 0 1 2 3 ---
user 1
| 1 |        21|         45|      112.7 |
| 2 |        16|         29|       75.4 |
| 4 |        17|         18|        2.6 |
| 8 |        16|         20|       23.4 |
--------
user 2
| 1 |        25|         20|      -20.6 |
| 2 |        20|         22|        9.5 |
| 4 |        20|         24|       20.5 |
| 8 |        41|         26|      -35.7 |
--------
user 4
| 1 |        69|         29|      -58.3 |
| 2 |        36|         38|        5.8 |
| 4 |        32|         36|       11.2 |
| 8 |        49|         34|      -30.8 |
--------


In [20]:
# Print the total number of documents (no filter) in each collection.
for c in coll_list:
    print(c.count_documents({}))

100000
100000
100000
100000
