In [1]:
import pymongo
import time
import threading
import json
import random
from bson import ObjectId
from pymongo.collection import Collection

In [2]:
# Connect to the local MongoDB server and select the benchmark database.
cl = pymongo.MongoClient("mongodb://localhost:27017")
db = cl["deep"]

# The four benchmark collections, kept in a fixed order: position i in
# coll_list is used together with position i of the where/update clause lists.
coll_list = [db[coll_name] for coll_name in ("arr1", "arr2", "arr4", "arr8")]
arr1, arr2, arr4, arr8 = coll_list

In [3]:
# Nesting depths under test; position i lines up with coll_list[i] and with
# entry i of the where/update clause lists.
depth_list = [1, 2, 4, 8]
# Number of concurrent worker threads used for each measurement round.
user_list = [1, 2, 4]

In [137]:
# Range filters on the `device` field (the field createIndexes() indexes),
# one per collection, in coll_list order.
# NOTE(review): the thresholds differ per depth - presumably tuned to the data
# set; confirm against the generator of the JSON dumps.
where_index_list = [
    {"a1.device": {"$gte": 9750}},
    {"a1.a2.device": {"$gte": 9870}},
    {"a1.a2.a3.a4.device": {"$gte": 9968}},
    {"a1.a2.a3.a4.a5.a6.a7.a8.device": {"$gte": 9998}},
]
a1_where_index, a2_where_index, a4_where_index, a8_where_index = where_index_list

In [138]:
# Range filters on `total_volume_bytes`, a field createIndexes() does not
# index; one per collection, in coll_list order, with the same thresholds as
# the `device` filters.
where_non_index_list = [
    {"a1.total_volume_bytes": {"$gte": 9750}},
    {"a1.a2.total_volume_bytes": {"$gte": 9870}},
    {"a1.a2.a3.a4.total_volume_bytes": {"$gte": 9968}},
    {"a1.a2.a3.a4.a5.a6.a7.a8.total_volume_bytes": {"$gte": 9998}},
]
(
    a1_where_non_index,
    a2_where_non_index,
    a4_where_non_index,
    a8_where_non_index,
) = where_non_index_list

In [139]:
# Pipeline-style updates that overwrite the `device` field at each depth.
# Each random value is drawn once, when this cell runs, and is then reused by
# every update issued with that clause (no seed is set, so it changes per run).
update_index_list = [
    [{"$set": {"a1.device": random.randint(0, 9999)}}],
    [{"$set": {"a1.a2.device": random.randint(0, 9999)}}],
    [{"$set": {"a1.a2.a3.a4.device": random.randint(0, 9999)}}],
    [{"$set": {"a1.a2.a3.a4.a5.a6.a7.a8.device": random.randint(0, 9999)}}],
]
a1_update_index, a2_update_index, a4_update_index, a8_update_index = update_index_list

In [140]:
# Pipeline-style updates that set the `app` field (not covered by
# createIndexes()) to a constant string; one per collection, in coll_list order.
update_non_index_list = [
    [{"$set": {"a1.app": "deepbench"}}],
    [{"$set": {"a1.a2.app": "deepbench"}}],
    [{"$set": {"a1.a2.a3.a4.app": "deepbench"}}],
    [{"$set": {"a1.a2.a3.a4.a5.a6.a7.a8.app": "deepbench"}}],
]
(
    a1_update_non_index,
    a2_update_non_index,
    a4_update_non_index,
    a8_update_non_index,
) = update_non_index_list

In [9]:
def resolveUpdate(isUpdateIndex: bool, depth: int):
    """Pick the update clause for one collection.

    Args:
        isUpdateIndex: truthy to take the clause from ``update_index_list``,
            falsy to take it from ``update_non_index_list``.
        depth: position in the chosen list (a list index, not the nesting
            depth value itself).

    Returns:
        The update clause stored at that position.
    """
    clauses = update_index_list if isUpdateIndex else update_non_index_list
    return clauses[depth]

In [10]:
def resolveWhere(isIndexWhere: bool, depth: int):
    """Pick the filter clause for one collection.

    Args:
        isIndexWhere: truthy to take the filter from ``where_index_list``,
            falsy to take it from ``where_non_index_list``.
        depth: position in the chosen list (a list index, not the nesting
            depth value itself).

    Returns:
        The filter stored at that position.
    """
    filters = where_index_list if isIndexWhere else where_non_index_list
    return filters[depth]

In [11]:
def measure_update(user_id: int, duration_list: list[float], coll: Collection, whereClause: dict, updateClause: list[dict]):
    """Time a single ``update_many`` call and record it for one worker.

    Args:
        user_id: slot in ``duration_list`` that this worker writes to.
        duration_list: shared list, one slot per worker; the slot receives the
            elapsed time in milliseconds (a float).
        coll: collection on which ``update_many`` is invoked.
        whereClause: filter document passed as the first argument.
        updateClause: pipeline-style update (list of stages) passed second.

    Returns:
        None. If ``update_many`` raises, the exception propagates and the
        worker's slot in ``duration_list`` is left untouched.
    """
    started = time.perf_counter()
    coll.update_many(whereClause, updateClause)
    # perf_counter() returns seconds; convert to milliseconds.
    elapsed_ms = (time.perf_counter() - started) * 1000
    duration_list[user_id] = elapsed_ms

In [12]:
def test_update(num_of_users: int, depth: int, isWhereIndex: bool, isUpdateIndex: bool):
    threads_list = []
    duration_list = [None] * num_of_users
    whereClause = resolveWhere(isWhereIndex, depth)
    updateClause = resolveUpdate(isUpdateIndex, depth)
    coll = coll_list[depth]
    
    for x in range(num_of_users):
        t = threading.Thread(target=measure_update, args=(x, duration_list, coll, whereClause, updateClause,))
        t.start()
        threads_list.append(t)

    
    for t in threads_list:
        t.join()
        
    result = sum(duration_list) / len(duration_list)
    return result    

In [51]:
def execute_update(isWhereIndex: bool, isUpdateIndex: bool):
    """Measure every (user count, depth) combination for one scenario.

    Progress is printed while the measurements run. When both flags are truthy
    the data set is reloaded through ``reset_db()`` after each user count.

    Args:
        isWhereIndex: forwarded to ``test_update`` to choose the filter.
        isUpdateIndex: forwarded to ``test_update`` to choose the update.

    Returns:
        A list of rows, one per entry of ``user_list``; each row holds one
        ``test_update`` result per entry of ``depth_list``.
    """
    reload_after_round = isWhereIndex and isUpdateIndex
    result = []

    for users in user_list:
        print(f"user: {users}")
        print("  depth: ", end=" ")
        row = []
        # test_update expects the position in depth_list, not the depth value.
        for depth_pos, depth in enumerate(depth_list):
            row.append(test_update(users, depth_pos, isWhereIndex, isUpdateIndex))
            print(f"{depth}", end=" ")
        result.append(row)
        print("---")
        if reload_after_round:
            print("  reset db: ", end=" ")
            reset_db()
            print("done")
    return result

In [14]:
def showIndexes():
    """Print every index document of arr1, arr2, arr4 and arr8, in that order."""
    for coll in (arr1, arr2, arr4, arr8):
        for index_doc in coll.list_indexes():
            print(index_doc)

In [32]:
def clearIndexes():
    """Call ``drop_indexes()`` on arr1, arr2, arr4 and arr8, in that order."""
    for coll in (arr1, arr2, arr4, arr8):
        coll.drop_indexes()

In [31]:
def createIndexes():
    """Create an ascending single-field index on ``device`` in each collection.

    The i-th path below is indexed on ``coll_list[i]``.
    """
    device_paths = (
        "a1.device",
        "a1.a2.device",
        "a1.a2.a3.a4.device",
        "a1.a2.a3.a4.a5.a6.a7.a8.device",
    )
    for position, path in enumerate(device_paths):
        coll_list[position].create_index([(path, 1)])

In [17]:
def deleteAllData():
    """Delete every document from arr1, arr2, arr4 and arr8 (empty filter)."""
    for coll in (arr1, arr2, arr4, arr8):
        coll.delete_many({})

In [49]:
def _load_json_lines(path):
    """Parse a file holding one JSON document per line into a list of dicts."""
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open(path, encoding="utf-8") as f:
        # Skip whitespace-only lines (e.g. a trailing blank line) instead of
        # letting json.loads fail on them.
        return [json.loads(line) for line in f if line.strip()]


def importData():
    """Load the four JSON-lines dumps under ``data/`` into their collections.

    Each file is read completely and inserted with a single ``insert_many``
    call; a short progress label is printed after every collection.

    Raises:
        OSError: if a dump file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    sources = (
        ("data/arr1.json", arr1, "import: arr1"),
        ("data/arr2.json", arr2, "arr2"),
        ("data/arr4.json", arr4, "arr4"),
        ("data/arr8.json", arr8, "arr8"),
    )
    for path, coll, label in sources:
        coll.insert_many(_load_json_lines(path))
        print(label, end=" ")

In [45]:
def calculate_diffs(old, new):
    """Print a table comparing two result grids, rounded for quick reading.

    Args:
        old: baseline grid, indexed ``[user position][depth position]``.
        new: grid to compare against the baseline, same layout.

    For every cell the row shows the depth, the old value, the new value and
    the relative change of new versus old in percent.
    """
    for user_pos, users in enumerate(user_list):
        print(f"user {users}")
        for depth_pos, depth in enumerate(depth_list):
            before = old[user_pos][depth_pos]
            after = new[user_pos][depth_pos]
            pct_change = ((after - before) / before) * 100
            print(f"| {depth} |{before:10.0f}| {after:10.0f}| {pct_change:10.1f} |")
        print("--------")

In [44]:
def calc_diff_precise(old, new):
    """Print a comparison of two result grids with two decimal places.

    Args:
        old: baseline grid, indexed ``[user position][depth position]``.
        new: grid to compare against the baseline, same layout.

    For every cell the line shows the depth, the old value, the new value and
    the relative change of new versus old in percent.
    """
    for user_pos, users in enumerate(user_list):
        print(f"user {users}")
        for depth_pos, depth in enumerate(depth_list):
            before = old[user_pos][depth_pos]
            after = new[user_pos][depth_pos]
            pct_change = ((after - before) / before) * 100
            print(f"depth: {depth}, {before:10.2f} {after:10.2f} {pct_change:10.2f}")
        print("--------")

In [35]:
def hasIndexes():
    """Report whether ``arr1`` currently has exactly two indexes.

    Only ``arr1`` is inspected; the other collections are not checked.
    Presumably the two indexes are the default ``_id`` index plus the one
    added by ``createIndexes()`` - verify if more indexes are ever created.

    Returns:
        True when ``arr1.list_indexes()`` yields exactly two entries,
        otherwise False.
    """
    return len(arr1.list_indexes().to_list()) == 2

In [50]:
def reset_db():
    """Reload all benchmark data, keeping the index state found on entry.

    If ``hasIndexes()`` is true when called, the indexes are dropped before
    the reload and re-created afterwards; otherwise the data is reloaded
    without touching indexes. Progress is printed along the way.
    """
    restore_indexes = hasIndexes()

    if restore_indexes:
        clearIndexes()
        print("cleared indexes", end=", ")

    deleteAllData()
    print("deleted data", end=", ")
    importData()

    if not restore_indexes:
        return
    createIndexes()
    print("created indexes", end=", ")

In [141]:
# Start from a clean state: drop the indexes on all four collections.
# NOTE(review): the execution counts of the cells below are not sequential -
# confirm the results with Restart Kernel -> Run All.
clearIndexes()

In [156]:
# Print the index documents currently defined on each collection.
showIndexes()

SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('a1.device', 1)])), ('name', 'a1.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('a1.a2.device', 1)])), ('name', 'a1.a2.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('a1.a2.a3.a4.device', 1)])), ('name', 'a1.a2.a3.a4.device_1')])
SON([('v', 2), ('key', SON([('_id', 1)])), ('name', '_id_')])
SON([('v', 2), ('key', SON([('a1.a2.a3.a4.a5.a6.a7.a8.device', 1)])), ('name', 'a1.a2.a3.a4.a5.a6.a7.a8.device_1')])


In [157]:
# Drop the indexes again so the baseline measurement below runs without them.
clearIndexes()

In [152]:
# Reload all four collections from the JSON dumps; reset_db() re-creates the
# indexes only if hasIndexes() was true when it started.
reset_db()

cleared indexes, deleted data, import: arr1 arr2 arr4 arr8 created indexes, 

In [158]:
# Baseline: filter on `device`, update `app`. Same arguments as the indexed
# run further down; intended to run while no `device` index exists.
without_index = execute_update(True, False)

user: 1
  depth:  1 2 4 8 ---
user: 2
  depth:  1 2 4 8 ---
user: 4
  depth:  1 2 4 8 ---


In [145]:
# Create the ascending `device` index on each collection.
createIndexes()

In [146]:
# Filter on the (now indexed) `device` field, update the `app` field.
where_index_update_non_index = execute_update(True, False)

user: 1
  depth:  1 2 4 8 ---
user: 2
  depth:  1 2 4 8 ---
user: 4
  depth:  1 2 4 8 ---


In [154]:
# Filter on `device` and overwrite `device` itself. For this flag combination
# execute_update() calls reset_db() after every user count.
where_index_update_index = execute_update(True, True)

user: 1
  depth:  1 2 4 8 ---
  reset db:  cleared indexes, deleted data, import: arr1 arr2 arr4 arr8 created indexes, done
user: 2
  depth:  1 2 4 8 ---
  reset db:  cleared indexes, deleted data, import: arr1 arr2 arr4 arr8 created indexes, done
user: 4
  depth:  1 2 4 8 ---
  reset db:  cleared indexes, deleted data, import: arr1 arr2 arr4 arr8 created indexes, done


In [148]:
# Filter on `total_volume_bytes`, overwrite the `device` field.
# NOTE(review): execute_update() does not call reset_db() for this flag
# combination, so the modified `device` values persist afterwards - confirm
# this is intended before comparing against later runs.
where_non_index_update_index = execute_update(False, True)

user: 1
  depth:  1 2 4 8 ---
user: 2
  depth:  1 2 4 8 ---
user: 4
  depth:  1 2 4 8 ---


In [161]:
# Columns: depth | baseline ms | this scenario ms | % change vs. baseline.
calculate_diffs(without_index, where_index_update_non_index)

user 1
| 1 |        55|          9|      -83.6 |
| 2 |        59|         13|      -77.4 |
| 4 |       222|         47|      -78.8 |
| 8 |      2432|        421|      -82.7 |
--------
user 2
| 1 |        18|          8|      -54.3 |
| 2 |        42|         13|      -67.9 |
| 4 |       134|         46|      -66.0 |
| 8 |      2413|        630|      -73.9 |
--------
user 4
| 1 |        29|         15|      -48.4 |
| 2 |        60|         21|      -64.7 |
| 4 |       254|         72|      -71.5 |
| 8 |      4473|       1296|      -71.0 |
--------


In [163]:
# Columns: depth | baseline ms | this scenario ms | % change vs. baseline.
calculate_diffs(without_index, where_index_update_index)

user 1
| 1 |        55|         51|       -6.9 |
| 2 |        59|         50|      -14.4 |
| 4 |       222|        170|      -23.3 |
| 8 |      2432|       1718|      -29.4 |
--------
user 2
| 1 |        18|         29|       59.0 |
| 2 |        42|         49|       18.4 |
| 4 |       134|        141|        5.4 |
| 8 |      2413|       1905|      -21.1 |
--------
user 4
| 1 |        29|         45|       54.0 |
| 2 |        60|         67|       12.2 |
| 4 |       254|        233|       -8.3 |
| 8 |      4473|       3506|      -21.6 |
--------


In [162]:
# Columns: depth | baseline ms | this scenario ms | % change vs. baseline.
calculate_diffs(without_index, where_non_index_update_index)

user 1
| 1 |        55|         62|       12.7 |
| 2 |        59|        162|      175.5 |
| 4 |       222|        263|       18.5 |
| 8 |      2432|       4091|       68.2 |
--------
user 2
| 1 |        18|         33|       78.7 |
| 2 |        42|         57|       36.9 |
| 4 |       134|        278|      107.5 |
| 8 |      2413|       4602|       90.7 |
--------
user 4
| 1 |        29|         49|       67.8 |
| 2 |        60|        125|      108.2 |
| 4 |       254|        603|      137.4 |
| 8 |      4473|      10165|      127.3 |
--------
