# RQ2

To what extent can the choice of vector database reduce resource usage?

In [1]:
%load_ext autoreload
%autoreload 2

from results_processor import get_experiments_data, compute_total_energy_per_run, get_variation_runs_data, get_ci_deviation, compute_kruskal_wallis, compute_wilcoxon

### 1. Indexing 

In [2]:
# Power columns requested from the raw "rq2_indexing" measurements.
indexing_power_channels = ["Total DRAM Power [W]", "IA Cores Power [W]"]

# idle_state="Docker running" is forwarded to the loader as-is
# (presumably it selects the idle baseline -- TODO confirm in results_processor).
indexing_experiments_data = get_experiments_data(
    "rq2_indexing",
    indexing_power_channels,
    idle_state="Docker running",
)

# Per-run totals; the cells below read "CPU Cores Energy [J]", "DRAM Energy [J]"
# and "duration [s]" from data derived from this result.
indexing_runs_data = compute_total_energy_per_run(indexing_experiments_data)

In [18]:
# Collect the indexing runs of the three database configurations, labelled in a
# "database" column, and append the combined CPU-cores + DRAM energy as a new column.
index_db_variation_runs = get_variation_runs_data(
    indexing_runs_data,
    ["gte_base_cqadupstack_webmasters_milvus"],
    ["gte_base_cqadupstack_webmasters_qdrant"],
    ["gte_base_cqadupstack_webmasters_weaviate"],
    "database",
    ["milvus", "qdrant", "weaviate"],
).assign(
    **{
        "CPU Cores and DRAM Energy [J]": lambda runs: (
            runs["CPU Cores Energy [J]"] + runs["DRAM Energy [J]"]
        )
    }
)

In [19]:
# Per-database summary of every measured column, rendered as
# "<mean rounded to 2 decimals> ± <get_ci_deviation of the column>".
index_db_variation_runs.groupby("database").agg(
    lambda column: f"{round(column.mean(), 2)} ± {get_ci_deviation(column)}"
)

Unnamed: 0_level_0,CPU Cores Energy [J],DRAM Energy [J],duration [s],CPU Cores and DRAM Energy [J]
database,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
milvus,2142.85 ± 115.0,100.7 ± 6.63,132.04 ± 2.16,2243.56 ± 121.39
qdrant,2333.64 ± 30.18,106.59 ± 3.2,351.55 ± 18.21,2440.23 ± 33.18
weaviate,199.29 ± 4.74,14.0 ± 0.43,10.99 ± 0.19,213.28 ± 5.16


In [5]:
# Indexing: weaviate vs. milvus on CPU-core energy.
# NOTE(review): the stored output is a scipy WilcoxonResult, i.e. the signed-rank
# test for paired samples. Confirm that pairing runs across databases is intended;
# for independent groups the Mann-Whitney U test is the usual choice.
compute_wilcoxon(
    index_db_variation_runs,
    "weaviate",
    "milvus",
    "CPU Cores Energy [J]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))

In [6]:
# Indexing: milvus vs. qdrant on CPU-core energy.
# NOTE(review): the querying section tests the combined
# "CPU Cores and DRAM Energy [J]" column instead -- confirm the difference in
# tested metric between the two sections is intended.
compute_wilcoxon(
    index_db_variation_runs,
    "milvus",
    "qdrant",
    "CPU Cores Energy [J]",
    "database",
)

WilcoxonResult(statistic=np.float64(4.0), pvalue=np.float64(0.0068359375))

In [7]:
# Indexing: weaviate vs. milvus on run duration.
compute_wilcoxon(
    index_db_variation_runs,
    "weaviate",
    "milvus",
    "duration [s]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))

In [8]:
# Indexing: milvus vs. qdrant on run duration.
compute_wilcoxon(
    index_db_variation_runs,
    "milvus",
    "qdrant",
    "duration [s]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))

### 2. Querying 

In [9]:
# Power columns requested from the raw "rq2_querying" measurements.
querying_power_channels = ["Total DRAM Power [W]", "IA Cores Power [W]"]

# NOTE(review): no idle_state is passed here, whereas the indexing cell passes
# idle_state="Docker running" -- confirm the loader's default is the intended one.
querying_experiments_data = get_experiments_data(
    "rq2_querying",
    querying_power_channels,
)

# Per-run totals; the cells below read "CPU Cores Energy [J]", "DRAM Energy [J]"
# and "duration [s]" from data derived from this result.
querying_runs_data = compute_total_energy_per_run(querying_experiments_data)

In [10]:
# Collect the querying runs of the three database configurations, labelled in a
# "database" column, and append the combined CPU-cores + DRAM energy as a new column.
query_db_variation_runs = get_variation_runs_data(
    querying_runs_data,
    ["gte_base_cqadupstack_webmasters_milvus"],
    ["gte_base_cqadupstack_webmasters_qdrant"],
    ["gte_base_cqadupstack_webmasters_weaviate"],
    "database",
    ["milvus", "qdrant", "weaviate"],
).assign(
    **{
        "CPU Cores and DRAM Energy [J]": lambda runs: (
            runs["CPU Cores Energy [J]"] + runs["DRAM Energy [J]"]
        )
    }
)

In [11]:
# Per-database summary of every measured column, rendered as
# "<mean rounded to 2 decimals> ± <get_ci_deviation of the column>".
query_db_variation_runs.groupby("database").agg(
    lambda column: f"{round(column.mean(), 2)} ± {get_ci_deviation(column)}"
)

Unnamed: 0_level_0,CPU Cores Energy [J],DRAM Energy [J],duration [s],CPU Cores and DRAM Energy [J]
database,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
milvus,120.4 ± 3.3,4.8 ± 0.19,9.93 ± 0.11,125.2 ± 3.48
qdrant,125.71 ± 11.09,7.08 ± 0.8,8.91 ± 0.18,132.78 ± 11.89
weaviate,25.02 ± 0.47,1.24 ± 0.04,2.14 ± 0.02,26.26 ± 0.5


In [12]:
# Querying: weaviate vs. milvus on combined CPU-core + DRAM energy.
compute_wilcoxon(
    query_db_variation_runs,
    "weaviate",
    "milvus",
    "CPU Cores and DRAM Energy [J]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))

In [13]:
# Querying: milvus vs. qdrant on combined CPU-core + DRAM energy.
# NOTE(review): the following cell repeats this test with the two groups swapped
# and records a different statistic and p-value, so the helper appears to run a
# directional (one-sided) test -- confirm in results_processor.
compute_wilcoxon(
    query_db_variation_runs,
    "milvus",
    "qdrant",
    "CPU Cores and DRAM Energy [J]",
    "database",
)

WilcoxonResult(statistic=np.float64(21.0), pvalue=np.float64(0.2783203125))

In [14]:
# Querying: same comparison as the previous cell with the group order reversed
# (qdrant first, milvus second), on combined CPU-core + DRAM energy.
compute_wilcoxon(
    query_db_variation_runs,
    "qdrant",
    "milvus",
    "CPU Cores and DRAM Energy [J]",
    "database",
)

WilcoxonResult(statistic=np.float64(34.0), pvalue=np.float64(0.75390625))

In [15]:
# Querying: Kruskal-Wallis test across the qdrant and milvus groups on combined
# CPU-core + DRAM energy. Note the argument order differs from compute_wilcoxon:
# the grouping column comes second and the metric last.
compute_kruskal_wallis(
    query_db_variation_runs,
    "database",
    ["qdrant", "milvus"],
    "CPU Cores and DRAM Energy [J]",
)

KruskalResult(statistic=np.float64(0.28000000000000114), pvalue=np.float64(0.5967012167293555))

In [16]:
# Querying: weaviate vs. milvus on run duration.
compute_wilcoxon(
    query_db_variation_runs,
    "weaviate",
    "milvus",
    "duration [s]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))

In [17]:
# Querying: qdrant vs. milvus on run duration.
compute_wilcoxon(
    query_db_variation_runs,
    "qdrant",
    "milvus",
    "duration [s]",
    "database",
)

WilcoxonResult(statistic=np.float64(0.0), pvalue=np.float64(0.0009765625))