In [148]:
"""
Run these before:

"""
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

from glob import glob

SAVE_LOCATION = "images"
BECHMARKS_LOCATION  = "./bechmarks"

DBS = [
    "mongo",
    "postgres",
    "cassandra"
]

# Run the scripts
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        something = glob(f"benchmarks/*operations-size-{size}-batch_size-{batch}.json")
        if something:
            files[f"{size}-{batch}"] = something

In [153]:
def plot_by_points(things):
    dbs = [
        "mongo",
        "postgres",
        "cassandra"
    ]
    values_to_plot = {
        'connec': "red",
        'create': "blue",
        'add_jobs': "yellow",
        'add_blobs': "black",
        'query_jobs_all': "pink",
        'query_blobs_all': "orange"
    }
    colors = {
        "mongo": "green",
        "postgres": "blue",
        "cassandra": "red"
    }
    save_name = things[0].split("/")[-1].split("-")
    data = {}
    for file in things:
        values = json.load(open(file, 'r'))
        for i, key in enumerate(values_to_plot):
            for item in values.keys():
                if key in item:
#                     plt.scatter(i+1, values[item], label=f"{key}-{file.split('/')[-1].split('-')[0]}-{i}", color=colors[file.split("/")[-1].split("-")[0]])
                    plt.scatter(i+1, values[item], label=f"{file.split('/')[-1].split('-')[0]}", color=colors[file.split("/")[-1].split("-")[0]])
                elif key == "create_tables" :
#                     plt.scatter(i+1, 0, label=f"{key}-{file.split('/')[-1].split('-')[0]}-{i}", color=colors[file.split("/")[-1]].split("-")[0])
                    plt.scatter(i+1, 0, label=f"{file.split('/')[-1].split('-')[0]}", color=colors[file.split("/")[-1]].split("-")[0])

    plt.legend(loc="upper left")
    plt.title(f"DB Comparison for {save_name[-3]+'-'+save_name[-1][:-5]} ")
    plt.xlabel("test")
    plt.xticks(range(1, len(["connect", "create_tables", "addjobs", "addblobs"])+1), ["connect", "create_tables", "addjobs", "addblobs"])
    plt.ylabel("time in seconds")
    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(18.5, 10.5)
    print(f'images/{save_name[-3]+"-"+save_name[-1][:-5]}')
    fig.savefig(f'images/{save_name[-3]+"-"+save_name[-1][:-5]}', dpi=100)
    plt.clf()

In [154]:
for key in files.keys():
    plot_by_points(files[key])

images/1000-1
images/1000-10
images/1000-100
images/1000-1000
images/1000-450
images/10000-1
images/10000-10
images/10000-100
images/10000-1000
images/10000-450
images/100000-1
images/100000-100
images/100000-1000
images/100000-450
images/100000-300


<Figure size 1332x756 with 0 Axes>

# Plotting the Memory Usage

In [182]:
def convert_to_float(item):
    return float(item) if "-" not in item else None


def plot_mem_usage_percent(size=10000, batch_size=1):
    filename = f"./benchmarks/*-performance-size-{size}-batch_size-{batch_size}.json"
    files = glob(filename)
    plot_points = {}
    for file in files:
        data = json.load(open(file, 'r'))['stats']
        mem_usage_percent = [convert_to_float(i['MEM %'][:-1]) for i in data]
        plot_points[file.split("/")[-1].split("-")[0]] = mem_usage_percent

    if plot_points:

        for key in plot_points.keys():
            plt.plot(plot_points[key], label=key)

        plt.title("mem_usage_percent")
        plt.legend(loc="upper left")
        plt.savefig(f"images/mem_usage_percent-{size}-{batch_size}.png", dpi=100)
        plt.clf()

In [183]:
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        plot_mem_usage_percent(size, batch)

<Figure size 432x288 with 0 Axes>

# Plotting the CPU usage

In [187]:
def convert_to_float(item):
    return float(item) if "-" not in item else None


def plot_cpu_usage_percent(size=10000, batch_size=1):
    filename = f"./benchmarks/*-performance-size-{size}-batch_size-{batch_size}.json"
    files = glob(filename)
    plot_points = {}
    for file in files:
        data = json.load(open(file, 'r'))['stats']
        mem_usage_percent = [convert_to_float(i['CPU %'][:-1]) for i in data]
        plot_points[file.split("/")[-1].split("-")[0]] = mem_usage_percent

    if plot_points:

        for key in plot_points.keys():
            plt.plot(plot_points[key], label=key)

        plt.title("mem_usage_percent")
        plt.legend(loc="upper left")
        plt.savefig(f"images/cpu_usage_percent-{size}-{batch_size}.png", dpi=100)
        plt.clf()

In [189]:
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        plot_cpu_usage_percent(size, batch)

<Figure size 432x288 with 0 Axes>

# Ploting the Mem Usage

In [218]:
def convert_to_float(item):
    return float(item) if "-" not in item else None


def mem_cpu_usage(size=10000, batch_size=1):

    filename = f"./benchmarks/*-performance-size-{size}-batch_size-{batch_size}.json"
    files = glob(filename)
    plot_points = {}
    for file in files:
        data = json.load(open(file, 'r'))['stats']
        mem_usage_total=[]
        for mem in [i['MEM USAGE/LIMIT'].split("/")[0] for i in data]:
            mem = mem.strip()
            if mem=="--":
                mem=None
            elif mem == '':
                mem = 0
            else:
                if "kiB" in mem:
                    mem = float(mem.replace("kiB", "")) # for kbs
                elif "MiB" in mem:
                    mem = float(mem.replace("MiB", ""))*1024 # for kbs
                elif "GiB" in mem:
                    mem = float(mem.replace("GiB", ""))*1024*1024 # for kbs
            mem_usage_total.append(mem)
        plot_points[file.split("/")[-1].split("-")[0]] = mem_usage_total

    if plot_points:
        print(1)
        for key in plot_points.keys():
            plt.plot(plot_points[key], label=key)

        plt.title("mem_usage_percent")
        plt.legend(loc="upper left")
        plt.savefig(f"images/MEM_USAGE-{size}-{batch_size}.png", dpi=100)
        plt.clf()

In [219]:
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        mem_cpu_usage(size, batch)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


<Figure size 432x288 with 0 Axes>

# Plotting the NET io

In [264]:
def convert_to_float(item):
    return float(item) if "-" not in item else None


def net_io_usage(size=10000, batch_size=1):

    filename = f"./benchmarks/*-performance-size-{size}-batch_size-{batch_size}.json"
    files = glob(filename)
    plot_points = {}
    for file in files:
        data = json.load(open(file, 'r'))['stats']
        net_io = []
        for (i, j) in [i['NET I/O'].replace("0B", "0").split("/") for i in data if "--" not in i['NET I/O']]:
            if i == '':
                i = 0
            else:
                if "kB" in i:
                    i = float(i.replace("kB", "")) # for kbs
                elif "MB" in i:
                    i = float(i.replace("MB", ""))*1024 # for kbs
                elif "GB" in i:
                    i = float(i.replace("GB", ""))*1024*1024 # for kbs

            if j == '':
                j = 0
            else:
                if "kB" in j:
                    j = float(j.replace("kB", "")) # for kbs
                elif "MB" in j:
                    j = float(j.replace("MB", ""))*1024 # for kbs
                elif "GB" in j:
                    j = float(j.replace("GB", ""))*1024*1024 # for kbs
#             net_io.append([i, j])
            net_io.append(i)

        plot_points[file.split("/")[-1].split("-")[0]] = net_io

    if plot_points:
        print(1)
        for key in plot_points.keys():
            plt.plot(plot_points[key], label=key)

        plt.title("mem_usage_percent")
        plt.legend(loc="upper left")
        plt.savefig(f"images/NETIO-{size}-{batch_size}.png", dpi=100)
        plt.clf()

In [265]:
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        net_io_usage(size, batch)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


<Figure size 432x288 with 0 Axes>

# Plotting BLOCK io

In [267]:
def convert_to_float(item):
    return float(item) if "-" not in item else None


def block_io_usage(size=10000, batch_size=1):

    filename = f"./benchmarks/*-performance-size-{size}-batch_size-{batch_size}.json"
    files = glob(filename)
    plot_points = {}
    for file in files:
        data = json.load(open(file, 'r'))['stats']
        net_io = []
        for (i, j) in [i['BLOCK I/O'].replace("0B", "0").split("/") for i in data if "--" not in i['NET I/O']]:
            if i == '':
                i = 0
            else:
                if "kB" in i:
                    i = float(i.replace("kB", "")) # for kbs
                elif "MB" in i:
                    i = float(i.replace("MB", ""))*1024 # for kbs
                elif "GB" in i:
                    i = float(i.replace("GB", ""))*1024*1024 # for kbs

            if j == '':
                j = 0
            else:
                if "kB" in j:
                    j = float(j.replace("kB", "")) # for kbs
                elif "MB" in j:
                    j = float(j.replace("MB", ""))*1024 # for kbs
                elif "GB" in j:
                    j = float(j.replace("GB", ""))*1024*1024 # for kbs
#             net_io.append([i, j])
            net_io.append(i)

        plot_points[file.split("/")[-1].split("-")[0]] = net_io

    if plot_points:
        print(1)
        for key in plot_points.keys():
            plt.plot(plot_points[key], label=key)

        plt.title("mem_usage_percent")
        plt.legend(loc="upper left")
        plt.savefig(f"images/BLOCK-{size}-{batch_size}.png", dpi=100)
        plt.clf()

In [268]:
sizes = [10, 100, 1000, 10000, 10000, 100000]
batch_sizes = [1, 10, 100, 1000, 450, 300]
files = {}
for size in sizes:
    for batch in batch_sizes:
        block_io_usage(size, batch)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


<Figure size 432x288 with 0 Axes>