In [1]:
import asyncio
import aiohttp
import time
import random
import numpy as np
import base64
import os
from pathlib import Path
import hashlib

ModuleNotFoundError: No module named 'matplotlib'

In [61]:
# Container for archived benchmark sweeps; another cell stores a sweep's `total` dict in it.
benchmark_for = {}

In [117]:
lang = "eng"  # value of the "lang" field sent with every image submission


async def one_shot(*, img, deadline, poll_interval=1):
    """Submit one image, then poll its status until a result, an error or the deadline.

    Args:
        img: Image payload as text; sent as the ``image`` field of the submit
            request. The MD5 of its encoded text is stored as ``image_hash``
            so the sample can be identified later.
        deadline: Absolute ``time.time()`` value after which polling stops.
        poll_interval: Seconds to wait between two status polls.

    Returns:
        dict with ``start_time``, ``image_hash`` and ``check_delay`` (duration
        in seconds of every status request), plus, depending on the outcome:

        * ``<status>_time`` - first time each reported status was seen,
        * ``result`` / ``result_time`` - set once the API returns a truthy result,
        * ``status_error`` / ``status_error_time`` - set if a poll failed,
        * ``timeout`` / ``timeout_time`` - set if the deadline had passed when
          the poll finished (this can accompany a result or an error).

        All ``*_time`` values are raw ``time.time()`` timestamps.

    Raises:
        Any exception raised by the submit request (including the one from
        ``raise_for_status``). Only polling errors are recorded in the dict.
    """
    ret = dict()

    start_time = time.time()
    async with aiohttp.ClientSession() as session:
        ret["start_time"] = start_time
        resp = await session.post("http://localhost/api/image_submit/", json={"image": img, "lang": lang})
        resp.raise_for_status()

        req_id = (await resp.json())["request_id"]
        ret["image_hash"] = hashlib.md5(img.encode()).hexdigest()

    ret["check_delay"] = []
    while True:
        finished = False
        try:
            # A new session is opened for every poll (unchanged from before).
            # NOTE(review): check_delay therefore presumably includes connection
            # setup, not just server time - confirm this is what should be measured.
            async with aiohttp.ClientSession() as session:
                t = time.time()
                status_check = await session.get("http://localhost/api/status/", params={"request_id": req_id})
                ret["check_delay"].append(time.time() - t)
                status_check.raise_for_status()
                json_data = await status_check.json()

            # Keep only the FIRST time each status is seen. The guard must test
            # the same key that is written, otherwise every poll overwrites it.
            status_key = json_data["status"] + "_time"
            if status_key not in ret:
                ret[status_key] = time.time()

            if json_data.get("result"):
                ret["result_time"] = time.time()
                ret["result"] = json_data["result"]
                finished = True
        except Exception as exc:
            # Record the failure instead of raising so one bad poll does not
            # abort the whole benchmark. Cancellation is not an Exception and
            # propagates normally.
            ret["status_error"] = f"{type(exc)}, {str(exc)}"
            ret["status_error_time"] = time.time()
            finished = True

        # The deadline is checked after every poll, whatever its outcome.
        if time.time() > deadline:
            ret["timeout"] = True
            ret["timeout_time"] = time.time()
            return ret
        if finished:
            # Result or error: return immediately, no extra sleep.
            return ret
        await asyncio.sleep(poll_interval)
                

async def start_client(ttl, imgs):
    """Simulate one client that submits randomly chosen images back-to-back.

    Args:
        ttl: Lifetime of the client in seconds. No new request is started in
            the last 5 seconds, and ``start + ttl`` is passed to ``one_shot``
            as the polling deadline.
        imgs: Non-empty sequence of image payloads to pick from.

    Returns:
        List of the dicts returned by ``one_shot``, in submission order. A
        request that hit the deadline is still included (it carries a
        ``timeout`` key) and is always the last entry.
    """
    start_time = time.time()
    deadline = start_time + ttl
    ret = []
    # Leave a 5 s margin at the end so a late request can still complete.
    while time.time() - start_time <= ttl - 5:
        img = random.choice(imgs)
        try:
            result = await one_shot(img=img, deadline=deadline)
        except asyncio.TimeoutError:
            print("Timeout")
            break
        ret.append(result)
        # Check the request record, not the result list: once a request ran
        # past the deadline this client is done.
        if "timeout" in result:
            break
    return ret


def ls_images(directory="./text_samples/"):
    """Load every ``.png`` / ``.jpg`` file under ``directory`` as base64 text.

    Args:
        directory: Root folder to scan; subdirectories are included.

    Returns:
        List of strings, one per image, each the ``base64.encodebytes``
        encoding of the file content decoded to ``str``. Files are visited in
        sorted order so the list is the same on every run.
    """
    imgs = []
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for filename in sorted(files):
            if not filename.endswith((".png", ".jpg")):
                continue
            # Join with the directory currently being walked, not the top-level
            # one, so files found in subdirectories can actually be opened.
            with open(Path(root) / filename, "rb") as f:
                imgs.append(base64.encodebytes(f.read()).decode())
    return imgs
                  
    
async def benchmark(clients, duration):
    """Run ``clients`` simulated clients concurrently for ``duration`` seconds.

    Args:
        clients: Number of concurrent ``start_client`` coroutines to launch.
        duration: Lifetime in seconds handed to every client.

    Returns:
        Tuple ``(start_time, result)``: the ``time.time()`` taken just before
        the clients were started, and a list with one entry per client, each
        being that client's list of request records.

    Raises:
        ValueError: if ``ls_images`` finds no sample images.
    """
    imgs = ls_images()
    if not imgs:
        # Fail early with a clear message rather than an IndexError from
        # random.choice inside every client.
        raise ValueError("no .png/.jpg sample images found; nothing to submit")
    start_time = time.time()
    result = await asyncio.gather(*(start_client(duration, imgs) for _ in range(clients)))

    return start_time, result

In [118]:
latencies = dict()
total = dict()
for clients in [1,5,15,20]:
    print(f"{clients} clients")
    query_start, bm_data = await benchmark(clients, 60)
    t = []
    for x in bm_data:
        t.extend(x)
    for a in t:
        for k in a:
            if k.endswith("time"):
                a[k] -= query_start
    total[clients] = t
    if clients != 20:
        print("Sleeping")
        time.sleep(10)
        print("Wake up")

1 clients
Sleeping
Wake up
5 clients
Sleeping
Wake up
15 clients
Sleeping
Wake up
20 clients


In [119]:
# Submit-to-result latency (result_time - start_time) of every request that
# produced a result, grouped by number of concurrent clients.
latencies = {
    n_clients: np.array([
        rec["result_time"] - rec["start_time"]
        for rec in records
        if "result_time" in rec
    ])
    for n_clients, records in total.items()
}
latencies

{1: array([1.07777786, 1.05231786, 1.07082009, 1.04630876, 1.0529232 ,
        1.04497695, 1.05969095, 1.05408192, 1.05370712, 1.05895591,
        1.05386806, 1.039253  , 1.04991603, 1.04283714, 1.05556703,
        1.03823423, 1.04738402, 1.04284716, 1.06275201, 1.06337476,
        1.04367304, 1.06091499, 1.04877424, 1.04935908, 1.06383801,
        1.04841065, 1.05648065, 1.0564971 , 1.06780505, 1.40447211,
        1.04994392, 1.07639122, 1.03777814, 1.06051111, 1.06547213,
        1.06999493, 1.05149484, 1.04783607, 1.0488131 , 1.06206679,
        1.04994488, 1.04945087, 1.05467796, 1.05646491, 1.06835794,
        1.05273819, 1.05364203, 1.05486321, 1.05268717, 1.045717  ,
        1.05023837, 1.04920816]),
 5: array([ 6.23474121, 19.34756899,  1.04745913,  4.09591103, 21.2988627 ,
        10.32612467,  5.11631107,  1.11975288,  7.19150186, 18.33629298,
         1.05371904,  4.09577703,  1.04250383, 10.31599092,  1.07722688,
         1.04832006, 18.34235311, 28.55169797, 15.42255402,  

In [121]:
# 99th percentile of the status-request durations (check_delay) of every
# request that produced a result, grouped by number of concurrent clients.
checks = {
    n_clients: np.array([
        np.percentile(rec["check_delay"], 99)
        for rec in records
        if "result_time" in rec
    ])
    for n_clients, records in total.items()
}
checks

{1: array([0.0104006 , 0.00871804, 0.01239119, 0.009188  , 0.00953139,
        0.012288  , 0.01135096, 0.00877285, 0.00946359, 0.0093109 ,
        0.01291891, 0.00753058, 0.00969471, 0.00990734, 0.00975547,
        0.00947188, 0.00925528, 0.00961751, 0.01362661, 0.01252462,
        0.01125412, 0.01022511, 0.00842418, 0.01034321, 0.01001208,
        0.00959525, 0.0113418 , 0.00935101, 0.01110411, 0.00933569,
        0.00913822, 0.01399074, 0.00967563, 0.00986804, 0.01064601,
        0.01209091, 0.01256956, 0.01071144, 0.01035476, 0.01194014,
        0.01042233, 0.00976167, 0.01064808, 0.00971159, 0.00866784,
        0.00981869, 0.00850254, 0.0103393 , 0.00972083, 0.01241395,
        0.01236411, 0.01148064]),
 5: array([0.02221519, 0.02212209, 0.0093295 , 0.00974304, 0.01437625,
        0.02606959, 0.01257344, 0.02767814, 0.02290833, 0.02076996,
        0.01066174, 0.01055848, 0.01108652, 0.02332499, 0.01498038,
        0.01582849, 0.02562655, 0.02715013, 0.02816998, 0.02496794,
        

In [112]:
# 50th percentile of the latency samples for each client count.
{
    n_clients: np.percentile(samples, 50)
    for n_clients, samples in latencies.items()
}

{1: 1.0520009994506836,
 5: 11.190875053405762,
 15: 5.650650858879089,
 20: 4.63698673248291}

In [None]:
50: 
{1: 1.048432469367981,
 5: 2.1049283742904663,
 15: 6.161871910095215,
 20: 8.814648151397705}

95:
{1: 1.0595536351203918,
 5: 3.1468580603599547,
 15: 7.338586020469665,
 20: 9.959573864936829}



In [71]:
# Archive the current sweep results (`total`) in `benchmark_for` under key 1.
# NOTE(review): what the key 1 identifies is not visible in this notebook - confirm.
benchmark_for[1] = total

In [109]:
from multiprocessing import Pool

def s(a,b):
    """Block for ``a`` seconds, then return ``b`` unchanged."""
    delay, payload = a, b
    time.sleep(delay)
    return payload

# Run `s` on a pool of 4 worker processes; starmap unpacks each tuple into
# positional arguments and returns the results in input order.
with Pool(processes=4) as pool:
    x = pool.starmap(
        s,
        [(1, 2), (3, 4)],
    )