In [1]:
import asyncio
import aiohttp
import time
import random
import numpy as np
import base64
import os
from pathlib import Path
import hashlib

ModuleNotFoundError: No module named 'matplotlib'

In [61]:
# Holder for finished benchmark runs; a later cell stores `total` in it under a run key.
benchmark_for = {}

In [87]:
# Language code sent as the "lang" field of every image submission made by one_shot.
lang = "eng"

async def one_shot(*, img, deadline):
    """Submit one image and poll its status until a result, an error or the deadline.

    Parameters
    ----------
    img : str
        Payload sent as the "image" JSON field; its MD5 hex digest is stored
        under "image_hash".
    deadline : float
        Absolute ``time.time()`` value; polling stops once it has passed.

    Returns
    -------
    dict
        Always contains "start_time" and "image_hash". Depending on what
        happened it also contains:

        * "<status>_time" - first time each reported status was observed;
        * "result" / "result_time" - when the status endpoint returned a
          truthy "result";
        * "status_error" / "status_error_time" - when a status poll failed;
        * "timeout" / "timeout_time" - when the deadline had passed after
          the last poll (this can accompany a result or an error).

    Raises
    ------
    Exception
        Anything raised by the initial submit request is not caught here.
        Errors during status polling are recorded in the returned dict.
    """
    ret = dict()

    start_time = time.time()
    # One session for the submit and all status polls, instead of opening a
    # fresh session (and connection pool) for every poll.
    async with aiohttp.ClientSession() as session:
        ret["start_time"] = start_time
        async with session.post(
            "http://localhost/api/image_submit/",
            json={"image": img, "lang": lang},
        ) as resp:
            resp.raise_for_status()
            req_id = (await resp.json())["request_id"]
        ret["image_hash"] = hashlib.md5(img.encode()).hexdigest()

        while True:
            finished = False
            try:
                async with session.get(
                    "http://localhost/api/status/",
                    params={"request_id": req_id},
                ) as status_check:
                    status_check.raise_for_status()
                    json_data = await status_check.json()

                # Record only the FIRST time a status is seen. The check must use
                # the same key that is stored, otherwise every poll overwrites it.
                status_key = json_data["status"] + "_time"
                if status_key not in ret:
                    ret[status_key] = time.time()

                if json_data.get("result"):
                    ret["result_time"] = time.time()
                    ret["result"] = json_data["result"]
                    finished = True
            except Exception as exc:
                ret["status_error"] = f"{type(exc)}, {str(exc)}"
                ret["status_error_time"] = time.time()
                finished = True

            # The deadline check runs on every path (as before), but outside a
            # `finally` block: a `return` in `finally` would swallow in-flight
            # exceptions such as task cancellation.
            if time.time() > deadline:
                ret["timeout"] = True
                ret["timeout_time"] = time.time()
                break
            if finished:
                break
            await asyncio.sleep(1)
    return ret
                

async def start_client(ttl, imgs):
    """Simulate one client that submits random images back to back for ``ttl`` seconds.

    Parameters
    ----------
    ttl : float
        Lifetime of the client in seconds. No new request is started during
        the last 5 seconds, and ``start + ttl`` is passed to ``one_shot`` as
        its deadline.
    imgs : sequence
        Images to choose from with ``random.choice``; must be non-empty.

    Returns
    -------
    list of dict
        The dicts returned by ``one_shot``, in submission order. A record that
        carries the "timeout" flag is kept and is always the last one.
    """
    start_time = time.time()
    ret = []
    while True:
        img = random.choice(imgs)
        if time.time() - start_time > ttl - 5:  # threshold
            return ret
        try:
            result = await one_shot(img=img, deadline=start_time + ttl)
        except asyncio.TimeoutError:
            print("Timeout")
            return ret
        ret.append(result)
        # The flag lives on the record just returned, not on the list of records.
        if "timeout" in result:
            return ret


def ls_images(directory="./text_samples/"):
    """Load every .png / .jpg file under ``directory`` as a base64 string.

    Parameters
    ----------
    directory : str or Path, optional
        Root folder to scan recursively. Defaults to "./text_samples/".

    Returns
    -------
    list of str
        ``base64.encodebytes`` output (decoded to text) for each image, in
        sorted walk order so repeated calls return the same list.
    """
    imgs = []
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for filename in sorted(files):
            if not filename.endswith((".png", ".jpg")):
                continue
            # Join with `root`, not the top-level directory: os.walk also yields
            # files that live in subdirectories.
            with open(Path(root) / filename, "rb") as f:
                imgs.append(base64.encodebytes(f.read()).decode())
    return imgs
                  
    
async def benchmark(clients, duration):
    """Run ``clients`` concurrent ``start_client`` loops for ``duration`` seconds each.

    Returns a ``(start_timestamp, per_client_results)`` tuple, where the
    timestamp is taken after the images are loaded and the second item is the
    list produced by ``asyncio.gather`` (one entry per client).
    """
    images = ls_images()
    began_at = time.time()
    workers = [start_client(duration, images) for _ in range(clients)]
    per_client = await asyncio.gather(*workers)
    return began_at, per_client

In [92]:
latencies = dict()
total = dict()
for clients in [1,5,15,20]:
    print(f"{clients} clients")
    query_start, bm_data = await benchmark(clients, 60)
    t = []
    for x in bm_data:
        t.extend(x)
    for a in t:
        for k in a:
            if k.endswith("time"):
                a[k] -= query_start
    total[clients] = t
    print("Sleeping")
    time.sleep(10)
    print("Wake up")

1 clients
Sleeping
Wake up
5 clients
Sleeping
Wake up
15 clients
Sleeping
Wake up
20 clients
Sleeping
Wake up


In [93]:
# Per client count: elapsed time from "start_time" to "result_time" for every
# request that produced a result; records without "result_time" are skipped.
latencies = {
    client_count: np.array(
        [
            record["result_time"] - record["start_time"]
            for record in records
            if "result_time" in record
        ]
    )
    for client_count, records in total.items()
}
latencies

{1: array([15.23122716, 25.32169294,  2.08046198,  1.04274201,  1.05871487,
         1.0404799 ,  1.0521431 ,  1.04868603,  1.04458189,  1.05296612,
         1.05737019,  1.04781175,  1.0481441 ,  1.04587889,  1.05462599]),
 5: array([ 3.25070429, 21.29825401,  1.06564569, 30.42092204,  7.23122191,
        39.66141105,  9.14002728,  7.22568393, 36.62498593,  2.06458902,
         1.04498291,  9.15673709,  7.27069736, 37.62671995,  3.08446097,
         1.06667328, 10.11943817,  7.29388309, 33.54517508,  7.10188794,
        11.20923638]),
 15: array([24.88037682,  1.26374292, 13.52970695, 47.48995495, 40.26038885,
         1.20713615, 21.48198295, 53.43902087, 11.58625889, 26.8931911 ,
        13.65891099]),
 20: array([35.22230029,  1.31637692, 21.89293885,  8.79851103, 16.75828409,
        54.57278919, 14.82511401, 26.081532  , 38.25341105, 14.82149601,
         1.35204792,  2.52271485,  1.51572609])}

In [94]:
# 50th percentile of the measured latencies for each client count.
{
    client_count: np.percentile(samples, 50)
    for client_count, samples in latencies.items()
}

{1: 1.0521430969238281,
 5: 7.2938830852508545,
 15: 21.481982946395874,
 20: 14.825114011764526}

In [None]:
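# 50th percentile (keys match the client counts used above; presumably latency in seconds from another run - TODO confirm):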
{1: 1.048432469367981,
 5: 2.1049283742904663,
 15: 6.161871910095215,
 20: 8.814648151397705}

# 95th percentile (same layout as the table labelled 50 above):
{1: 1.0595536351203918,
 5: 3.1468580603599547,
 15: 7.338586020469665,
 20: 9.959573864936829}



In [71]:
# Keep this run's `total` under key 1 of benchmark_for.
# NOTE(review): what the key 1 identifies is not shown in this notebook - TODO confirm.
# Requires the cells that define `benchmark_for` and `total` to have run first.
benchmark_for[1] = total