In [2]:
import tempfile
import subprocess
import pandas as pd
from tqdm import tqdm
from time import sleep
from tiresias.server import api

In [3]:
# State shared with the later cells: handles of the spawned client processes
# and the timing records appended by the benchmark loop.
clients = []
results = []
nb_clients = 110
# To run against a local server instead, launch it here:
#server = subprocess.Popen(['tiresias-server']) # This is local
server_hostname = "http://18.208.172.20:3000" # This is on AWS

# Spawn the clients one at a time. Each gets a fresh temporary database
# directory and its own port (8000, 8001, ...), and is pointed at the server.
for client_id in tqdm(range(nb_clients)):
    client_command = [
        'tiresias-client',
        "--db_dir", tempfile.mkdtemp(),
        "--db_port", str(8000 + client_id),
        "--server", server_hostname,
    ]
    clients.append(subprocess.Popen(client_command))
    # Stagger the launches (presumably so the clients don't all start at once).
    sleep(0.5)

# Extra pause after the last launch before any query is issued.
sleep(5.0)

100%|██████████| 110/110 [00:55<00:00,  1.97it/s]


In [6]:
def run_timed_query(task, sample_size, spec, max_polls=5):
    """Submit one query, poll until it finishes, and record its running time.

    The query described by ``spec`` is created on ``server_hostname`` and then
    fetched up to ``max_polls`` times, sleeping 1s, 2s, 3s, ... before each
    fetch, until the fetched query contains an ``"end"`` key. One record
    ``{"task", "nb_users", "running_time"}`` is appended to the global
    ``results`` list.

    If the query still has no ``"end"`` after the last poll, the running time
    is recorded as NaN (and a warning is written) instead of raising
    ``KeyError`` and aborting the whole benchmark. Pandas' ``mean`` and
    ``count`` skip NaN, so timed-out runs stay visible as missing samples.

    Args:
        task: Label stored in the record (e.g. "median" or "regression").
        sample_size: Value stored as "nb_users" in the record.
        spec: Query description passed to ``api.create_query``.
        max_polls: Maximum number of fetch attempts.
    """
    query_id = api.create_query(server_hostname, spec)
    query = {}
    for attempt in range(max_polls):
        # Back off a little more on every attempt: 1s, 2s, 3s, ...
        sleep(1.0 + attempt)
        query = api.fetch_query(server_hostname, query_id)
        if "end" in query:
            break

    if "end" in query:
        running_time = query["end"] - query["start"]
    else:
        tqdm.write("query %s (%s, min_sample_size=%s) did not finish after %d polls"
                   % (query_id, task, sample_size, max_polls))
        running_time = float("nan")

    results.append({
        "task": task,
        "nb_users": sample_size,
        "running_time": running_time
    })


# Repeat the whole sweep 20 times so each (nb_users, task) pair gets several
# timing samples to average over.
for _ in range(20):
    for sample_size in tqdm([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]):
        run_timed_query("median", sample_size, {
            "type": "basic",
            "epsilon": 1.0,
            "min_sample_size": sample_size,
            "featurizer": "SELECT x1 FROM hello_world.two_sum LIMIT 1",
            "aggregator": "median"
        })
        # Short pause between the two queries of one sample size.
        sleep(1.0)

        run_timed_query("regression", sample_size, {
            "type": "machine_learning",
            "epsilon": 10.0,
            "min_sample_size": sample_size,
            "featurizer": "SELECT x1, x2, y FROM hello_world.two_sum LIMIT 1",
            "aggregator": {
                "model": "LinearRegression",
                "inputs": ["x1", "x2"],
                "output": "y",
                "data_norm": [(0.0, 100.0), (0.0, 100.0)]
            }
        })

pd.DataFrame(results)

100%|██████████| 10/10 [00:37<00:00,  3.75s/it]
100%|██████████| 10/10 [00:38<00:00,  3.80s/it]
100%|██████████| 10/10 [00:40<00:00,  4.10s/it]
100%|██████████| 10/10 [00:38<00:00,  3.89s/it]
100%|██████████| 10/10 [00:39<00:00,  3.93s/it]
100%|██████████| 10/10 [00:39<00:00,  3.90s/it]
100%|██████████| 10/10 [00:41<00:00,  4.11s/it]
100%|██████████| 10/10 [00:41<00:00,  4.13s/it]
100%|██████████| 10/10 [00:45<00:00,  4.54s/it]
100%|██████████| 10/10 [00:44<00:00,  4.45s/it]
100%|██████████| 10/10 [00:39<00:00,  3.93s/it]
100%|██████████| 10/10 [00:39<00:00,  3.91s/it]
100%|██████████| 10/10 [00:42<00:00,  4.27s/it]
100%|██████████| 10/10 [00:40<00:00,  4.09s/it]
100%|██████████| 10/10 [00:41<00:00,  4.11s/it]
100%|██████████| 10/10 [00:44<00:00,  4.41s/it]
100%|██████████| 10/10 [00:42<00:00,  4.22s/it]
100%|██████████| 10/10 [00:40<00:00,  4.02s/it]
100%|██████████| 10/10 [00:43<00:00,  4.33s/it]
  0%|          | 0/10 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [7]:
# Average the recorded running times for every (nb_users, task) pair and
# flatten the group keys back into ordinary columns.
df = (
    pd.DataFrame(results)
    .groupby(["nb_users", "task"])
    .mean()
    .reset_index()
)

# Export one CSV per task, without the index column.
for task_name, csv_path in (("median", "median.csv"), ("regression", "regression.csv")):
    df.loc[df["task"] == task_name].to_csv(csv_path, index=False)

df

Unnamed: 0,nb_users,task,running_time
0,10,median,0.223859
1,10,regression,0.221768
2,20,median,0.305612
3,20,regression,0.356339
4,30,median,0.437753
5,30,regression,0.441694
6,40,median,0.544083
7,40,regression,0.548015
8,50,median,0.623412
9,50,regression,0.659147


In [8]:
# Sanity check: how many timing samples were collected for each
# (nb_users, task) pair.
df = pd.DataFrame(results)
samples_per_group = df.groupby(by=["nb_users", "task"]).count()
samples_per_group

Unnamed: 0_level_0,Unnamed: 1_level_0,running_time
nb_users,task,Unnamed: 2_level_1
10,median,20
10,regression,19
20,median,19
20,regression,19
30,median,19
30,regression,19
40,median,19
40,regression,19
50,median,19
50,regression,19


In [None]:
# Shut down every process this notebook launched.
# `server` only exists if the local `tiresias-server` launch line was enabled;
# when benchmarking a remote server it is never defined, so look it up
# defensively instead of raising NameError and leaving all clients running.
nodes = list(clients)
local_server = globals().get("server")
if local_server is not None:
    nodes.insert(0, local_server)

# Ask everything to stop first, so the processes can shut down in parallel...
for node in nodes:
    node.terminate()

# ...then reap each one so no zombie processes are left behind, force-killing
# any process that does not exit in time.
for node in nodes:
    try:
        node.wait(timeout=10)
    except subprocess.TimeoutExpired:
        node.kill()
        node.wait()