In [1]:
import sys
from pathlib import Path
from threading import Thread
from subprocess import Popen, PIPE
from collections import defaultdict

import numpy as np
import pandas as pd

In [2]:
# Show the entries of ../out; the evaluation loop further down iterates over
# this same directory. Names appear to follow "<timestamp>_<score>_<label>"
# (inferred from the listing below -- confirm against whatever writes them).
!ls ../out

2022-08-14T22:24:50.595949_6397.73_sa_5_0.5
2022-08-14T22:27:21.957662_6670.33_sa_double_tl
2022-08-14T22:41:23.433876_7152.92_sa_5x_tl
2022-08-15T07:28:46.169766_6590.63_radius3
2022-08-15T11:57:10.287165_7293.30_5x_tl
2022-08-15T12:12:25.968346_4774.42_5x_tl_debug
2022-08-15T20:06:16.422917_6690.59_erase2
2022-08-15T20:17:02.557507_6757.84_gcc
2022-08-16T00:24:18.000224_6831.50_attraction
2022-08-16T02:04:03.826319_6613.95_juggernaut


In [3]:
# Most recent integer parsed per key; populated by read_stream().
scores = {}

def read_stream(name, in_file, out_file):
    """Scan a text stream and record the last integer-terminated line.

    For every line whose final whitespace-separated token parses as an
    ``int``, ``scores[name]`` is overwritten with that value, so after the
    stream is exhausted the entry holds the value from the last such line.
    Blank lines and lines whose last token is not an integer are skipped.

    Args:
        name: Hashable key under which the value is stored in ``scores``.
        in_file: Iterable of text lines (e.g. a pipe opened in text mode).
        out_file: Unused; kept so existing callers that pass a destination
            stream keep working.
    """
    for line in in_file:
        tokens = line.split()
        if not tokens:
            # Blank line: nothing to parse.
            continue
        try:
            scores[name] = int(tokens[-1])
        except ValueError:
            # Last token is not an integer; ignore this line. Anything other
            # than a parse failure is a real error and is allowed to surface.
            continue

def run(cmd, name):
    """Run a shell command and feed its stdout/stderr through read_stream.

    Both pipes are drained concurrently by background threads so the child
    cannot block on a full pipe buffer. The function returns only after the
    child has exited AND both readers have finished, so anything the readers
    record is complete by the time the caller continues.

    Args:
        cmd: Command line, executed through the shell.
        name: Key handed to ``read_stream`` for both streams. Because stdout
            and stderr share the key, whichever reader writes last wins.

    Returns:
        The finished ``Popen`` object (its pipes are already consumed and
        closed; ``returncode`` is set).
    """
    proc = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True)
    readers = [
        Thread(target=read_stream, args=(name, proc.stdout, sys.stdout)),
        Thread(target=read_stream, args=(name, proc.stderr, sys.stderr)),
    ]
    for reader in readers:
        reader.start()
    proc.wait()
    # wait() only covers the child process; the readers may still be working
    # through buffered output, so wait for them before returning.
    for reader in readers:
        reader.join()
    # Release the pipe file descriptors explicitly instead of relying on GC;
    # this function is called many times in a loop.
    proc.stdout.close()
    proc.stderr.close()
    return proc

# Number of test cases evaluated per run directory (inputs 0000..0099).
N = 100
for out_dir in sorted(Path("../out").iterdir()):
    # Directory names have the form "<timestamp>_<score>_<label>". Splitting
    # on the first two underscores keeps the label intact (it may contain
    # underscores itself) and does not depend on how many characters the
    # score field has.
    name = out_dir.name.split("_", 2)[-1]
    for i in range(N):
        out_file = out_dir / f"{i:04d}.txt"
        cmd = f"../tools/target/release/vis ../tools/in/{i:04d}.txt {out_file}"
        # Results land in `scores` under the key (label, case index).
        run(cmd, (name, i))

In [4]:
# Pivot the flat {(label, case): score} mapping into {label: [score per case]}.
# Cases with no recorded score keep the default value 0.
dict_out_dir_to_scores = defaultdict(lambda: [0] * N)
for key, value in scores.items():
    label, case = key
    per_case = dict_out_dir_to_scores[label]
    per_case[case] = value

In [5]:
# Variants to compare, in display order.
columns = [
    "radius3",
    "erase2",
    "gcc",
    "attraction",
    "juggernaut",
]
# One row per test case, one column per selected variant.
df = pd.DataFrame(dict_out_dir_to_scores).loc[:, columns]
# Pairwise correlation of per-case scores between variants.
df.corr()

Unnamed: 0,radius3,erase2,gcc,attraction,juggernaut
radius3,1.0,0.845305,0.828297,0.826099,0.856895
erase2,0.845305,1.0,0.931954,0.872396,0.862885
gcc,0.828297,0.931954,1.0,0.873411,0.854806
attraction,0.826099,0.872396,0.873411,1.0,0.854852
juggernaut,0.856895,0.862885,0.854806,0.854852,1.0


In [6]:
# Index (into `columns`) of the highest-scoring variant for each test case.
# Restricting to `columns` keeps the result correct even if the cell is re-run
# after later cells have appended extra columns to `df`.
df[columns].to_numpy().argmax(axis=1)

array([3, 1, 3, 4, 2, 2, 2, 2, 4, 2, 4, 2, 3, 2, 3, 3, 3, 3, 4, 4, 0, 0,
       1, 3, 2, 3, 0, 3, 0, 0, 0, 3, 3, 2, 1, 3, 3, 0, 2, 1, 0, 4, 1, 3,
       3, 3, 0, 2, 3, 0, 4, 3, 2, 2, 0, 3, 2, 2, 3, 0, 1, 0, 4, 4, 3, 4,
       1, 0, 1, 4, 3, 4, 1, 4, 4, 2, 2, 1, 4, 0, 2, 1, 1, 2, 3, 0, 1, 2,
       2, 4, 2, 2, 3, 2, 0, 0, 2, 3, 3, 3])

In [7]:
# Number of test cases on which each variant (in `columns` order) scores best.
# Only the score columns are considered, so re-running after `df` has gained
# extra columns gives the same answer; `minlength` keeps a slot for variants
# that never win.
np.bincount(df[columns].to_numpy().argmax(axis=1), minlength=len(columns))

array([18, 13, 25, 28, 16])

In [8]:
# Read the two integers on the first line of every input file and attach them
# to `df` as columns "n" and "k" (one value per test case).
ns = []
ks = []
for i in range(N):
    # Plain file read instead of spawning `head` through the shell per case.
    with open(Path("../tools/in") / f"{i:04d}.txt") as case_file:
        n, k = map(int, case_file.readline().split())
    ns.append(n)
    ks.append(k)
df["n"] = ns
df["k"] = ks

In [9]:
# Per-column averages over all test cases (scores plus n and k).
df.mean(axis=0)

radius3       6590.63
erase2        6690.59
gcc           6757.84
attraction    6831.50
juggernaut    6613.95
n               30.45
k                3.50
dtype: float64

In [10]:
# Mean score of each variant grouped by k, shaded per row so the best variant
# for each k stands out. Selecting `columns` before aggregating avoids
# averaging unrelated (possibly non-numeric) columns that other cells add.
df.groupby("k")[columns].mean().style.background_gradient(cmap="YlOrRd", axis=1)

Unnamed: 0_level_0,radius3,erase2,gcc,attraction,juggernaut
k,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2,6255.8,6273.52,6472.84,6462.36,6231.2
3,6841.88,7076.2,7072.08,7096.36,6918.32
4,6775.36,6952.88,6898.6,7028.08,6994.16
5,6489.48,6459.76,6587.84,6739.2,6312.12


In [11]:
# knn = 100 * k / n^2, then split the cases into five equal-sized quantile bins.
df["knn"] = 100 * df["k"] / (df["n"] * df["n"])
df["knn_bin"] = pd.qcut(df["knn"], 5)
# Mean score of each variant per bin, shaded per row. `columns` is selected
# before aggregating so only the score columns are averaged; `observed=False`
# states the categorical-key behaviour explicitly (all bins are reported).
df.groupby("knn_bin", observed=False)[columns].mean().style.background_gradient(cmap="YlOrRd", axis=1)

Unnamed: 0_level_0,radius3,erase2,gcc,attraction,juggernaut
knn_bin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0.13, 0.238]",6924.15,7050.0,7077.6,6965.2,6992.65
"(0.238, 0.322]",7162.15,7546.05,7611.6,7592.1,7283.85
"(0.322, 0.448]",7024.55,7187.95,7281.05,7561.9,7440.9
"(0.448, 0.62]",6900.809524,6810.095238,6889.428571,7088.857143,6860.428571
"(0.62, 0.926]",4838.368421,4756.157895,4826.368421,4836.842105,4367.263158
