In [1]:
import altair as alt
import glob
import logging
import multiprocessing
import pandas as pd
import re
from tqdm.auto import tqdm
from parse_science_log import parse

In [2]:
# Parse a single example science log so its contents can be inspected below.
log = parse('PROJ17100/RUN0/CLONE0/results0/science.log')

In [3]:
# Display the device that the parsed log reports as active.
log.get_active_device()

Device(name='GeForce GTX 1080 Ti', vendor='NVIDIA Corporation', version='OpenCL 1.2 CUDA')

In [4]:
# Display the average performance figure recorded in the core log section.
log.fah_core_log.average_perf_ns_day

638.783

In [5]:
# Collect the science logs of every CLONE directory under RUN0 (results0 only)
# and show how many were found.
files = glob.glob('PROJ17100/RUN0/CLONE*/results0/science.log')
len(files)

416

In [6]:
def get_gpu_perf(log_file):
    """Extract the active GPU name and average performance from one science log.

    Parameters
    ----------
    log_file : str
        Path to a science log file; passed unchanged to ``parse``.

    Returns
    -------
    tuple or None
        ``(gpu_name, ns_per_day)`` when the log parses and both values can be
        read, otherwise ``None`` (a warning describing the failure is logged).
    """
    try:
        p = parse(log_file)
        return p.get_active_device().name, p.fah_core_log.average_perf_ns_day
    except Exception as e:
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit can still stop a long loop over logs,
        # and report the failure the same way get_record does.
        logging.warning(f"Error parsing {log_file}: {e}")
        return None

# get_gpu_perf returns None for logs it could not parse; drop those entries
# before building the frame, since a None entry is not a valid record.
gpu_perf = pd.DataFrame.from_records(
    [rec for rec in map(get_gpu_perf, tqdm(files)) if rec is not None],
    columns=["gpu_name", "ns_per_day"],
)

HBox(children=(FloatProgress(value=0.0, max=416.0), HTML(value='')))




In [7]:
# Preview the first rows of the per-log GPU performance table.
gpu_perf.head()

Unnamed: 0,gpu_name,ns_per_day
0,Intel(R) UHD Graphics 630,41.1553
1,gfx906,439.894
2,GeForce GTX 1050 Ti,192.763
3,Intel(R) UHD Graphics 630,43.9251
4,GeForce GTX 1050 Ti,157.807


In [8]:
# Horizontal bar chart: one bar per GPU name, bar length is the mean of
# ns_per_day over that GPU's logs; the y axis is sorted by the x encoding ('-x').
alt.Chart(gpu_perf).mark_bar().encode(
    x=alt.X("mean(ns_per_day)"),
    y=alt.Y("gpu_name:N", sort="-x"),
)

In [9]:
def make_record(**d):
    """Build one typed record dict from loosely-typed keyword values.

    Reads ``run_id``, ``clone_id`` and ``results_id`` (converted with ``int``),
    ``gpu_name`` (kept as given) and ``ns_per_day`` (converted with ``float``)
    from the keyword arguments. Any other keyword is accepted and ignored.

    Raises ``KeyError`` if one of the five keys is missing, and whatever
    ``int`` / ``float`` raise if a value cannot be converted.
    """
    record = {key: int(d[key]) for key in ('run_id', 'clone_id', 'results_id')}
    record['gpu_name'] = d['gpu_name']
    record['ns_per_day'] = float(d['ns_per_day'])
    return record


def get_record(fname):
    """Build the list of records for one science log file.

    The run, clone and results ids are taken from the file path, which must
    look like ``PROJ<n>/RUN<n>/CLONE<n>/results<n>/science.log``; the GPU name
    and ns/day figure come from the parsed log.

    Parameters
    ----------
    fname : str
        Path of the science log, relative to the directory holding ``PROJ*``.

    Returns
    -------
    list of dict
        A one-element list holding the record produced by ``make_record``, or
        an empty list when the path does not match the expected layout or the
        log cannot be turned into a record (a warning is logged in both cases).
        A list is returned so callers can flatten results uniformly.
    """
    # The dot in "science.log" is escaped, and the results index may have
    # several digits, like the other numeric path components.
    pattern = r"PROJ(?P<project_id>[0-9]+)/RUN(?P<run_id>[0-9]+)/CLONE(?P<clone_id>[0-9]+)/results(?P<results_id>[0-9]+)/science\.log"
    match = re.match(pattern, fname)
    if match is None:
        # Without this guard a stray path raises AttributeError on None and,
        # when run through a worker pool, aborts the whole iteration.
        logging.warning(f"Skipping {fname}: path does not match the expected layout")
        return []
    params = match.groupdict()

    try:
        log = parse(fname)
        gpu_name = log.get_active_device().name
        # Reading the performance figure and converting the fields can fail
        # too (e.g. a missing value), so keep them inside the same guard.
        record = make_record(
            gpu_name=gpu_name,
            ns_per_day=log.fah_core_log.average_perf_ns_day,
            **params
        )
    except Exception as e:
        logging.warning(f"Error parsing {fname}: {e}")
        return []

    return [record]

def parse_logs_to_df(files):
    """Parse many science logs in a process pool and return them as a DataFrame.

    Each path in ``files`` is handed to ``get_record`` in a worker process; the
    per-file record lists are flattened into one list while a progress bar
    tracks how many files have been consumed. ``files`` must support ``len``.

    Returns a DataFrame built from the flattened records.
    """
    recs = []
    with multiprocessing.Pool() as pool:
        per_file = pool.imap(get_record, files)
        # Consume the results inside the ``with`` block, while the pool is alive.
        for file_recs in tqdm(per_file, total=len(files)):
            recs.extend(file_recs)
    return pd.DataFrame.from_records(recs)

In [10]:
# Widen the search to every RUN and every single-character results index,
# then show how many logs were found.
files = glob.glob('PROJ17100/RUN*/CLONE*/results?/science.log')
len(files)

12649

In [11]:
# Parse every collected log into one DataFrame and summarize its columns;
# logs that get_record could not handle contribute no rows.
data = parse_logs_to_df(files)
data.info()

HBox(children=(FloatProgress(value=0.0, max=12649.0), HTML(value='')))














<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12289 entries, 0 to 12288
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   run_id      12289 non-null  int64  
 1   clone_id    12289 non-null  int64  
 2   results_id  12289 non-null  int64  
 3   gpu_name    12289 non-null  object 
 4   ns_per_day  12289 non-null  float64
dtypes: float64(1), int64(3), object(1)
memory usage: 480.2+ KB


In [12]:
# Save the parsed records to a Feather file named 'checkpoint'.
data.to_feather('checkpoint')

In [13]:
# Reload the records from the 'checkpoint' Feather file.
data = pd.read_feather('checkpoint')

In [14]:
# Compare GPUs across runs: standardize ns_per_day within each run (z-score),
# then average those z-scores per GPU name.
df = (
    data
    .set_index(['run_id', 'clone_id', 'results_id', 'gpu_name'])['ns_per_day']
    # keep only GPU names that have more than 10 measurements
    .groupby(level='gpu_name').filter(lambda perf: len(perf) > 10)
    # z-score of each measurement relative to the other measurements of its run
    .groupby(level='run_id').transform(lambda perf: (perf - perf.mean()) / perf.std())
    .rename('z_score_by_run')
    # one mean z-score per GPU name
    .groupby(level='gpu_name').mean()
    .reset_index()
)

# Horizontal bar chart of the mean z-score per GPU name, y axis sorted by '-x'.
alt.Chart(df).mark_bar().encode(
    x=alt.X("z_score_by_run:Q", aggregate="mean"),
    y=alt.Y("gpu_name:N", sort="-x"),
)