In [1]:
import os
import matplotlib.pyplot as plt
import matplotlib
import vaex
import gzip
import ubjson
import pandas as pd
import numpy as np
from glob import glob
import seaborn as sns
from tqdm.auto import tqdm
import matplotlib.patches as mpatches
import time
import sys
import traceback # give me the traceback
import configparser
import json
from pathlib import Path
import asyncio
from joblib import Parallel, delayed
from multiprocessing import Manager
from itertools import chain
import pprint
import dask.dataframe as dd

from smartsim.log import get_logger, log_to_file
# Notebook-wide logger, obtained from smartsim's logging helper.
logger = get_logger("Plotter")

# Parsed run.cfg objects; scaling_read_data appends to this list.
configs = list()

# Default font applied to every matplotlib figure drawn by this notebook.
font = dict(family='sans', weight='normal', size=14)
matplotlib.rc('font', **font)


In [2]:
class hashableDict(dict):
    """A ``dict`` that can be hashed, e.g. to serve as a dict key or set member.

    The hash is derived from the sorted ``(key, value)`` pairs, so two
    instances with equal contents hash alike regardless of insertion order.
    The items must therefore be mutually orderable and the values hashable.
    """

    def __hash__(self):
        ordered_items = sorted(self.items())
        return hash(tuple(ordered_items))

In [9]:


# Parsed run configurations; reset here so re-running this cell starts from an empty list.
configs = list()

# Labels that appear in the "function" column of the timing CSVs.
# NOTE(review): not referenced by the visible cells - presumably used by later plotting code.
functions = [
    'put_tensor',
    'main()',
    'client()',
    'unpack_tensor',
]

def fast_flatten(input_list):
    """Flatten one level of nesting.

    Args:
        input_list: An iterable whose elements are themselves iterable.

    Returns:
        A new list holding every item of every inner iterable, in order.
    """
    flattened = []
    for inner in input_list:
        flattened.extend(inner)
    return flattened

def readCSV(timing_file, config, frames):
    """Read one timing CSV, tag its rows with the run attributes, and collect it.

    The file's first row is treated as a header and replaced by the column
    names ``rank``, ``function`` and ``time``. Every key/value pair of the
    config's ``attributes`` section is then added as a constant column, and
    the resulting DataFrame is appended to ``frames``.

    Args:
        timing_file: Path of the CSV file to read.
        config: ``ConfigParser`` holding an ``attributes`` section.
        frames: List that receives the resulting DataFrame (mutated in place).

    Returns:
        None.
    """
    # The default parser is used on purpose: engine="pyarrow" lacks some
    # of the features needed here.
    timing_df = pd.read_csv(timing_file, names=["rank", "function", "time"], header=0)
    run_attributes = dict(config._sections['attributes'])
    frames.append(timing_df.assign(**run_attributes))

def scaling_read_data(full_path):
    """Load every per-rank timing CSV of a scaling run into one DataFrame.

    Each ``run.cfg`` found recursively below ``full_path`` is parsed with
    ``configparser``; its ``[run] path`` entry names the directory that is
    searched for ``rank*.csv`` files. Every CSV is read by ``readCSV`` on a
    joblib thread pool and the resulting frames are concatenated row-wise.

    Side effect: the notebook-level ``configs`` list is *replaced* with the
    configs parsed by this call. It used to be extended, so calling the
    function a second time re-read every earlier config and duplicated rows.

    Args:
        full_path: Directory (str or Path) searched recursively for ``run.cfg``.

    Returns:
        The combined ``pandas.DataFrame``, restricted to the columns of the
        first frame read; an empty DataFrame when no timing file was found;
        ``None`` when reading failed (the traceback is printed rather than
        raised, matching the previous best-effort behaviour).
    """
    try:
        logger.debug("Entered plotter method")

        # Sorted so that the row order of the result is reproducible.
        run_configs = []
        for run_cfg in sorted(Path(full_path).rglob('run.cfg')):
            config = configparser.ConfigParser()
            config.read(run_cfg)
            run_configs.append(config)
        # Slice-assign keeps the same list object other cells may hold.
        configs[:] = run_configs

        frames = []
        for config in tqdm(run_configs, desc="Processing configs...", ncols=80):
            timing_files = sorted(Path(config['run']['path']).glob('rank*.csv'))
            # One private list per file: worker threads never share a
            # container, and frames are gathered in file order, not in
            # thread-completion order.
            slots = [[] for _ in timing_files]
            # leave=False: do not keep one finished bar per config in the output.
            progress = tqdm(timing_files, desc="Processing timing files...", ncols=80, leave=False)
            # NOTE: setting n_jobs to -1 makes it use all available cpus
            Parallel(n_jobs=-1, prefer="threads")(
                delayed(readCSV)(timing_file, config, slot)
                for timing_file, slot in zip(progress, slots)
            )
            for slot in slots:
                frames.extend(slot)

        if not frames:
            logger.warning(f"No rank*.csv timing files found under {full_path}")
            return pd.DataFrame()

        COLUMN_NAMES = frames[0].columns
        print(f"columns were {COLUMN_NAMES}")
        # pd.concat replaces the per-column Python-level flattening; selecting
        # COLUMN_NAMES keeps the previous "columns of the first frame" schema.
        df = pd.concat(frames, ignore_index=True, sort=False)[COLUMN_NAMES]
        # TODO: persist the combined frame (e.g. as a gzip-compressed CSV under
        # the run's stats directory); earlier attempts were left disabled.
        return df
    except Exception:
        # Print the traceback once and carry on, as the original did.
        traceback.print_exc()
        return None

    
    

In [10]:
# Location of the scaling run to load: <results>/<scaling test>/<run directory>.
results_path = '../results'
scaling_test = 'throughput-standard-scaling'
run_path = 'run-2023-07-05-21:26:18'
full_path = Path(results_path) / scaling_test / run_path

scaling_read_data(full_path)

Processing configs...:   0%|                            | 0/163 [00:00<?, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

Processing timing files...: 0it [00:00, ?it/s]

columns were Index(['rank', 'function', 'time', 'colocated', 'client_total',
       'client_per_node', 'client_nodes', 'database_nodes', 'database_cpus',
       'iterations', 'tensor_bytes', 'language'],
      dtype='object')
df:           rank       function      time colocated client_total  \
0          655     put_tensor  0.065857         0          768   
1          655  unpack_tensor  0.014630         0          768   
2          655     put_tensor  0.014376         0          768   
3          655  unpack_tensor  0.014243         0          768   
4          655     put_tensor  0.021710         0          768   
...        ...            ...       ...       ...          ...   
65971579  4693  unpack_tensor  0.023524         0         6144   
65971580  4693     put_tensor  0.023253         0         6144   
65971581  4693  unpack_tensor  0.022773         0         6144   
65971582  4693      loop_time  2.670510         0         6144   
65971583  4693         main()  5.088220     

In [6]:
# NOTE(review): no notebook-level `df` is assigned in the visible cells (the
# `df` built inside scaling_read_data is local to that function), so this cell
# presumably relies on state from cells not shown or since removed - confirm
# that it survives Restart Kernel -> Run All.
df



  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


: 

: 