In [16]:
import glob
import pandas as pd
import pstats

# Column layouts for the flattened pstats records built below.
# See https://github.com/ssanderson/pstats-view
_FUNC_ID_FIELDS = ["filename", "lineno", "funcname"]
_METRIC_FIELDS = ["ccalls", "ncalls", "tottime", "cumtime"]

# One row per profiled function: its identity followed by its metrics.
timing_colnames = _FUNC_ID_FIELDS + _METRIC_FIELDS

# One row per (function, caller) pair: the function's identity, the caller's
# identity (same fields with a "caller_" prefix), then the metrics.
# NOTE(review): cProfile appears to store per-caller tuples as
# (ncalls, ccalls, tottime, cumtime), i.e. with the two call counts in the
# opposite order to the per-function tuple -- confirm that the ccalls/ncalls
# labels on caller rows are not swapped.
caller_columns = (
    _FUNC_ID_FIELDS
    + ["caller_" + field for field in _FUNC_ID_FIELDS]
    + _METRIC_FIELDS
)

def _calc_frames(stats: pstats.Stats):
    """
    Compute a DataFrame summary of a Stats object.
    """
    timings = []
    callers = []
    for key, values in stats.stats.items():
        timings.append(
            pd.Series(
                key + values[:-1],
                index=timing_colnames,
            )
        )
        for caller_key, caller_values in values[-1].items():
            callers.append(
                pd.Series(
                    key + caller_key + caller_values,
                    index=caller_columns,
                )
            )

    timings_df = pd.DataFrame(timings)
    callers_df = pd.DataFrame(callers)
    timings_df['filename:funcname'] = \
        (timings_df['filename'] + ':' + timings_df['funcname'])
    timings_df = timings_df.groupby('filename:funcname').sum()
    return timings_df, callers_df

# Load every profile dump under ./profile and flatten each one.
# glob returns matches in arbitrary order, so sort them to make the row order
# of the combined frames reproducible across runs and machines.
profile_paths = sorted(glob.glob("./profile/*pstats"))
if not profile_paths:
    # pd.concat([]) would otherwise fail with a bare "No objects to
    # concatenate"; report which pattern found nothing instead.
    raise ValueError("No profile files matched './profile/*pstats'")

all_timings = []
all_callers = []
for file in profile_paths:
    p = pstats.Stats(file)
    # Per-file frames get their own names so they are not confused with the
    # combined frames assigned below.
    file_timings, file_callers = _calc_frames(p)
    all_timings.append(file_timings)
    all_callers.append(file_callers)

# Stack the per-file frames; the per-file index is discarded.
timings_df = pd.concat(all_timings, ignore_index=True)
callers_df = pd.concat(all_callers, ignore_index=True)

In [17]:
# Totals per (lineno, funcname) for rows whose filename is "intra_dedup.py",
# ordered by cumulative time, largest first.
(
    timings_df.loc[
        timings_df["filename"] == "intra_dedup.py",
        ["lineno", "funcname", "ccalls", "ncalls", "tottime", "cumtime"],
    ]
    .groupby(["lineno", "funcname"])
    .sum()
    .reset_index()
    .sort_values(["cumtime"], ascending=False)
)

Unnamed: 0,lineno,funcname,ccalls,ncalls,tottime,cumtime
17,184,generate_hash_values,273207,273207,110.464205,586.295896
16,149,ngrams,273207,273207,0.829081,417.744516
10,104,large_star_reduce,2764416,2764416,3.858483,6.355785
3,78,small_star_reduce,2698166,2698166,3.328366,5.268883
18,239,<listcomp>,273207,273207,4.162926,4.498721
20,474,<lambda>,3045933,3045933,0.78815,2.608031
13,112,<lambda>,2764416,2764416,1.396288,1.856704
22,494,<lambda>,3045933,3045933,0.430363,1.561438
6,86,<lambda>,2698166,2698166,1.09918,1.543986
19,315,process_cluster,4797,4797,0.003203,1.464996
