# Setup

In [1]:
import pandas as pd
import seaborn as sns
import os
from io import StringIO
import tracemalloc
import linecache
from src.profiling import load_pkl
from src.viz import plot_grouped_barchart, _bytes_to_readable_fmt
import yappi
from velour.client import Client
import warnings

## cprofile

Use this command to analyze CProfile reports in snakeviz:

```
snakeviz utils/profiles/create_groundtruths.cprofile
```

## tracemalloc

In [None]:
tracemalloc_path = 'profiles/create_groundtruths.tracemalloc'
snapshot = tracemalloc.Snapshot.load(tracemalloc_path)
tracemalloc_dct = load_pkl(tracemalloc_path + '.pkl')

In [None]:
def _print_tracemalloc_peaks(dct:dict):
    size_pct = (dct["second_size"]-dct["first_size"])/dct["first_size"]
    print(f'Original size: {_bytes_to_readable_fmt(dct["first_size"], 0)}')
    print(f'Final size: {_bytes_to_readable_fmt(dct["second_size"], 0)}')
    print(f'Percent Change: {size_pct:2%}')
    print('')

    size_pct = (dct["second_peak"]-dct["first_peak"])/dct["first_peak"]
    print(f'Original peak: {_bytes_to_readable_fmt(dct["first_peak"], 0)}')
    print(f'Final peak: {_bytes_to_readable_fmt(dct["second_peak"], 0)}')
    print(f'Percent Change: {size_pct:2%}')


_print_tracemalloc_peaks(tracemalloc_dct)

Original size: 961.7 kB
Final size: 1.1 MB
Percent Change: 20.701585%

Original peak: 1.9 MB
Final peak: 1.6 MB
Percent Change: -13.880409%


In [None]:

def _display_top_tracemalloc(snapshot:tracemalloc.Snapshot, key_type:str='lineno', limit:int=10):
    snapshot = snapshot.filter_traces((
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
        tracemalloc.Filter(True, '*/velour/*')
    ))
    top_stats = snapshot.statistics(key_type)

    print("Top %s lines" % limit)
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, frame.filename, frame.lineno, stat.size / 1024))
        line = linecache.getline(frame.filename, frame.lineno).strip()
        if line:
            print('    loc: %s:%s' % (frame.filename, frame.lineno))
            print('    func: %s' % line)

    other = top_stats[limit:]
    if other:
        size = sum(stat.size for stat in other)
        print("%s other: %.1f KiB" % (len(other), size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))

_display_top_tracemalloc(snapshot=snapshot)

Top 10 lines
#1: /Users/nthorlind/git/sw/velour/api/velour_api/backend/core/annotation.py:25: 0.9 KiB
    loc: /Users/nthorlind/git/sw/velour/api/velour_api/backend/core/annotation.py:25
    func: def create_annotation(
#2: /Users/nthorlind/git/sw/velour/api/velour_api/backend/stateflow.py:97: 0.9 KiB
    loc: /Users/nthorlind/git/sw/velour/api/velour_api/backend/stateflow.py:97
    func: def wrapper(*args, **kwargs):
#3: /Users/nthorlind/git/sw/velour/api/velour_api/backend/core/metadata.py:14: 0.8 KiB
    loc: /Users/nthorlind/git/sw/velour/api/velour_api/backend/core/metadata.py:14
    func: def create_metadatum(
#4: /Users/nthorlind/git/sw/velour/api/velour_api/schemas/stateflow.py:160: 0.6 KiB
    loc: /Users/nthorlind/git/sw/velour/api/velour_api/schemas/stateflow.py:160
    func: def set_dataset_status(self, dataset_name: str, status: State):
#5: /Users/nthorlind/git/sw/velour/api/velour_api/backend/query/groundtruth.py:8: 0.5 KiB
    loc: /Users/nthorlind/git/sw/velour/api/velo

## yappi

Difficult to visualize without KCacheGrind or outside tools.

In [None]:
stats = yappi.get_func_stats()
stats.add("profiles/create_groundtruths.yappi")

stats.sort("tsub", "desc").print_all()


Clock type: WALL
Ordered by: tsub, desc

name                                  ncall  tsub      ttot      tavg      
..hon3.11/asyncio/tasks.py:627 sleep  17     1.511360  1.511749  0.088926
..y:311 _set_result_unless_cancelled  17     0.000153  0.000413  0.000024
..icorn/server.py:235 Server.on_tick  17     0.000125  0.000246  0.000014
..tils/src/profiling.py:86 wrap_func  2/1    0.000038  1.658119  0.829059
..3.11/email/utils.py:126 formatdate  2      0.000019  0.000107  0.000053
...py:117 _format_timetuple_and_zone  2      0.000018  0.000018  0.000009
..email/utils.py:155 format_datetime  2      0.000013  0.000060  0.000030
..thon3.11/copyreg.py:113 _slotnames  4      0.000011  0.000017  0.000004
..orn/server.py:226 Server.main_loop  1      0.000000  0.000000  0.000000
../uvicorn/server.py:63 Server.serve  1      0.000000  0.000000  0.000000


## scalene

In [2]:
from src.profiling import generate_docker_snapshot
df3 = generate_docker_snapshot()
df3



  return pd.read_csv(
  return pd.read_csv(


[{'id': '086d52266bae',
  'image': 'redis',
  'disk_space': '0B (virtual 183MB)',
  'cpu_util': ' 0.46%',
  'mem_util': ' 0.14%'},
 {'id': 'e3aa70f77745',
  'image': 'postgis/postgis',
  'disk_space': '63B (virtual 593MB)',
  'cpu_util': ' 0.00%',
  'mem_util': ' 2.70%'}]

In [5]:
import io


  df = pd.read_csv(


Unnamed: 0,pid,name
0,3402,pensive_chatelet
1,3567,sweet_ride


In [43]:
df3

[{'id': '086d52266bae',
  'image': 'redis',
  'disk_space': '0B (virtual 183MB)',
  'cpu_util': ' 0.36%',
  'mem_util': ' 0.14%'},
 {'id': 'e3aa70f77745',
  'image': 'postgis/postgis',
  'disk_space': '63B (virtual 593MB)',
  'cpu_util': ' 0.00%',
  'mem_util': ' 2.70%'}]

  pd.read_csv(string_tsv, sep='    ', header=0, names=['id', 'image', 'disk_space'])


Unnamed: 0,id,image,disk_space
0,086d52266bae,redis,0B (virtual 183MB)
1,e3aa70f77745,postgis/postgis,63B (virtual 593MB)


In [51]:
pd.read_csv('blah.tsv', sep='\t', header=0)

Unnamed: 0,TYPE TOTAL ACTIVE SIZE RECLAIMABLE
0,Images 3 3 1.269GB ...
1,Containers 3 2 71.45MB ...
2,Local Volumes 10 2 10.28GB ...
3,Build Cache 0 0 0B 0B
