# parse_logfile

This dict is the result of serialising (then cleaning) a logfile which is dumped when the assertion on line ~700 fails:
```
assert target.name not in self.converter._function_definitions, target.name
```

Look at this dict, parse how the three relevant objects are filled (really two are objects, one is a call history)

In [1]:
import json


from typed_python import SerializationContext

In [2]:
SERIALIZED_INPUT = '/home/wgrant/Dev/typed_python/demos/python_s_slow_u_slice_u_3/logfile_20221019.bytes'
# SERIALIZED_INPUT = '/home/wgrant/Dev/typed_python/logfile.bytes'

In [3]:
!ls -l {SERIALIZED_INPUT}

-rw-r--r-- 1 wgrant wgrant 156745 Oct 27 09:54 /home/wgrant/Dev/typed_python/demos/python_s_slow_u_slice_u_3/logfile_20221019.bytes


In [4]:
with open(SERIALIZED_INPUT, 'rb') as flines:
    input_data = SerializationContext().deserialize(flines.read())

In [5]:
# PATH_TO_INPUT = '/home/wgrant/Dev/typed_python/demos/failure_dict.json'
# with open(PATH_TO_INPUT) as flines:
#     input_data = json.load(flines)

In [6]:
for key, value in input_data.items():
    print(key, len(value))

history 5
end_state 4
compiler_cache 3
bug_test 4


In [7]:
import pandas as pd 

In [8]:
pd.set_option('display.max_rows', 2000)

## History

In [9]:
def parse_dict_to_dataframe(input_dict: dict) -> pd.DataFrame:
    input_data_squared = []
    for key, value in input_dict.items():
        for x in value:
            try:
                timestamp, thread, data = x
            except TypeError:
                timestamp, thread, data = x, None
            input_data_squared.append((pd.to_datetime(timestamp, unit='s'), thread, key, data))
    input_df = pd.DataFrame(input_data_squared, columns=['timestamp', 'thread', 'key', 'data']).sort_values(by='timestamp').reset_index(drop=True)
    
    # extract the callTarget data
    callTarget_data = pd.json_normalize(input_df.query('key=="namedCallTargetToLLVM"').data).set_index(input_df.query('key=="namedCallTargetToLLVM"').index)
    callTarget_data.columns = ['name', 'namedCallTargetToLLVM.external']
    parsed_df = pd.merge(input_df, callTarget_data, how='left', left_index=True, right_index=True)
    parsed_df.loc[parsed_df.key=="namedCallTargetToLLVM", 'data'] = None
    # set the name field as data for function_definitions
    parsed_df.loc[parsed_df.key=="function_definitions", 'name'] = parsed_df.loc[parsed_df.key=="function_definitions", :].data  
    parsed_df.loc[parsed_df.key=="external_function_references", 'name'] = parsed_df.loc[parsed_df.key=="external_function_references", :].data  
    # blow up the externallyDefinedFunctionTypes
    parsed_df = parsed_df.explode('data')
    parsed_df.loc[parsed_df.key=="externallyDefinedFunctionTypes", 'name'] = parsed_df.loc[parsed_df.key=="externallyDefinedFunctionTypes", :].data
    # grab entryPoint names
    parsed_df.loc[parsed_df.key=="entryPoint", 'name'] = input_df.loc[input_df.key=='entryPoint', 'data'].apply(lambda x: x['name'])
    
    parsed_df = parsed_df.drop(columns=['data']).reset_index(drop=True)
    return parsed_df, input_df

In [10]:
df, input_df = parse_dict_to_dataframe(input_data['history'])

In [11]:
import numpy as np

In [12]:
def bold(row):
    if not pd.isnull(row['name']) and 'decref_str' in row['name']:
        return ['background-color: #1b9e77'] * len(row)
    elif row['name'] == 'CLEAR':
        return ['background-color: #d95f02'] * len(row)
        
    else:
        return [None] * len(row)

In [13]:
df.style.apply(bold, axis=1)

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
0,2022-10-19 18:13:45.306560256,1429,function_definitions,CLEAR,
1,2022-10-19 18:13:45.306564352,1429,externallyDefinedFunctionTypes,CLEAR,
2,2022-10-19 18:13:45.306664704,1429,entryPoint,getNonblocking,
3,2022-10-19 18:13:45.306861312,1429,entryPoint,get,
4,2022-10-19 18:13:45.306999296,1429,entryPoint,getMany,
5,2022-10-19 18:13:45.307198208,1429,entryPoint,put,
6,2022-10-19 18:13:45.307379968,1429,entryPoint,putMany,
7,2022-10-19 18:13:45.307527936,1429,entryPoint,peek,
8,2022-10-19 18:13:45.307644416,1429,entryPoint,__len__,
9,2022-10-19 18:13:45.309083136,1429,entryPoint,workExecutor,


In [14]:
df[df.key == 'external_function_references']

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
397,2022-10-19 18:13:48.876991488,1474,external_function_references,tp_free,
402,2022-10-19 18:13:48.881567488,1474,external_function_references,nativepython_float64_to_string,
405,2022-10-19 18:13:48.882616576,1474,external_function_references,nativepython_int64_to_string,
408,2022-10-19 18:13:48.884186112,1474,external_function_references,np_runtime_to_pyobj,
410,2022-10-19 18:13:48.884502784,1474,external_function_references,np_try_pyobj_to_str,
412,2022-10-19 18:13:48.884772352,1474,external_function_references,np_raise_exception_fastpath,
415,2022-10-19 18:13:48.885440512,1474,external_function_references,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...,
420,2022-10-19 18:13:48.887154944,1474,external_function_references,np_add_traceback,
423,2022-10-19 18:13:48.889088000,1474,external_function_references,nativepython_runtime_rshift_int64_int64,
427,2022-10-19 18:13:48.891405824,1474,external_function_references,nativepython_runtime_string_cmp,


In [15]:
df[(df.name.notnull() & df.name.str.contains('decref_str')) | (df.key == 'external_function_references')]

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
390,2022-10-19 18:13:48.869644544,1474,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...,
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
397,2022-10-19 18:13:48.876991488,1474,external_function_references,tp_free,
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
402,2022-10-19 18:13:48.881567488,1474,external_function_references,nativepython_float64_to_string,
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
405,2022-10-19 18:13:48.882616576,1474,external_function_references,nativepython_int64_to_string,
406,2022-10-19 18:13:48.883041280,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
408,2022-10-19 18:13:48.884186112,1474,external_function_references,np_runtime_to_pyobj,
410,2022-10-19 18:13:48.884502784,1474,external_function_references,np_try_pyobj_to_str,


In [16]:
repeated_names = df[df.key.isin(['function_definitions', 'externallyDefinedFunctionTypes'])].name.value_counts()

In [17]:
repeated_names = repeated_names[repeated_names > 1].index

In [18]:
repeated_names

Index(['.get_global_variables', 'CLEAR',
       'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
       'runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614',
       'runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8'],
      dtype='object')

In [19]:
df[df.name.isin(repeated_names)]

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
0,2022-10-19 18:13:45.306560256,1429,function_definitions,CLEAR,
1,2022-10-19 18:13:45.306564352,1429,externallyDefinedFunctionTypes,CLEAR,
385,2022-10-19 18:13:48.819718656,1474,externallyDefinedFunctionTypes,.get_global_variables,
387,2022-10-19 18:13:48.869465344,1474,function_definitions,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...,
389,2022-10-19 18:13:48.869595136,1474,function_definitions,"runtime.decref_OneOf(None, False, True, float,...",
390,2022-10-19 18:13:48.869644544,1474,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...,
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
395,2022-10-19 18:13:48.876302080,1474,namedCallTargetToLLVM,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...,False
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False


## End State

In [20]:
def parse_dict_to_end_state_dataframe(input_dict): 
    protodf = []
    for key, value in input_dict['end_state'].items():
        if key == 'namedCallTargetToLLVM':
            continue
        timestamp, thread, function_names = value
        for name in function_names:
            protodf.append((pd.to_datetime(timestamp, unit='s'), thread, key, name))
    end_state = pd.DataFrame(protodf, columns=['timestamp', 'thread', 'key', 'name']).sort_values(by=['key', 'name'])
    return end_state

In [21]:
parse_dict_to_end_state_dataframe(input_data).style.apply(bold,axis=1) 

Unnamed: 0,timestamp,thread,key,name
440,2022-10-19 18:13:49.640532992,1540,external_function_references,__cxa_allocate_exception
443,2022-10-19 18:13:49.640532992,1540,external_function_references,__cxa_begin_catch
442,2022-10-19 18:13:49.640532992,1540,external_function_references,__cxa_end_catch
441,2022-10-19 18:13:49.640532992,1540,external_function_references,__cxa_throw
444,2022-10-19 18:13:49.640532992,1540,external_function_references,__gxx_personality_v0
5,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,.get_global_variables
341,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,runtime.append(ListOf(Int32)).86e5000925a8c3c25a1d9cdb0d9f4602fcb524a6
275,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,runtime.append(ListOf(Node)).20dfb795bdb064cdfb3091b036e80ab83c685583
254,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,runtime.append(ListOf(Node)).bcb88788ed2c2cc6adccfe3cdee8e7b44977e6d2
48,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,runtime.append(ListOf(SocketAddress)).95255a3e5d02aafa1a5d0549a6e50e271bd7515d


In [22]:
input_data['end_state']['namedCallTargetToLLVM']

[1666203229.6405346,
 1540,
 {'name': 'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
  'external': False}]

## Compiler Stuff

In [23]:
for key, value in input_data.items():
    print(key, len(value))

history 5
end_state 4
compiler_cache 3
bug_test 4


In [24]:
def parse_dict_to_compiler_cache_dataframe(input_dict): 
    protodf = []
    for key, value in input_dict['compiler_cache'].items():
        for timestamp, thread, function_name in value:
            protodf.append((pd.to_datetime(timestamp, unit='s'), thread, key, function_name))
    compiler_cache = pd.DataFrame(protodf, columns=['timestamp', 'thread', 'key', 'name']).sort_values(by='timestamp')
    return compiler_cache

In [25]:
compiler_df =parse_dict_to_compiler_cache_dataframe(input_data)

In [26]:
compiler_df[compiler_df.name.str.contains('decref_str')]

Unnamed: 0,timestamp,thread,key,name
10,2022-10-19 18:13:48.846901248,1474,allDefinedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
863,2022-10-19 18:13:48.850128128,1474,load,runtime.decref_str.7272ea99c1f321ede541a5d770d...
1048,2022-10-19 18:13:49.298908160,1474,load,runtime.decref_str.7272ea99c1f321ede541a5d770d...
196,2022-10-19 18:13:49.299389184,1474,allDefinedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
634,2022-10-19 18:13:49.299389952,1474,allCachedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...


In [27]:
compiler_df.style.apply(bold,axis=1)

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.817756928,1474,allDefinedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
854,2022-10-19 18:13:48.819695872,1474,load,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
855,2022-10-19 18:13:48.819699968,1474,load,runtime.destructor_.d90c619324e9823706e91ba9dde938a57a79c6af
856,2022-10-19 18:13:48.819700992,1474,load,runtime.decref_object.e7079dbf5b98d3fb81b9d12f476d904d1c00e924
857,2022-10-19 18:13:48.819702016,1474,load,runtime.decref_ListOf(float).393e4a36214d4764b785f64dc3fb952e231ab37e
858,2022-10-19 18:13:48.819703296,1474,load,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
859,2022-10-19 18:13:48.819704576,1474,load,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a.dispatch
860,2022-10-19 18:13:48.819705600,1474,load,.get_global_variables
1,2022-10-19 18:13:48.819723776,1474,allDefinedNames,runtime.destructor_.d90c619324e9823706e91ba9dde938a57a79c6af
447,2022-10-19 18:13:48.819724800,1474,allCachedNames,runtime.destructor_.d90c619324e9823706e91ba9dde938a57a79c6af


## Bug Test - 


In [28]:
def parse_dict_to_bug_test_dataframe(input_dict): 
    protodf = []
    for timestamp, thread, value in input_dict['bug_test']:
        for key, vals in value.items():
            for function_name in vals:
                protodf.append((pd.to_datetime(timestamp, unit='s'), thread, key, function_name))
    bug_test = pd.DataFrame(protodf, columns=['timestamp', 'thread', 'key', 'name']).sort_values(by=['timestamp', 'key']).reset_index(drop=True)
    return bug_test

In [29]:
bug_test_df = parse_dict_to_bug_test_dataframe(input_data)

In [30]:

bug_test_df[bug_test_df.name.str.contains('decref_str')]

Unnamed: 0,timestamp,thread,key,name
19,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
208,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_str.7272ea99c1f321ede541a5d770d...
450,2022-10-19 18:13:49.303641088,1474,cachedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
795,2022-10-19 18:13:49.303641088,1474,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
1149,2022-10-19 18:13:49.636368384,1540,cachedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
1503,2022-10-19 18:13:49.636368384,1540,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...


In [31]:
bug_test_df.style.apply(bold, axis=1)

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.819706624,1474,definedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
1,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.destructor_.d90c619324e9823706e91ba9dde938a57a79c6af
2,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.decref_object.e7079dbf5b98d3fb81b9d12f476d904d1c00e924
3,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.decref_ListOf(float).393e4a36214d4764b785f64dc3fb952e231ab37e
4,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
5,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a.dispatch
6,2022-10-19 18:13:48.819706624,1474,markExternal,.get_global_variables
7,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.decref_object.e7079dbf5b98d3fb81b9d12f476d904d1c00e924
8,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.destructor_.d90c619324e9823706e91ba9dde938a57a79c6af
9,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.decref_ListOf(float).393e4a36214d4764b785f64dc3fb952e231ab37e


In [32]:
rows = []
for index, row in bug_test_df.query('key=="markExternal"').iterrows():
    name = row['name']
    preceding_rows = bug_test_df.iloc[:index].query('name==@name and key=="definedNames"')
    if len(preceding_rows):   
        # rows.append(pd.concat([preceding_rows, row]))
        rows.append(preceding_rows.append(row))

  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))
  rows.append(preceding_rows.append(row))


In [33]:
defined_then_loaded = pd.concat(rows)

In [34]:
defined_then_loaded

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.819706624,1474,definedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b...
4,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b...
22,2022-10-19 18:13:49.299036416,1474,definedNames,.get_global_variables
34,2022-10-19 18:13:49.299036416,1474,markExternal,.get_global_variables
19,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
208,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_str.7272ea99c1f321ede541a5d770d...
15,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
241,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
18,2022-10-19 18:13:49.299036416,1474,definedNames,"runtime.decref_OneOf(None, False, True, float,..."
282,2022-10-19 18:13:49.299036416,1474,markExternal,"runtime.decref_OneOf(None, False, True, float,..."


In [35]:
defined_then_loaded_names = defined_then_loaded['name'].unique()

In [36]:
print("\n".join(sorted(defined_then_loaded['name'].unique())))

.get_global_variables
runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8
runtime.decref_Set(str).d61832338c386eac039b73e57d36514ee53bafd8
runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614
runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9
tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a
tp.extend.204dd25b1bed980846a34730e59cac38906a757a
tp.toListOf.0cec974228c22536e92c54289efa8c49a4779284


In [37]:
print("\n".join(sorted(repeated_names)))

.get_global_variables
CLEAR
runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8
runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614
runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9


In [38]:
defined_then_loaded

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.819706624,1474,definedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b...
4,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b...
22,2022-10-19 18:13:49.299036416,1474,definedNames,.get_global_variables
34,2022-10-19 18:13:49.299036416,1474,markExternal,.get_global_variables
19,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
208,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_str.7272ea99c1f321ede541a5d770d...
15,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
241,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
18,2022-10-19 18:13:49.299036416,1474,definedNames,"runtime.decref_OneOf(None, False, True, float,..."
282,2022-10-19 18:13:49.299036416,1474,markExternal,"runtime.decref_OneOf(None, False, True, float,..."


In [39]:
# merged[merged.name.isin(defined_then_loaded['name'].unique())].style

In [40]:
repeated_names

Index(['.get_global_variables', 'CLEAR',
       'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
       'runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614',
       'runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8'],
      dtype='object')

In [41]:
bug_test_df

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.819706624,1474,definedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b...
1,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.destructor_<class 'ListOf(float)'>.d90...
2,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...
3,2022-10-19 18:13:48.819706624,1474,markExternal,runtime.decref_ListOf(float).393e4a36214d4764b...
4,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b...
5,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b...
6,2022-10-19 18:13:48.819706624,1474,markExternal,.get_global_variables
7,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...
8,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.destructor_<class 'ListOf(float)'>.d90...
9,2022-10-19 18:13:49.299036416,1474,cachedNames,runtime.decref_ListOf(float).393e4a36214d4764b...


## Combo

In [42]:
history_df = df.drop(columns='namedCallTargetToLLVM.external')

In [43]:
history_df.shape

(1033, 4)

In [44]:
compiler_df.shape

(1279, 4)

In [45]:
merged = pd.concat([history_df, compiler_df]).sort_values(by=['timestamp', 'key', 'name'])

In [46]:
merged

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:45.306560256,1429,function_definitions,CLEAR
1,2022-10-19 18:13:45.306564352,1429,externallyDefinedFunctionTypes,CLEAR
2,2022-10-19 18:13:45.306664704,1429,entryPoint,getNonblocking
3,2022-10-19 18:13:45.306861312,1429,entryPoint,get
4,2022-10-19 18:13:45.306999296,1429,entryPoint,getMany
...,...,...,...,...
1028,2022-10-19 18:13:49.638712320,1540,function_definitions,tp.__init__.47c30c8605c8f24f677800ca74c1de6bb6...
1029,2022-10-19 18:13:49.638841600,1540,function_definitions,tp.__init__.47c30c8605c8f24f677800ca74c1de6bb6...
1030,2022-10-19 18:13:49.638929408,1540,function_definitions,runtime.destructor_<class 'Dict(str->Set(str))...
1031,2022-10-19 18:13:49.639250944,1540,namedCallTargetToLLVM,runtime.destructor_<class 'Dict(str->Set(str))...


In [47]:
merged[merged.name.str.contains('decref_str')]

Unnamed: 0,timestamp,thread,key,name
10,2022-10-19 18:13:48.846901248,1474,allDefinedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
863,2022-10-19 18:13:48.850128128,1474,load,runtime.decref_str.7272ea99c1f321ede541a5d770d...
390,2022-10-19 18:13:48.869644544,1474,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
406,2022-10-19 18:13:48.883041280,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
413,2022-10-19 18:13:48.885219840,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
418,2022-10-19 18:13:48.886705664,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
424,2022-10-19 18:13:48.889829632,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...


In [48]:
merged[merged.key.isin(['namedCallTargetToLLVM', 'external_function_references'])]

Unnamed: 0,timestamp,thread,key,name
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
395,2022-10-19 18:13:48.876302080,1474,namedCallTargetToLLVM,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
396,2022-10-19 18:13:48.876950528,1474,namedCallTargetToLLVM,tp_free
397,2022-10-19 18:13:48.876991488,1474,external_function_references,tp_free
398,2022-10-19 18:13:48.877456640,1474,namedCallTargetToLLVM,tp_free
399,2022-10-19 18:13:48.878050560,1474,namedCallTargetToLLVM,tp.const_dict_getitem.3af055c8e89da0e2f7169169...
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
401,2022-10-19 18:13:48.881526016,1474,namedCallTargetToLLVM,nativepython_float64_to_string
402,2022-10-19 18:13:48.881567488,1474,external_function_references,nativepython_float64_to_string
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...


In [49]:
df[df['namedCallTargetToLLVM.external'] == False]

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
395,2022-10-19 18:13:48.876302080,1474,namedCallTargetToLLVM,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...,False
399,2022-10-19 18:13:48.878050560,1474,namedCallTargetToLLVM,tp.const_dict_getitem.3af055c8e89da0e2f7169169...,False
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
406,2022-10-19 18:13:48.883041280,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
413,2022-10-19 18:13:48.885219840,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...,False
414,2022-10-19 18:13:48.885385984,1474,namedCallTargetToLLVM,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...,False
416,2022-10-19 18:13:48.885708032,1474,namedCallTargetToLLVM,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...,False
417,2022-10-19 18:13:48.886487040,1474,namedCallTargetToLLVM,"runtime.decref_OneOf(None, False, True, float,...",False


In [50]:
merged

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:45.306560256,1429,function_definitions,CLEAR
1,2022-10-19 18:13:45.306564352,1429,externallyDefinedFunctionTypes,CLEAR
2,2022-10-19 18:13:45.306664704,1429,entryPoint,getNonblocking
3,2022-10-19 18:13:45.306861312,1429,entryPoint,get
4,2022-10-19 18:13:45.306999296,1429,entryPoint,getMany
...,...,...,...,...
1028,2022-10-19 18:13:49.638712320,1540,function_definitions,tp.__init__.47c30c8605c8f24f677800ca74c1de6bb6...
1029,2022-10-19 18:13:49.638841600,1540,function_definitions,tp.__init__.47c30c8605c8f24f677800ca74c1de6bb6...
1030,2022-10-19 18:13:49.638929408,1540,function_definitions,runtime.destructor_<class 'Dict(str->Set(str))...
1031,2022-10-19 18:13:49.639250944,1540,namedCallTargetToLLVM,runtime.destructor_<class 'Dict(str->Set(str))...


In [51]:
# check end_state to debug how on earth NCT2LLVM got called thrice

In [52]:
df.iloc[-100:]

Unnamed: 0,timestamp,thread,key,name,namedCallTargetToLLVM.external
933,2022-10-19 18:13:49.496111360,1474,function_definitions,runtime.type_convert(str -> OneOf(ConstDict(On...,
934,2022-10-19 18:13:49.496242432,1474,function_definitions,tp.const_dict_get.44c4899e64c357259fc05fa7c203...,
935,2022-10-19 18:13:49.496285440,1474,function_definitions,runtime.oneof_binop_reverse.2b65355ded8643ff3b...,
936,2022-10-19 18:13:49.496359168,1474,function_definitions,runtime.oneof_binop_reverse.7a98c8a364fda6e3bf...,
937,2022-10-19 18:13:49.496428544,1474,function_definitions,runtime.oneof_binop_reverse.508ef502a94019a292...,
938,2022-10-19 18:13:49.496497664,1474,function_definitions,runtime.type_convert(bytes -> OneOf(ConstDict(...,
939,2022-10-19 18:13:49.496563456,1474,function_definitions,runtime.copy_initialize_OneOf(ConstDict(OneOf(...,
940,2022-10-19 18:13:49.496602112,1474,function_definitions,"runtime.decref_OneOf(ConstDict(OneOf(None, Fal...",
941,2022-10-19 18:13:49.496649216,1474,function_definitions,runtime.oneof_binop_reverse.e398f56778abc69e7f...,
942,2022-10-19 18:13:49.496721152,1474,function_definitions,runtime.oneof_attribute.8489231193735127a3d6ec...,


In [53]:
end_state_df = parse_dict_to_end_state_dataframe(input_data)

In [54]:
end_state_df[end_state_df.name.str.contains('checkSetSizeAndThrow')]

Unnamed: 0,timestamp,thread,key,name
403,2022-10-19 18:13:49.640519680,1540,externallyDefinedFunctionTypes,tp.checkSetSizeAndThrowIfChanged.f1fba12099416...


In [55]:
end_state_df.query("key=='function_definitions'")

Unnamed: 0,timestamp,thread,key,name
427,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.copy_initialize_OneOf(ConstDict(OneOf(...
410,2022-10-19 18:13:49.640530176,1540,function_definitions,"runtime.copy_initialize_OneOf(None, False, Tru..."
436,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.decref_Dict(str->Set(str)).54e24c9a81e...
428,2022-10-19 18:13:49.640530176,1540,function_definitions,"runtime.decref_OneOf(ConstDict(OneOf(None, Fal..."
406,2022-10-19 18:13:49.640530176,1540,function_definitions,"runtime.decref_OneOf(None, False, True, float,..."
404,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
407,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...
439,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.destructor_<class 'Dict(str->Set(str))...
430,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.oneof_attribute.8489231193735127a3d6ec...
432,2022-10-19 18:13:49.640530176,1540,function_definitions,runtime.oneof_binop_Lt.e404769b7bf83a8e01b194a...


So what happened:
-  defineLinkName called with RDS - added to `_allDefinedNames_` and then loadFromCompilerCache hit. symbol found in the cache but either callTargetsAndTypes was None, or newNativeFunctionTypes is empty.
- then RDS added to function_definitions.
- call NCT2LLVM a bunch of times. 
- then load RDS again, as a result of loadFromCompilerCaching a different function **which function**. RDS gets in EDFTs, ADS, ACS
- then three more NCT2LLVM hits, and only the third breaks?

In [56]:
merged.loc[1048]

timestamp                        2022-10-19 18:13:49.298908160
thread                                                    1474
key                                                       load
name         runtime.decref_str.7272ea99c1f321ede541a5d770d...
Name: 1048, dtype: object

In [57]:
merged.index.get_loc(1048)

791

In [58]:
merged[600:792]

Unnamed: 0,timestamp,thread,key,name
567,2022-10-19 18:13:49.279105280,1474,entryPoint,setdefault
568,2022-10-19 18:13:49.279235072,1474,entryPoint,__str__
569,2022-10-19 18:13:49.279271936,1474,entryPoint,__repr__
570,2022-10-19 18:13:49.279303168,1474,entryPoint,_checkInvariants
571,2022-10-19 18:13:49.279403776,1474,entryPoint,__iter__
572,2022-10-19 18:13:49.279867904,1474,entryPoint,items
573,2022-10-19 18:13:49.279983360,1474,entryPoint,__iter__
574,2022-10-19 18:13:49.280092928,1474,entryPoint,__iter__
575,2022-10-19 18:13:49.280976640,1474,entryPoint,values
866,2022-10-19 18:13:49.298675968,1474,load,tp.extend.204dd25b1bed980846a34730e59cac38906a...


In [59]:
merged[791:1000]

Unnamed: 0,timestamp,thread,key,name
1048,2022-10-19 18:13:49.298908160,1474,load,runtime.decref_str.7272ea99c1f321ede541a5d770d...
1049,2022-10-19 18:13:49.298909184,1474,load,"runtime.decref_OneOf(SliceMessage, ControllerM..."
1050,2022-10-19 18:13:49.298909952,1474,load,runtime.destructor_<class 'TupleOf(PythonObjec...
1051,2022-10-19 18:13:49.298910720,1474,load,runtime.decref_TupleOf(ChunkHandleWrapper).e9b...
1052,2022-10-19 18:13:49.298911744,1474,load,runtime.destructor_<class 'ConstDict(OneOf(Non...
1053,2022-10-19 18:13:49.298912512,1474,load,runtime.decref_Node.837a40e0c5c178d0f8e6012531...
1054,2022-10-19 18:13:49.298913536,1474,load,runtime.decref_TupleOf(PythonObjectOfType(Chun...
1055,2022-10-19 18:13:49.298914560,1474,load,runtime.decref_Slice.6fd4ce7e2a31c2ab44cf558c2...
1056,2022-10-19 18:13:49.298915328,1474,load,runtime.decref_TupleOf(Scope).8cd3916aa8b0b35c...
1057,2022-10-19 18:13:49.298916096,1474,load,runtime.decref_SliceProxyMessage.af79f13c34fdc...


For each function, get the first occurences of namedCallTargetToLLVM, and all external_function_references

In [60]:
repeated_names

Index(['.get_global_variables', 'CLEAR',
       'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
       'runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614',
       'runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8'],
      dtype='object')

In [61]:
external_function_refs = merged.query("key=='external_function_references'")
namedCallTargetToLLVM_firstcall = merged.query("key=='namedCallTargetToLLVM'").groupby('name').first().reset_index()
pd.concat([namedCallTargetToLLVM_firstcall, external_function_refs]).sort_values(by='timestamp')[['timestamp', 'thread', 'key', 'name']].style

Unnamed: 0,timestamp,thread,key,name
17,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9
15,2022-10-19 18:13:48.876302080,1474,namedCallTargetToLLVM,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614
21,2022-10-19 18:13:48.876950528,1474,namedCallTargetToLLVM,tp_free
397,2022-10-19 18:13:48.876991488,1474,external_function_references,tp_free
20,2022-10-19 18:13:48.878050560,1474,namedCallTargetToLLVM,tp.const_dict_getitem.3af055c8e89da0e2f71691698260c76ded50baf4
0,2022-10-19 18:13:48.881526016,1474,namedCallTargetToLLVM,nativepython_float64_to_string
402,2022-10-19 18:13:48.881567488,1474,external_function_references,nativepython_float64_to_string
1,2022-10-19 18:13:48.882580480,1474,namedCallTargetToLLVM,nativepython_int64_to_string
405,2022-10-19 18:13:48.882616576,1474,external_function_references,nativepython_int64_to_string
9,2022-10-19 18:13:48.884142080,1474,namedCallTargetToLLVM,np_runtime_to_pyobj


In [62]:
defined_then_loaded

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:48.819706624,1474,definedNames,tp.assertAllFinite.2fc848d803f18a85389bece607b...
4,2022-10-19 18:13:48.819706624,1474,markExternal,tp.assertAllFinite.2fc848d803f18a85389bece607b...
22,2022-10-19 18:13:49.299036416,1474,definedNames,.get_global_variables
34,2022-10-19 18:13:49.299036416,1474,markExternal,.get_global_variables
19,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
208,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_str.7272ea99c1f321ede541a5d770d...
15,2022-10-19 18:13:49.299036416,1474,definedNames,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
241,2022-10-19 18:13:49.299036416,1474,markExternal,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
18,2022-10-19 18:13:49.299036416,1474,definedNames,"runtime.decref_OneOf(None, False, True, float,..."
282,2022-10-19 18:13:49.299036416,1474,markExternal,"runtime.decref_OneOf(None, False, True, float,..."


In [63]:
history_df.query("key=='function_definitions'")

Unnamed: 0,timestamp,thread,key,name
0,2022-10-19 18:13:45.306560256,1429,function_definitions,CLEAR
387,2022-10-19 18:13:48.869465344,1474,function_definitions,runtime.decref_bytes.631fa490bf2d97ed9f3634e3e...
388,2022-10-19 18:13:48.869554176,1474,function_definitions,tp.<lambda>.268c95cda5ce1ec2e73642509ab391ce96...
389,2022-10-19 18:13:48.869595136,1474,function_definitions,"runtime.decref_OneOf(None, False, True, float,..."
390,2022-10-19 18:13:48.869644544,1474,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...
391,2022-10-19 18:13:48.869680128,1474,function_definitions,tp.<lambda>.268c95cda5ce1ec2e73642509ab391ce96...
392,2022-10-19 18:13:48.869798400,1474,function_definitions,tp.const_dict_getitem.3af055c8e89da0e2f7169169...
393,2022-10-19 18:13:48.869958144,1474,function_definitions,"runtime.copy_initialize_OneOf(None, False, Tru..."
920,2022-10-19 18:13:49.309521152,1474,function_definitions,tp.<lambda>.990a00d84bb8e4ef8e49b07b2cb7a56b6f...
921,2022-10-19 18:13:49.309574656,1474,function_definitions,tp.<lambda>.990a00d84bb8e4ef8e49b07b2cb7a56b6f...


In [64]:
merged[merged.name.str.contains('decref_str')]

Unnamed: 0,timestamp,thread,key,name
10,2022-10-19 18:13:48.846901248,1474,allDefinedNames,runtime.decref_str.7272ea99c1f321ede541a5d770d...
863,2022-10-19 18:13:48.850128128,1474,load,runtime.decref_str.7272ea99c1f321ede541a5d770d...
390,2022-10-19 18:13:48.869644544,1474,function_definitions,runtime.decref_str.7272ea99c1f321ede541a5d770d...
394,2022-10-19 18:13:48.875877888,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
400,2022-10-19 18:13:48.879414784,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
403,2022-10-19 18:13:48.882052864,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
406,2022-10-19 18:13:48.883041280,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
413,2022-10-19 18:13:48.885219840,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
418,2022-10-19 18:13:48.886705664,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...
424,2022-10-19 18:13:48.889829632,1474,namedCallTargetToLLVM,runtime.decref_str.7272ea99c1f321ede541a5d770d...


In [65]:
merged.query("key=='external_function_references'")

Unnamed: 0,timestamp,thread,key,name
397,2022-10-19 18:13:48.876991488,1474,external_function_references,tp_free
402,2022-10-19 18:13:48.881567488,1474,external_function_references,nativepython_float64_to_string
405,2022-10-19 18:13:48.882616576,1474,external_function_references,nativepython_int64_to_string
408,2022-10-19 18:13:48.884186112,1474,external_function_references,np_runtime_to_pyobj
410,2022-10-19 18:13:48.884502784,1474,external_function_references,np_try_pyobj_to_str
412,2022-10-19 18:13:48.884772352,1474,external_function_references,np_raise_exception_fastpath
415,2022-10-19 18:13:48.885440512,1474,external_function_references,runtime.decref_object.e7079dbf5b98d3fb81b9d12f...
420,2022-10-19 18:13:48.887154944,1474,external_function_references,np_add_traceback
423,2022-10-19 18:13:48.889088000,1474,external_function_references,nativepython_runtime_rshift_int64_int64
427,2022-10-19 18:13:48.891405824,1474,external_function_references,nativepython_runtime_string_cmp


In [66]:
repeated_names

Index(['.get_global_variables', 'CLEAR',
       'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
       'runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614',
       'runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8'],
      dtype='object')

In [67]:
defined_then_loaded_names

array(['tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a',
       '.get_global_variables',
       'runtime.decref_str.7272ea99c1f321ede541a5d770ddceac4eb071e9',
       'runtime.decref_bytes.631fa490bf2d97ed9f3634e3e07ddbb16af88614',
       'runtime.decref_OneOf(None, False, True, float, int, str, bytes).c200cf5f8c96f141827717b8c0e9acaa0b4352b8',
       'tp.extend.204dd25b1bed980846a34730e59cac38906a757a',
       'tp.toListOf.0cec974228c22536e92c54289efa8c49a4779284',
       'runtime.decref_Set(str).d61832338c386eac039b73e57d36514ee53bafd8'],
      dtype=object)

In [68]:
external_referenced_names = merged.query("key=='external_function_references'").name.unique()

In [69]:
set(defined_then_loaded_names) - set(repeated_names )

{'runtime.decref_Set(str).d61832338c386eac039b73e57d36514ee53bafd8',
 'tp.assertAllFinite.2fc848d803f18a85389bece607b46c9987aaa86a',
 'tp.extend.204dd25b1bed980846a34730e59cac38906a757a',
 'tp.toListOf.0cec974228c22536e92c54289efa8c49a4779284'}