In [None]:
# Customize these variables to match your environment.
# Each entry is a (path to the dump file, version label) pair: element 0 is
# passed to parse_file, element 1 is used as the label in messages and plots.
dump_file_v1 = (
    "../dump1",
    "4.13.3",
)
dump_file_v2 = (
    "../dump2",
    "4.8.5",
)

In [None]:
# Version labels (second element of each configured pair), used as dictionary
# keys and axis/message labels further down.
version1, version2 = dump_file_v1[1], dump_file_v2[1]

In [None]:
# Parse lines of a log file that look like this:
# (FStar.Seq.Properties.fsti(230,0-232,71))       Query-stats (FStar.Seq.Properties.lemma_ordering_hi_cons, 1)    succeeded in 10 milliseconds with fuel 2 and ifuel 1 and rlimit 5
# and produce, for each such line, a dictionary with the following structure:
#  { "FStar.Seq.Properties.lemma_ordering_hi_cons, 1" : { "status" : "succeeded", "reason" : None, "with_hint" : None, "time" : 10, "fuel" : 2, "ifuel" : 1, "rlimit" : 5 } }

import sys
import re
import json

# Matches a complete Query-stats record. Capture groups, in order:
# query name with index, status, optional " {...}" reason, optional
# " (with hint)" marker, time in milliseconds, fuel, ifuel, rlimit.
query_stats_re = re.compile(r'Query-stats \(([^,]+, \d+)\)\s+(succeeded|failed)( {[^}]+})?( \(with hint\))? in (\d+) milliseconds with fuel (\d+) and ifuel (\d+) and rlimit (\d+)')
# Matches the "splitting query" notices, which carry no timing to record.
splitting_query_stats = re.compile(r'Query-stats splitting query')

def parse_line(line):
    """Parse one log line into a single-entry dictionary of query statistics.

    Returns ``{name: {"status", "reason", "with_hint", "time", "fuel",
    "ifuel", "rlimit"}}`` when the line holds a complete Query-stats record.
    ``reason`` and ``with_hint`` are the raw matched text (including the
    leading space) or ``None`` when absent; the numeric fields are ints.

    Returns ``None`` for every other line. A line that mentions
    "Query-stats" but is neither a full record, a "splitting query" notice,
    nor a line containing ``{"contents":`` is reported on stdout first.
    """
    stats = query_stats_re.search(line)
    if stats is not None:
        name, status, reason, with_hint, time, fuel, ifuel, rlimit = stats.groups()
        return {
            name: {
                "status": status,
                "reason": reason,
                "with_hint": with_hint,
                "time": int(time),
                "fuel": int(fuel),
                "ifuel": int(ifuel),
                "rlimit": int(rlimit),
            }
        }

    # Known, uninteresting Query-stats variant: skip silently.
    if splitting_query_stats.search(line):
        return None

    # Anything else mentioning Query-stats is unexpected, except lines that
    # contain {"contents": which are skipped without a message.
    if "Query-stats" in line and "{\"contents\":" not in line:
        print("Failed to parse line: " + line)
    return None

In [None]:
# Smoke test: parse one representative "(with hint)" line.
test_line = parse_line(
    "(FStar.Seq.Properties.fsti(230,0-232,71))        Query-stats (FStar.Seq.Properties.lemma_ordering_hi_cons, 1)    succeeded (with hint) in 10 milliseconds with fuel 2 and ifuel 1 and rlimit 5"
)
if not test_line:
    print("Failed to parse line")
else:
    print("Parsed line: ", test_line)
    # The result is discarded; this call only raises if the parsed
    # structure is not JSON-serializable.
    json.dumps(test_line)

In [None]:

def parse_file(file):
    """Collect the query statistics found in a log file.

    Opens ``file`` as ISO-8859-1 text, feeds every line to ``parse_line`` and
    merges the non-empty results into one dictionary. When the same key is
    produced by several lines, the later line overwrites the earlier one.

    Returns the merged dictionary (empty if no line could be parsed).
    """
    print("opening file", file)
    entries = {}
    with open(file, encoding='ISO-8859-1') as handle:
        for parsed in map(parse_line, handle):
            if parsed:
                entries.update(parsed)
    return entries


In [None]:
# Load the statistics of the first dump; element 0 of the configured pair is
# the path of the file to read.
z3_v1 = parse_file(file=dump_file_v1[0])
print("Parsed", len(z3_v1), "entries")


In [None]:
# Load the statistics of the second dump; element 0 of the configured pair is
# the path of the file to read.
z3_v2 = parse_file(file=dump_file_v2[0])
print("Parsed", len(z3_v2), "entries")

In [None]:
# Report queries recorded for version2 that have no counterpart in version1
for k in (name for name in z3_v2 if name not in z3_v1):
    print(f"Missing entry in {version1}: {k}")

# Report queries recorded for version1 that have no counterpart in version2
for k in (name for name in z3_v1 if name not in z3_v2):
    print(f"Missing entry in {version2}: {k}")

In [None]:
# For every query present in both dictionaries, record the time measured under
# each version (keyed by the version label) together with "diff", the
# version2 time minus the version1 time.
diffs = {
    query: {
        version1: z3_v1[query]["time"],
        version2: z3_v2[query]["time"],
        "diff": z3_v2[query]["time"] - z3_v1[query]["time"],
    }
    for query in z3_v1
    if query in z3_v2
}

print(f"Found {len(diffs)} entries with both {version1} and {version2} times")
json.dumps(diffs)

In [None]:
# Sort the entries by time difference (version2 time minus version1 time),
# largest first: the head of the list holds the queries where version1 was
# fastest relative to version2, the tail those where it was slowest.
sorted_diffs = sorted(diffs.items(), key=lambda x: x[1]["diff"], reverse=True)

print(sorted_diffs)

# Slices are used instead of indexing positions 0..9 so that the cell does not
# raise IndexError when fewer than 10 shared entries exist; with 10 or more
# entries the output is unchanged.
print(f"Entries with the greatest speedups in {version1}:")
for entry in sorted_diffs[:10]:
    print(entry)

print(f"Entries with the greatest slowdowns in {version1}")
# Walk the list from the end to show the most negative differences first.
for entry in sorted_diffs[::-1][:10]:
    print(entry)


In [None]:
# Scatter plot of per-query times: version2 on the x axis, version1 on the y axis
import matplotlib.pyplot as plt
import numpy as np

# Remove outliers: keep only entries whose time difference is strictly within
# +/- 10000 milliseconds. This rebinds `diffs` to the filtered entries, in the
# order of `sorted_diffs`.
diffs = {name: times for name, times in sorted_diffs if -10000 < times["diff"] < 10000}

x = [times[version2] for times in diffs.values()]
y = [times[version1] for times in diffs.values()]

plt.scatter(x, y)
plt.xlabel(version2)
plt.ylabel(version1)
plt.show()


In [None]:
# Least-squares fit of the version1 times (y) against the version2 times (x),
# drawn on top of the same scatter plot.
from scipy import stats

slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print("slope:", slope, "intercept:", intercept, "r_value:", r_value, "p_value:", p_value, "std_err:", std_err)

# Fitted line evaluated at every observed x value
plt.plot(x, [intercept + slope * v for v in x])
plt.scatter(x, y)
plt.xlabel(version2)
plt.ylabel(version1)
plt.show()


In [None]:
# Print all outliers, i.e. exactly the entries left out of the plots.
# The plots keep entries with abs(diff) < 10000, so the complement is
# abs(diff) >= 10000; a strict ">" would silently drop an entry whose
# difference is exactly 10000 from both views.
for k, v in sorted_diffs:
    if abs(v["diff"]) >= 10000:
        print(k, v)

In [None]:
# Print every entry of `diffs`, one per line. By this point `diffs` has been
# rebound by the scatter-plot cell to the outlier-free entries in sorted order.
for item in diffs.items():
    print(*item)