This notebook contains some basic processing to automate the collection of statistics relating to the Dafny files.
By creating functions to perform analysis of Dafny files, additional results can be added to the pandas dataframe.
The use of a pandas dataframe provides many options for visualisation and the data can easily be stored in a CSV file.
The data can also easily be supplemented with timestamps to facilitate time series analysis.

This file is a working file and will be converted to a Python script in due course.

In [37]:
# import libraries
import os
import subprocess

import pandas as pd

In [24]:
# find *.dfy files within a given repo
def getListOfDafnyFiles(dirName):
    """Recursively collect the paths of all ``*.dfy`` files below ``dirName``.

    Args:
        dirName: directory to search; every sub-directory is searched too.

    Returns:
        A sorted list of paths, each built by joining ``dirName`` with the
        relative location of a file whose name ends in ``.dfy``.
    """
    found = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if os.path.isdir(candidate):
            # Descend into the sub-directory and keep whatever it contains.
            found.extend(getListOfDafnyFiles(candidate))
        elif name.endswith(".dfy"):
            found.append(candidate)
    found.sort()
    return found

In [25]:
# find folders within the repo that have *.dfy files
def getListOfDafnyFolders(dafnyFiles):
    """Return the distinct parent directories of the given file paths.

    Args:
        dafnyFiles: iterable of file paths.

    Returns:
        A sorted list holding each parent directory exactly once.
    """
    # A set comprehension removes duplicates before the final sort.
    unique_folders = {os.path.dirname(path) for path in dafnyFiles}
    return sorted(unique_folders)

In [36]:
# get folder for an individual dafny file
def getFolder(repo, dafny_file):
    """Return the folder containing ``dafny_file``, relative to ``repo``.

    Args:
        repo: path of the repository root, with or without a trailing
            path separator.
        dafny_file: path of a file located inside ``repo``.

    Returns:
        The directory holding ``dafny_file`` expressed relative to ``repo``
        (for example ``"src/dafny/ssz"``), or ``""`` when the file sits
        directly in the repository root.

    Raises:
        ValueError: if ``dafny_file`` is not located under ``repo``.
    """
    # relpath works on whole path components, so the result does not depend on
    # a trailing separator in ``repo`` and cannot be confused by the repo path
    # merely appearing as a substring of the file path.
    folder = os.path.relpath(os.path.dirname(dafny_file), repo)
    if folder == os.curdir:
        # File lives directly in the repository root.
        return ""
    if folder == os.pardir or folder.startswith(os.pardir + os.sep):
        raise ValueError(
            "{!r} is not located inside {!r}".format(dafny_file, repo)
        )
    return folder

In [26]:
# Exercise getListOfDafnyFiles, getListOfDafnyFolders and getFolder on a local checkout
repo_directory = "/Users/joannefuller/Documents/vscode/eth2.0-dafny/"

# Every *.dfy file found under the checkout, followed by how many there are
files = getListOfDafnyFiles(repo_directory)
for dafny_path in files:
    print(dafny_path)
print(len(files))

# Every folder that holds at least one of those files, followed by the count
directories = getListOfDafnyFolders(files)
for folder_path in directories:
    print(folder_path)
print(len(directories))

# The repo-relative folder of each file, one per line
for file in files:
    relative_folder = getFolder(repo_directory, file)
    print(relative_folder)

/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/merkle/Merkleise.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/BitListSeDes.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/BoolSeDes.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/BytesAndBits.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/IntSeDes.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/Serialise.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/DafTests.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/Eth2Types.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/Helpers.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/MathHelpers.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/NativeTypes.dfy
/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/utils/SeqHelpers.dfy
/Users/joannefuller/Documents/vscode/eth2.

In [65]:
def getLemmas(dafny_file):
    """Count the lines of ``dafny_file`` that contain the text ``lemma``.

    The file is read directly instead of piping it through ``cat``/``grep``/
    ``wc`` in an interactive shell, so paths containing spaces or shell
    metacharacters are handled correctly and no external process is needed.

    Args:
        dafny_file: path of the file to inspect.

    Returns:
        str: the number of matching lines as a decimal string (a string is
        returned for compatibility with existing callers).

    Raises:
        OSError: if the file cannot be opened.
    """
    # Match on raw bytes, as grep does, so the file's encoding is irrelevant.
    with open(dafny_file, "rb") as source:
        lemma_lines = sum(1 for line in source if b"lemma" in line)
    return str(lemma_lines)

In [66]:
def getFunctions(dafny_file):
    """Count the lines of ``dafny_file`` containing both ``function`` and ``method``.

    A line is counted when both words occur anywhere on it, in either order;
    lines containing only one of the two words are not counted. The file is
    read directly instead of being piped through shell commands, so paths
    containing spaces or shell metacharacters are handled correctly.

    Args:
        dafny_file: path of the file to inspect.

    Returns:
        str: the number of matching lines as a decimal string (a string is
        returned for compatibility with existing callers).

    Raises:
        OSError: if the file cannot be opened.
    """
    # Match on raw bytes, as grep does, so the file's encoding is irrelevant.
    with open(dafny_file, "rb") as source:
        function_lines = sum(
            1 for line in source if b"function" in line and b"method" in line
        )
    return str(function_lines)


In [67]:
# Sanity-check getLemmas and getFunctions against a single known file
test_file = "/Users/joannefuller/Documents/vscode/eth2.0-dafny/src/dafny/ssz/BytesAndBits.dfy"

# Compute both counts first, then print them one per line
lemma_count = getLemmas(test_file)
function_count = getFunctions(test_file)
print(lemma_count)
print(function_count)

3
4


In [83]:
# create a pandas dataframe to store stats relating to the dafny files
# initially this table will just contain 4 columns
column_list = ['Files', 'Folder', 'Lemmas', 'Functions']
repo_directory = "/Users/joannefuller/Documents/vscode/eth2.0-dafny/"

# Gather one record per file and build the frame in a single step:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
rows = [
    [os.path.basename(file),
     getFolder(repo_directory, file),
     getLemmas(file),
     getFunctions(file)]
    for file in files
]
df = pd.DataFrame(rows, columns=column_list)

# display every row of the table
df.head(len(df))


Unnamed: 0,Files,Folder,Lemmas,Functions
0,Merkleise.dfy,src/dafny/merkle,5,3
1,BitListSeDes.dfy,src/dafny/ssz,8,4
2,BoolSeDes.dfy,src/dafny/ssz,0,2
3,BytesAndBits.dfy,src/dafny/ssz,3,4
4,IntSeDes.dfy,src/dafny/ssz,1,2
5,Serialise.dfy,src/dafny/ssz,3,3
6,DafTests.dfy,src/dafny/utils,0,1
7,Eth2Types.dfy,src/dafny/utils,1,0
8,Helpers.dfy,src/dafny/utils,11,2
9,MathHelpers.dfy,src/dafny/utils,5,2
