In [1]:
import re
import glob
import json

In [2]:
# Glob pattern selecting the Rust source files whose `impl Graph` blocks are
# scanned for public methods. It is relative to the notebook's working directory.
path = "../../graph/src/*.rs"

In [3]:
# Rust preamble written at the very top of the generated `result.rs`, ahead of
# the generated parameter structs and the harness function.
setup = """
use super::*;
use arbitrary::Arbitrary;
use std::collections::HashSet;

"""

In [4]:
def read_file(path):
    """Return the whole text content of the file at `path`.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding, because the inputs are Rust source files
    (which are UTF-8) and a different locale default could otherwise fail
    or mis-decode non-ASCII comments and string literals.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
    
def read_files(path):
    """Return the content of every file matching the glob pattern `path`.

    `glob.glob` yields matches in an arbitrary, filesystem-dependent order,
    so the matches are sorted to make the returned list (and everything
    generated from it) reproducible from one run or machine to the next.
    """
    return [
        read_file(file)
        for file in sorted(glob.glob(path))
    ]

In [5]:
# Text content of every file matched by `path`, one string per file.
files = read_files(path)

In [6]:
def remove_prefix(text, prefix):
    """Return `text` with a leading `prefix` dropped.

    When `text` does not start with `prefix` it is returned unchanged.
    """
    return text[len(prefix):] if text.startswith(prefix) else text

def parse_arguments(arguments):
    """Parse a Rust argument list into a list of `(name, type)` tuples.

    `arguments` is the text between the parentheses of a function signature
    with the `self` receiver already removed, e.g.
    `"mut a: u32, b: HashMap<String, u32>"`.

    The text is cut on the single `:` that separates a name from its type.
    Every piece between two such colons then reads `<type>, <next name>`, and
    since a Rust identifier never contains a comma, the LAST comma of the
    piece is the boundary between the two.

    Returns an empty list when no `name: type` pair can be found.
    """
    # A trailing comma after the last argument carries no information.
    cleaned = arguments.strip().rstrip(",")

    # Split on lone colons only, so `::` inside path-qualified types such as
    # `std::collections::HashSet<u32>` is left intact.
    pieces = re.split(r"(?<!:):(?!:)", cleaned)
    if len(pieces) < 2:
        return []

    # The first piece is purely a name and the last one purely a type; only
    # the pieces in between hold a type followed by the next name. Leaving
    # the last piece whole keeps commas inside the final type, e.g.
    # `HashMap<K, V>` or a tuple.
    flat = [pieces[0]]
    for piece in pieces[1:-1]:
        flat.extend(piece.rsplit(",", 1))
    flat.append(pieces[-1])

    # Drop the `mut` binding keyword, but only when it is a separate word, so
    # that identifiers merely starting with "mut" (e.g. `mutation`) survive.
    flat = [
        re.sub(r"^mut\s+", "", token.strip()).strip()
        for token in flat
    ]

    return list(zip(flat[::2], flat[1::2]))

In [7]:
#  r"pub\s+fn\s+([^\(\{]+?)\((self|&\s*self|&mut\s+self|&'.\s+self)?([^\)\{]+?)\)\s*(:?->\s*([^\{]+?))\{"

In [8]:
# Collect the signature text (everything between `pub fn` and the opening `{`
# of the body) of every public function found inside an `impl Graph { ... }`
# block of the scanned files.
#
# Newlines are removed from each file first because `.` does not match a
# newline by default; the greedy `.+\}` then spans from the first
# `impl Graph {` up to the last `}` of the file.
#
# Both patterns are raw strings: `\s` and `\{` are not valid escapes in a
# plain string literal and trigger a SyntaxWarning on recent Python versions.
functions = [
    signature
    for file in files
    for signature in re.findall(
        r"pub\s+fn\s+([^\{]+?)\{",
        "\n".join(
            re.findall(
                r"impl\s+Graph\s+\{.+\}",
                file.replace("\n", ""),
            )
        ),
    )
]
len(functions)

196

In [9]:
# Signature shape handled here: a name, optional generics, a `self` receiver
# followed by at least one more argument, and an explicit `->` return type.
# Signatures that do not fit this shape produce no entry.
signature_pattern = re.compile(
    r"(\w+)(<.+?>)?\(\s*(self|&\s*self|&\s*mut\s*self)\s*,\s*(.+)\s*\)\s*->\s*(.+)\s*"
)

# Maps each function name to its parsed arguments and its return type; a name
# seen more than once keeps the data of its last occurrence.
functions_info = {
    match.group(1): {
        "arguments": parse_arguments(match.group(4)),
        "return_type": match.group(5).strip(),
    }
    for signature in functions
    for match in signature_pattern.finditer(signature)
}

In [10]:
# Pretty-print the parsed signatures so they can be inspected by eye.
print(json.dumps(functions_info, indent=4))

{
    "random_spanning_arborescence_kruskal": {
        "arguments": [
            [
                "random_state",
                "EdgeT"
            ],
            [
                "unwanted_edge_types",
                "&Option<HashSet<Option<EdgeTypeT>>>"
            ],
            [
                "verbose",
                "bool"
            ]
        ],
        "return_type": "(HashSet<(NodeT, NodeT)>, Vec<NodeT>, NodeT, NodeT, NodeT)"
    },
    "spanning_arborescence_kruskal": {
        "arguments": [
            [
                "verbose",
                "bool"
            ]
        ],
        "return_type": "(HashSet<(NodeT, NodeT)>, Vec<NodeT>, NodeT, NodeT, NodeT)"
    },
    "spanning_arborescence": {
        "arguments": [
            [
                "verbose",
                "bool"
            ]
        ],
        "return_type": "Result<(usize, impl Iterator<Item = (NodeT, NodeT)> + '_), String>"
    },
    "connected_components": {
        "arguments": [
     

In [11]:
def filter_function(function_name, infos):
    """Tell whether a function must be left out of the generated harness.

    Returns True when `function_name` is one of the explicitly excluded
    names, or when the type of any entry of `infos["arguments"]` contains one
    of the marker substrings listed below; returns False otherwise.
    """
    excluded_names = (
        "validate_weight",
        "parse_weight",
        "has_edge",
    )
    if function_name in excluded_names:
        return True

    # Matching is by plain substring, so e.g. "Iter" also matches "Iterator".
    type_markers = (
        "impl",
        "Fn",
        "Iter",
        "NodeFileReader",
        "EdgeFileReader",
        "Graph",
        "Compute_hash_Params",
        "&str",
        "&[u32]",
        "&String",
        "SingleWalkParameters",
        "WalksParameters",
        "WalkWeights",
        "Self",
        "&[String]",
        "&",
    )
    for argument in infos["arguments"]:
        argument_type = argument[1]
        for marker in type_markers:
            if marker in argument_type:
                return True
    return False
        

In [12]:
# Rust source of one `Arbitrary`-derivable struct; the placeholders receive the
# struct name and its newline-separated field declarations.
template = """
#[derive(Arbitrary, Debug, Clone)]
pub struct %s {
%s
}
"""

structs = []  # generated struct definitions, in generation order
fields = []   # field declarations of the top-level `TheUltimateFuzzer` struct

for function_name, info in functions_info.items():
    arguments = info["arguments"]

    # Nothing to generate for a function without parsed arguments.
    if not arguments:
        continue

    # Report the functions whose argument types cannot be generated.
    if filter_function(function_name, info):
        print("\n\nSkipping ", function_name)
        print(arguments)
        continue

    # One parameter struct per function, plus a matching field on the
    # top-level fuzzer struct that carries an instance of it.
    struct_name = function_name.capitalize() + "_Params"
    fields.append(f"\tpub {function_name} : {struct_name},")

    struct_body = "\n".join(
        f"\tpub {arg_name} : {arg_type},"
        for arg_name, arg_type in arguments
    )
    structs.append(template % (struct_name, struct_body))

# Parameters used to build the graph itself; the struct is not generated here.
fields.append("\tpub from_vec: FromVecHarnessParams,")

harness = template % ("TheUltimateFuzzer", "\n".join(fields))
structs.append(harness)

result = "\n".join(structs)



Skipping  random_spanning_arborescence_kruskal
[('random_state', 'EdgeT'), ('unwanted_edge_types', '&Option<HashSet<Option<EdgeTypeT>>>'), ('verbose', 'bool')]


Skipping  overlaps
[('other', '&Graph')]


Skipping  contains
[('other', '&Graph')]


Skipping  are_nodes_remappable
[('other', '&Graph')]


Skipping  remap
[('other', '&Graph'), ('verbose', 'bool')]


Skipping  is_singleton_by_node_name
[('node_name', '&str')]


Skipping  has_node_by_node_name
[('node_name', '&str')]


Skipping  has_node_with_type_by_node_name
[('node_name', '&str'), ('node_type_name', 'Option<Vec<String>>')]


Skipping  has_edge_by_node_names
[('src_name', '&str'), ('dst_name', '&str')]


Skipping  has_edge_with_type_by_node_names
[('src_name', '&str'), ('dst_name', '&str'), ('edge_type_name', 'Option<&String>')]


Skipping  get_weight_with_type_by_node_names
[('src', '&str'), ('dst', '&str'), ('edge_type', 'Option<&String>')]


Skipping  get_weight_by_node_names
[('src_name', '&str'), ('dst_name', '&str')

In [13]:
# Number of generated struct definitions: one per retained function plus the
# top-level `TheUltimateFuzzer` struct.
len(structs)

82

In [14]:
def build_call(function_name, infos):
    """Return the Rust statement calling `function_name` on `graph`.

    Each argument is read from the `data.<function_name>.<argument name>`
    field. A `?` is appended when the return type starts with `Result`.
    Functions rejected by `filter_function` yield an empty string.
    """
    if filter_function(function_name, infos):
        return ""

    call_arguments = ", ".join(
        f"data.{function_name}.{parameter[0]}"
        for parameter in infos["arguments"]
    )
    error_propagation = "?" if infos["return_type"].startswith("Result") else ""

    return f"\tgraph.{function_name}({call_arguments}){error_propagation};"

In [15]:
# Rust source of the `mega_test` entry point. The single `%s` placeholder is
# filled with one generated call per entry of `functions_info`, joined by
# newlines; functions rejected by `filter_function` contribute an empty line.
harness = """
pub fn mega_test(data: TheUltimateFuzzer) -> Result<(), String> {
    let data_copy = data.clone();
    let data_copy2 = data.clone();
    std::panic::set_hook(Box::new(move |info| {
        handle_panics_mega_test(info, data_copy.clone());
    }));

    let mut graph = graph::Graph::from_string_unsorted(
        data.from_vec.edges.into_iter(),
        data.from_vec.nodes.map(|ns| ns.into_iter()),
        data.from_vec.directed,
        data.from_vec.directed_edge_list,
        data.from_vec.name,
        data.from_vec.ignore_duplicated_nodes,
        data.from_vec.ignore_duplicated_edges,
        data.from_vec.verbose,
        data.from_vec.numeric_edge_types_ids,
        data.from_vec.numeric_node_ids,
        data.from_vec.numeric_edge_node_ids,
        data.from_vec.numeric_node_types_ids,
        data.from_vec.has_node_types,
        data.from_vec.has_edge_types,
        data.from_vec.has_edge_weights,
    )?;
    
    

    let g_copy = graph.clone();
    std::panic::set_hook(Box::new(move |info| {
        handle_panics_mega_test_once_loaded(info, data_copy2.clone(), g_copy.clone());
    }));

    
%s

    Ok(())
}
"""%"\n".join(
    build_call(function_name, infos)
    for function_name, infos in functions_info.items()
)

In [16]:
# Write the generated Rust file: preamble, then the parameter structs, then the
# harness function. The encoding is set explicitly because Rust source files
# must be UTF-8, whatever the platform's default encoding is.
with open("result.rs", "w", encoding="utf-8") as f:
    f.write(setup + result + harness)