In [1]:
# generate_integrated_flowchart.py

from graphviz import Digraph

def create_integrated_flowchart(output_filename='integrated_strobmer_minhash_flowchart', file_format='png',
                                *, view=True, cleanup=False):
    """
    Render a flowchart of the integrated Strobemer + Weighted MinHash workflow.

    The chart is a fixed pipeline: load a sequence, configure and generate
    strobemers, initialise a weighted MinHash, build genome and read sketches
    from it, estimate their similarity, and output the score.

    Args:
        output_filename (str): The name of the output file without extension.
        file_format (str): The format of the output file (e.g., 'png').
        view (bool): Forwarded to ``Digraph.render``; when true the rendered
            file is opened with the system viewer. Defaults to True, which
            matches the previous hard-coded behaviour. Pass False for
            headless or batch runs.
        cleanup (bool): Forwarded to ``Digraph.render``; when true the
            intermediate DOT source file is removed after rendering.
            Defaults to False (previous behaviour).

    Returns:
        None
    """
    # Initialize a directed graph
    dot = Digraph(comment='Integrated Strobmer and Weighted MinHash Workflow', format=file_format)

    # Font settings shared by every node style.
    font_attrs = {'fontname': 'Helvetica', 'fontsize': '10'}

    # Style for ordinary process steps.
    step_attrs = {'shape': 'box', 'style': 'rounded,filled', 'color': 'lightblue2', **font_attrs}

    # Style for the start and end terminals.
    terminal_attrs = {'shape': 'oval', 'style': 'filled', 'color': 'lightgreen', **font_attrs}

    # Nodes as (id, label, style), listed in the order they are added to the graph.
    nodes = [
        ('Start', 'Start', terminal_attrs),
        ('LoadSequence', 'Load DNA Sequence\n(Input: sequence)', step_attrs),
        ('InitializeStrobmer',
         'Initialize Strobmer Parameters\n(k: k-mer size, l: window size, s: spacing, include_revcom)',
         step_attrs),
        ('GenerateStrobemers',
         'Generate Strobemers with Frequencies\n(Action: Sliding window to extract strobemers and count frequencies)',
         step_attrs),
        ('InitializeMinHash', 'Initialize Weighted MinHash\n(Input: num_hashes)', step_attrs),
        ('GenerateGenomeSketch',
         'Generate Genome Strobemer Sketch\n(Input: genome_strobemers_with_freq, WeightedMinHash instance)',
         step_attrs),
        ('GenerateReadSketch',
         'Generate Read Strobemer Sketch\n(Input: read_strobemers_with_freq, WeightedMinHash instance)',
         step_attrs),
        ('EstimateSimilarity',
         'Estimate Jaccard Similarity\n(Action: Compare genome and read sketches)',
         step_attrs),
        ('OutputSimilarity',
         'Output Similarity Score\n(Action: Print or store the similarity)',
         step_attrs),
        ('End', 'End', terminal_attrs),
    ]
    for node_id, label, attrs in nodes:
        dot.node(node_id, label, **attrs)

    # Unlabelled edges describing the flow. InitializeMinHash fans out to both
    # sketch steps because the same MinHash instance is reused for genome and
    # read; the two branches join again at EstimateSimilarity.
    edges = [
        ('Start', 'LoadSequence'),
        ('LoadSequence', 'InitializeStrobmer'),
        ('InitializeStrobmer', 'GenerateStrobemers'),
        ('GenerateStrobemers', 'InitializeMinHash'),
        ('InitializeMinHash', 'GenerateGenomeSketch'),
        ('InitializeMinHash', 'GenerateReadSketch'),
        ('GenerateGenomeSketch', 'EstimateSimilarity'),
        ('GenerateReadSketch', 'EstimateSimilarity'),
        ('EstimateSimilarity', 'OutputSimilarity'),
        ('OutputSimilarity', 'End'),
    ]
    for tail, head in edges:
        dot.edge(tail, head)

    # Render the flowchart
    dot.render(output_filename, view=view, cleanup=cleanup)
    print(f"Integrated Strobmer-Weighted MinHash Flowchart saved as {output_filename}.{file_format}")

# Script entry point: build the flowchart with the default file name and format.
if __name__ == "__main__":
    create_integrated_flowchart()

Integrated Strobmer-Weighted MinHash Flowchart saved as integrated_strobmer_minhash_flowchart.png
