[**<<< Back to Snakemake Report**](../../)
# BGCFlow Report

This page summarizes the final processed data of [**BGCFlow**](https://github.com/NBChub/bgcflow): _Snakemake workflow to systematically analyze Biosynthetic Gene Clusters from a collection of genomes (pangenomes) from internal & public genome datasets._

In [None]:
# Import Libraries
from IPython.display import display, Markdown, Latex
from pathlib import Path
import json, yaml
import peppy

In [None]:
# Resolve the BGCFlow root (two directory levels up from the working
# directory) and the two files describing the workflow setup.
BGCFlow_path = Path("../../").resolve()
config_path = BGCFlow_path / "config/config.yaml"
rules_dict_path = BGCFlow_path / "workflow/rules/rules_main.json"

# Workflow configuration, parsed from YAML
config = yaml.safe_load(config_path.read_text())

# Per-rule information, parsed from JSON
rules_dict = json.loads(rules_dict_path.read_text())

In [None]:
# Get Metadata
# project_metadata maps each project name to a dict with the keys
# 'description' (str), 'rule_used' (list of enabled rule names) and
# 'references' (sorted list of citations for those rules).
project_metadata = {}

for p in config['projects']:
    if p['name'].endswith(".yaml"):
        # PEP project: the config entry points to a project YAML file whose
        # 'name' field holds the actual project name.
        with open(BGCFlow_path / p['name']) as f:
            project = yaml.safe_load(f)
        name = project['name']
    else:
        # Plain project: there is no project file to read. Reset `project`
        # so that nothing is inherited from a previous loop iteration.
        # NOTE(review): such projects fall back to the global rule set
        # below — confirm this matches the workflow's own behaviour.
        project = {}
        name = p['name']

    # Every entry is a dict (never a bare string) so that the keys added
    # below, and the ['description'] lookup made when rendering the table
    # of contents, work for all projects.
    project_metadata[name] = {
        'description': project.get('description', "No description provided.")
    }

    # get what rules are being used: project-specific rules take
    # precedence over the global ones from the main config
    rules = project["rules"] if 'rules' in project else config["rules"]
    rule_used = [r for r in rules if rules[r]]
    project_metadata[name]['rule_used'] = rule_used

    # get citations; several rules may share a reference, so each one is
    # kept only once before sorting
    citation_all = []
    for r in rule_used:
        for citation in rules_dict[r]['references']:
            if citation not in citation_all:
                citation_all.append(citation)
    citation_all.sort()
    project_metadata[name]['references'] = citation_all

In [None]:
# Create table of contents
projects = Path("../../data/processed").resolve()
ignore = "*.ipynb*"  # glob pattern for entries that must not be listed


def _is_listable(path):
    """Return True when *path* is neither a dotfile nor matched by ``ignore``."""
    return not path.name.startswith(".") and not path.match(ignore)


# Markdown lines are collected here and joined once at the end
toc_lines = [
    "## Available Outputs\nPlease find the list of available outputs from BGCFlow projects here:\n"
]

ctr = 0
# Path.glob yields entries in arbitrary order; sorting keeps the numbering
# of the generated list reproducible between runs.
for item in sorted(projects.glob("*")):
    # only directories belonging to a known project are listed
    if item.name not in project_metadata:
        continue
    if not (_is_listable(item) and item.is_dir()):
        continue

    # level 1: the project itself, followed by its description
    ctr += 1
    description = f": {project_metadata[item.name]['description']}"
    toc_lines.append(f"{ctr}. [**{item.name}**](./{item.name})" + description)

    # level 2: entries of the project directory (hidden entries are skipped,
    # consistent with the filtering applied on the other levels)
    subdir_ctr = 0
    for subdir in sorted(item.glob("*")):
        if not _is_listable(subdir):
            continue
        subdir_ctr += 1
        toc_lines.append(
            f"    {subdir_ctr}. [{subdir.name}](./{item.name}/{subdir.name})"
        )

        # level 3: sub-directories plus any "*mapping.csv" file
        subdir_ctr2 = 0
        for subdir2 in sorted(subdir.glob("*")):
            is_output_dir = _is_listable(subdir2) and subdir2.is_dir()
            if is_output_dir or subdir2.name.endswith("mapping.csv"):
                subdir_ctr2 += 1
                toc_lines.append(
                    f"        {subdir_ctr2}. [{subdir2.name}](./{item.name}/{subdir.name}/{subdir2.name})"
                )

# Display all text
text = "\n".join(toc_lines)
display(Markdown(text))

In [None]:
# Build the per-project reference lists as Markdown.
# Uses `projects` (processed-data directory) and `project_metadata`
# defined in the earlier cells.
ref_lines = ["### References"]

# Path.glob yields entries in arbitrary order; sorting keeps the section
# order reproducible between runs.
for item in sorted(projects.glob("*")):
    # only non-hidden directories belonging to a known project are listed
    if item.name not in project_metadata:
        continue
    if item.name.startswith(".") or not item.is_dir():
        continue

    ref_lines.append(
        f"#### References for tools used in project [**{item.name}**](./{item.name})"
    )
    # Numbered citation list. The citation text is emitted as-is; the
    # closing parenthesis previously appended to every entry was not part
    # of the citation and has been dropped.
    for number, c in enumerate(project_metadata[item.name]['references'], start=1):
        ref_lines.append(f"{number}. {c}")

# Display all text
text = "\n".join(ref_lines)
display(Markdown(text))