In [None]:
import pandas as pd
import yaml
from pathlib import Path

In [None]:
def collect_samples(project_dir, base_dir=Path("..")):
    """Build the sample records for one project directory.

    Every entry matching ``clinker/input/*.gbk`` below *project_dir* becomes
    one record. Entries are visited in sorted order so that the running index
    (and therefore the row order of any table built from the result) is the
    same on every run; ``Path.glob`` itself yields in arbitrary order.

    Args:
        project_dir: Directory of a single project.
        base_dir: Directory the stored ``gbk_path`` is made relative to.

    Returns:
        A dict mapping a running index (0, 1, ...) to a record with the keys
        ``bgc_id``, ``gbk_path``, ``genome_id`` and ``source``.

    Raises:
        FileNotFoundError: If a matched ``*.gbk`` entry is not a regular file.
        ValueError: If a matched file is not located below *base_dir*.
    """
    samples = {}
    gbk_files = sorted(project_dir.glob("clinker/input/*.gbk"))
    for num, gbk_path in enumerate(gbk_files):
        # The glob pattern also matches directories named "*.gbk"; fail loudly
        # instead of relying on an assert that is stripped under ``python -O``.
        if not gbk_path.is_file():
            raise FileNotFoundError(gbk_path)
        bgc = gbk_path.stem
        source = "bgcflow"
        if bgc.startswith("BGC"):
            # File stems starting with "BGC" are labelled as MIBIG entries and
            # only the part before the first underscore is kept as the id.
            bgc = bgc.split("_")[0]
            source = "MIBIG"
        samples[num] = {
            "bgc_id": bgc,
            "gbk_path": str(gbk_path.relative_to(base_dir)),
            "genome_id": "unknown",
            "source": source,
        }
    return samples


projects = {}
input_dir = Path("../data/region_gbks_v2/known_molecule_bgcs/")
# Sorted so that projects are processed in a reproducible order.
project_name = sorted(i.name for i in input_dir.glob("*") if i.is_dir())
for original_project_name in project_name:
    # Project keys must not contain spaces; the directory on disk keeps its
    # original name.
    p = original_project_name.replace(" ", "_")
    values = collect_samples(input_dir / original_project_name)
    print(original_project_name, p, len(values))
    projects[p] = values

# Column header written to samples.csv when a project has no samples at all;
# these are the keys of the per-sample records stored in ``projects``.
SAMPLE_COLUMNS = ["bgc_id", "gbk_path", "genome_id", "source"]

# For every project, write ``samples.csv`` and ``project_config.yaml`` into
# ``../config/<project>/`` and print the matching ``pep`` entry.
for project_selected, samples in projects.items():
    # One row per sample record, one column per record key.
    df = pd.DataFrame.from_dict(samples, orient="index")
    if df.empty:
        # An empty frame has no columns and would produce a CSV without a
        # header row; keep the header so the file stays a valid sample table.
        df = pd.DataFrame(columns=SAMPLE_COLUMNS)
    outdir = Path(f"../config/{project_selected}")
    outdir.mkdir(parents=True, exist_ok=True)
    df.to_csv(outdir / "samples.csv", index=False)

    project_config = {
        'name': project_selected,
        'pep_version': '2.1.0',
        'description': f'A selection of BGCs producing {project_selected}',
        'sample_table': 'samples.csv',
        'rules': {
            'bigslice': False,
            'bigscape': False,
            'query-bigslice': False,
            'clinker': True,
            'interproscan': False,
            'mmseqs2': True,
        },
    }
    # Write data to a YAML file
    with open(outdir / 'project_config.yaml', 'w', encoding='utf-8') as yaml_file:
        yaml.dump(project_config, yaml_file, default_flow_style=False)

    print(f"  - pep: config/{project_selected}/project_config.yaml")