In [210]:
# This Python notebook annotates bioconda recipes with bio.tools identifiers, writing them into the
# `extra` section of each recipe. Two inputs must be provided: the path to the bioconda recipes folder
# on your local machine (bioconda_recipes_path) and the CSV file containing the mapping (mapping_file).


import glob2
import os

import requests
import sys
import getopt
import re
from pandas import DataFrame, read_csv

from ruamel.yaml import YAML

from yaml.constructor import ConstructorError
from yaml.scanner import ScannerError

In [211]:
def get_bioconda_yalm(bioconda_path):
    """Parse every ``*.yaml`` file found below a bioconda recipes folder.

    The software name of a file is the first path component below
    ``bioconda_path``. When several YAML files share that component, the
    one visited last replaces the earlier entries in the result.

    Parameters
    ----------
    bioconda_path : str
        Root folder that is searched recursively for ``*.yaml`` files.

    Returns
    -------
    dict
        Maps software name -> ``(file_path, parsed_content)``. Files that
        cannot be parsed are reported on stdout and left out of the result.
    """
    # ruamel.yaml raises its own exception classes; they are unrelated to the
    # PyYAML ones imported at the top of the notebook, so its base class has
    # to be caught explicitly or one malformed recipe aborts the whole scan.
    from ruamel.yaml.error import YAMLError

    yalm_files = dict()
    matches = glob2.glob(bioconda_path + '/**/*.yaml', with_matches=True)
    yaml = YAML(typ='jinja2')
    yaml.allow_duplicate_keys = True
    yaml.explicit_start = True
    for match in matches:
        # with_matches=True yields tuples whose first element is the file path.
        file_path = match[0]
        if not os.path.isfile(file_path):
            continue
        # relpath gives the same first component whether or not bioconda_path
        # ends with a path separator (str.replace + index 1 did not).
        software_name = os.path.relpath(file_path, bioconda_path).split(os.sep)[0]
        try:
            with open(file_path) as fp:
                yalm_file = yaml.load(fp)
            yalm_files[software_name] = (file_path, yalm_file)
        except (YAMLError, ScannerError, ConstructorError, TypeError, AttributeError) as error:
            print("ERROR | " + software_name + "\t" + file_path + "\t" + str(error))
    return yalm_files

In [212]:
def get_annotation_mapping(mapping_file):
    """Read the mapping CSV and return it as a pandas DataFrame.

    Parameters
    ----------
    mapping_file : str or file-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with pandas' default CSV options.
    """
    return read_csv(mapping_file)

In [215]:
def annotate_yaml(yaml_file, biotool):
    """Store a bio.tools identifier in the ``extra`` section of one recipe file.

    Parameters
    ----------
    yaml_file : tuple
        ``(file_path, parsed_content)`` pair, as stored in the dictionary
        returned by ``get_bioconda_yalm``.
    biotool :
        Identifier written as the single element of ``extra['biotools']``;
        any previous value of that key is replaced.

    The file at ``file_path`` is overwritten with the updated content.
    Entries whose parsed content is ``None`` are reported and skipped.
    """
    path_file, yaml_file_content = yaml_file[0], yaml_file[1]
    if yaml_file_content is None:
        # Nothing was parsed for this file, so there is no mapping to update.
        print("ERROR | no content to annotate\t" + path_file)
        return

    if yaml_file_content.get('extra') is None:
        yaml_file_content['extra'] = {}
    else:
        # format() instead of '+' so a non-string identifier cannot raise TypeError.
        print("EXTRA has been found: {}".format(biotool))
    yaml_file_content['extra']['biotools'] = [biotool]

    # No module-level `yaml` object exists in the notebook (the one used for
    # loading is local to get_bioconda_yalm), so build a dumper here with the
    # same settings instead of relying on leftover kernel state.
    yaml = YAML(typ='jinja2')
    yaml.allow_duplicate_keys = True
    yaml.explicit_start = True
    # NOTE(review): the 'jinja2' plugin is believed to record, while loading,
    # the placeholder substitutions it has to undo while dumping. Parsing the
    # file once with this instance keeps load and dump paired for this file.
    # Confirm against the plugin source; if wrong this is only a redundant parse.
    with open(path_file) as fp:
        yaml.load(fp)
    with open(path_file, 'w') as fp:
        yaml.dump(yaml_file_content, fp)
    

def annotate_yamls(yaml_dictonary, mappings):
    """Annotate every recipe that is named in the mapping table.

    Parameters
    ----------
    yaml_dictonary : dict
        Maps a software name to the value handed to ``annotate_yaml``.
    mappings : pandas.DataFrame
        Table whose rows expose a ``bioconda`` attribute (the key looked up
        in ``yaml_dictonary``) and a ``biotools`` attribute (the identifier).

    Rows whose ``bioconda`` value has no entry in ``yaml_dictonary`` are skipped.
    """
    for _, entry in mappings.iterrows():
        recipe = yaml_dictonary.get(entry.bioconda)
        if recipe is None:
            continue
        annotate_yaml(recipe, entry.biotools)

In [None]:
# Input locations. Each can be overridden with an environment variable so the
# notebook runs on other machines; the defaults are the original local paths.
bioconda_recipes_path = os.environ.get(
    "BIOCONDA_RECIPES_PATH",
    "/Users/yperez/IdeaProjects/github-repo/biodocker/bioconda-recipes/recipes",
)
mapping_file = os.environ.get(
    "BIOTOOLS_MAPPING_FILE",
    "/Users/yperez/IdeaProjects/github-repo/biodocker/bioconda-biotools/mapping_matchonly.csv",
)

mapping = get_annotation_mapping(mapping_file)
yaml_files = get_bioconda_yalm(bioconda_recipes_path)
# Print counts only: printing the dictionary itself dumps every parsed recipe.
print("Loaded {} recipes and {} mapping rows".format(len(yaml_files), len(mapping)))
annotate_yamls(yaml_files, mapping)