In [185]:
# This Python notebook annotates bioconda recipes with their bio.tools ids, which are written to the
# `extra` section of each recipe. Two inputs must be provided: the path to the bioconda recipes on your
# local machine (bioconda_recipes_path) and the file with the bioconda-to-bio.tools mapping (mapping_file).


import glob2
import os

import requests
import sys
import yaml
import getopt
import re
from pandas import DataFrame, read_csv

from ruamel.yaml import YAML

from yaml.constructor import ConstructorError
from yaml.scanner import ScannerError

In [190]:
def get_bioconda_yalm(bioconda_path):
    """Parse the YAML recipe files found under ``bioconda_path``.

    Every ``*.yaml`` file below ``bioconda_path`` is a candidate. The first
    path component below ``bioconda_path`` is used as the software name, and
    one file is kept per software name.

    When several files share a software name, the kept one is chosen by
    priority: ``<name>/meta.yaml`` first, then a ``meta.yaml`` in a deeper
    folder, then any other YAML file. Among files of equal priority the one
    returned last by the glob wins.

    Args:
        bioconda_path: directory that contains one folder per recipe.

    Returns:
        dict mapping software name to a ``(file_path, parsed_document)`` tuple.
        Files that fail to parse are reported on stdout and left out.
    """
    # ruamel.yaml has its own exception hierarchy; the PyYAML ScannerError /
    # ConstructorError imported by this file never match errors raised by the
    # ruamel parser used below, so ruamel's base class is caught as well.
    from ruamel.yaml.error import YAMLError

    # Named `recipe_parser` so the imported `yaml` module is not shadowed.
    recipe_parser = YAML(typ='jinja2')
    recipe_parser.allow_duplicate_keys = True
    recipe_parser.allow_unicode = True

    yalm_files = dict()
    kept_rank = dict()  # software name -> priority of the file currently kept
    for match in glob2.glob(bioconda_path + '/**/*.yaml', with_matches=True):
        file_path = match[0]  # with_matches=True yields (path, wildcard_matches)
        if not os.path.isfile(file_path):
            continue

        rel_parts = os.path.relpath(file_path, bioconda_path).split(os.sep)
        software_name = rel_parts[0]

        # Lower rank = better candidate for annotation.
        if len(rel_parts) == 2 and rel_parts[1] == 'meta.yaml':
            rank = 0
        elif rel_parts[-1] == 'meta.yaml':
            rank = 1
        else:
            rank = 2
        if software_name in kept_rank and rank > kept_rank[software_name]:
            continue  # a better file is already stored for this recipe

        try:
            with open(file_path) as fp:
                yalm_file = recipe_parser.load(fp)
        except (YAMLError, ScannerError, ConstructorError, TypeError, AttributeError) as error:
            print("ERROR | " + software_name + "\t" + file_path + "\t" + str(error))
            continue

        yalm_files[software_name] = (file_path, yalm_file)
        kept_rank[software_name] = rank
    return yalm_files

In [191]:
def get_annotation_mapping(mapping_file):
    """Load the mapping CSV into a pandas DataFrame.

    Args:
        mapping_file: path (or anything ``read_csv`` accepts) of the CSV file.

    Returns:
        The DataFrame produced by ``read_csv`` with its default settings.
    """
    return read_csv(mapping_file)

In [192]:
def annotate_yaml(yaml_file, biotool):
    """Set ``extra.biotools`` to ``[biotool]`` in one recipe file and save it.

    Args:
        yaml_file: ``(path, parsed_document)`` pair. Index 0 is the file that
            is rewritten; index 1 is the already-parsed document, which is
            updated in place so it stays in sync with the file.
        biotool: bio.tools id stored as the only element of ``extra.biotools``.
            Any existing ``biotools`` entry is replaced.
    """
    path_file = yaml_file[0]
    yaml_file_content = yaml_file[1]

    if yaml_file_content.get('extra') is not None:
        print("EXTRA has been found: " + biotool)

    # The file must be written by ruamel's jinja2 flavour, not by the PyYAML
    # module imported as `yaml`: PyYAML cannot restore the Jinja2 templating
    # and does not serialise ruamel's container types as plain mappings.
    # The file is re-read and dumped through one dedicated handler so that
    # load and dump belong to the same document.
    # NOTE(review): presumably the jinja2 plug-in keeps per-document state on
    # the handler; confirm against the ruamel.yaml.jinja2 documentation.
    recipe_io = YAML(typ='jinja2')
    recipe_io.allow_duplicate_keys = True
    recipe_io.allow_unicode = True
    with open(path_file) as fp:
        document = recipe_io.load(fp)

    # Apply the same annotation to the caller's copy and to the fresh copy.
    for content in (yaml_file_content, document):
        if content.get('extra') is None:
            content['extra'] = {}
        content['extra']['biotools'] = [biotool]

    with open(path_file, 'w') as fp:
        recipe_io.dump(document, fp)
    

def annotate_yamls(yaml_dictonary, mappings):
    """Annotate every recipe that has an entry in the mapping table.

    Args:
        yaml_dictonary: dict keyed by recipe name; each value is handed to
            ``annotate_yaml`` unchanged.
        mappings: DataFrame whose rows expose ``bioconda`` (recipe name) and
            ``biotools`` (id to write) attributes.

    Rows whose ``bioconda`` value has no (non-None) entry in
    ``yaml_dictonary`` are skipped.
    """
    for _, entry in mappings.iterrows():
        recipe = yaml_dictonary.get(entry.bioconda)
        if recipe is None:
            continue
        annotate_yaml(recipe, entry.biotools)

In [193]:
# Input locations. The defaults are the original author's local checkout; set
# the BIOCONDA_RECIPES_PATH / BIOCONDA_BIOTOOLS_MAPPING environment variables
# to run the notebook on another machine without editing this cell.
bioconda_recipes_path = os.environ.get(
    "BIOCONDA_RECIPES_PATH",
    "/Users/yperez/IdeaProjects/github-repo/biodocker/bioconda-recipes/recipes",
)
mapping_file = os.environ.get(
    "BIOCONDA_BIOTOOLS_MAPPING",
    "/Users/yperez/IdeaProjects/github-repo/biodocker/bioconda-biotools/mapping_matchonly.csv",
)

mapping = get_annotation_mapping(mapping_file)
yaml_files = get_bioconda_yalm(bioconda_recipes_path)
# Report a count instead of printing every parsed recipe document.
print("Parsed recipes: " + str(len(yaml_files)))
# Rewrites recipe files on disk for every recipe that appears in the mapping.
annotate_yamls(yaml_files, mapping)

EXTRA has been found: weeder


EXTRA has been found: bbmap
EXTRA has been found: freec
EXTRA has been found: searchgui
EXTRA has been found: trinity
EXTRA has been found: chromhmm
EXTRA has been found: fraggenescan
EXTRA has been found: gatk
EXTRA has been found: tripal
EXTRA has been found: openms
EXTRA has been found: freebayes
EXTRA has been found: vep
EXTRA has been found: sff2fastq
EXTRA has been found: qiime
EXTRA has been found: effectivet3
EXTRA has been found: mixomics
EXTRA has been found: Novoalign
EXTRA has been found: multiqc


meta.yaml
biotools.yaml
0.4
0.5
1.5.2
1.9.0
2.0.1-k128
1.0.3
2.4.0
1.2.36
3.11.14
1.0
1.1
2.3.0
0.2
0.3
0.7
0.8
37.10
37.17
0.6.2
1.2
1.3
1.3.1
2.16.2
2.17.0
2.19.1
2.20.1
2.22
2.23.0
2.25
2.26.0
0.7.0
2.12.1
2.8.2
2.0.9
2.2.12
1.48.0
3.12.1
1.22.0
3.28.10
3.28.2
3.3.0
1.4.0
2.13.0
1.2.14
0.8.11
1.44.0
1.46.0
2.1.4
2.1.5
0.8.0
2.2.21
2.5.0
2.2.22
1.0.0
2.2.4
2.2.5
0.9.1
0.1.1
0.5.9
0.7.3a
0.7.8
1.3.2
3.0
1.2.2-b
1.2.3
1.1.0
0.7.2
1.0.6
1.5.8.2
1.5.9.1
2.3.5
2.3.6
2.4.1
3.12.3
0.7.12
0.8.22
0.8.24
0.8.26
0.8.27
0.8.28
0.8.29
0.8.30
0.8.31
0.8.36
0.9.10
0.10.0
0.8.2
0.9.0
0.06
1.1.2
2.0.17
3.1.3
3.2
0.0.4
0.0.5
0.0.6
0.5.10a
0.5.6
0.5.8
0.6.1
0.10.1
0.11.3
2.1.8
20141125
20151222
v2.3.4
v2.4.0
0.1.0
0.2.0
0.5.3
0.5.4
16.10.4
16.10.8
16.4.0
16.7.10
2.0
3.5
4.4.0
5.0.0
6.8.3
9.07
9.10
2014.12.23
2015.09.10
2015.12.31
2016.09.23
2017.02.15
2017.05.08
0.50
0.1.3
1.3.20
1.3.26
2.3.2
0.6.1.post1
0.11.1
0.9.9
0.3.0
0.3.2
1.0.2
1.5.3
4.6.1
1.0.7.2
6.0.77
7.0.91
1.1.11
2.2.3
3.6.0
1.4.5
2.0.8
2.3