In [1]:
import scoped_mapping
import requests
import yaml
import sys

In [2]:
# --- Model and enum selection ---
# Path of the YAML model file that is read by scoped_mapping.read_yaml_model.
model_file_name = '../../data/webmap_enums.yaml'
# Name of the enum to map; also passed as cat_name to map_from_yaml.
my_selected_enum = 'Taxon_enum'

# --- Search settings passed to scoped_mapping.map_from_yaml ---
# Passed as bad_chars. Presumably these characters are replaced by whitespace
# when the search query is built (compare the raw and query columns in the
# outputs below) -- TODO confirm against scoped_mapping.
my_bad_chars = '._-'
# Passed as ontoprefix: the ontology to search.
my_onto_prefix = 'ncbitaxon'
# Empty string: use the OLS weighted default query fields.
my_query_fields = ''

# Passed as rr; presumably the number of rows requested per query -- TODO confirm.
my_row_request = 3
# Passed as string_dist_arg; meaning is defined by scoped_mapping -- TODO confirm.
my_string_dist_arg = 2
# Passed as max_string_dist to get_best_acceptable; presumably the largest
# string distance that still counts as an acceptable match -- TODO confirm.
my_max_string_dist = 0.2

In [3]:
# Optional setup step, intentionally left disabled. When uncommented, these
# lines download webmap_enums.yaml from the scoped-mapping GitHub repository
# and write the raw response bytes to model_file_name, which the next cell
# then reads from disk. Leave commented out if the file already exists locally.
# model_url = 'https://raw.githubusercontent.com/turbomam/scoped-mapping/main/data/webmap_enums.yaml'
# model_response = requests.get(model_url)
# model_content = model_response.content
# with open(model_file_name, 'wb') as f:
#     f.write(model_content)

In [4]:
# Read the YAML model from model_file_name. The result is dumped below with
# yaml.safe_dump and later passed to map_from_yaml and rewrite_yaml.
my_model = scoped_mapping.read_yaml_model(model_file_name)

## What does the model look like before mapping?

In [5]:
# Write the whole model to the cell output as block-style YAML
# (default_flow_style=False) to give a "before mapping" snapshot.
yaml.safe_dump(my_model, sys.stdout, default_flow_style=False)

classes:
  example:
    slot_usage: {}
    slots:
    - Taxon
    - FAO
    - Engineering
default_prefix: example
description: example
enums:
  Engineering_enum:
    permissible_values:
      Deletion:
        description: Deletion
      Insertion:
        description: Insertion
      plasmid:
        description: plasmid
  FAO_enum:
    permissible_values:
      Acrisols:
        description: Acrisols
      Andosols:
        description: Andosols
      Arenosols:
        description: Arenosols
      Cambisols:
        description: Cambisols
      Chernozems:
        description: Chernozems
      Ferralsols:
        description: Ferralsols
      Fluvisols:
        description: Fluvisols
      Gleysols:
        description: Gleysols
      Greyzems:
        description: Greyzems
      Gypsisols:
        description: Gypsisols
      Histosols:
        description: Histosols
      Kastanozems:
        description: Kastanozems
      Lithosols:
        description: Lithosols
      Luvisols:


In [6]:
# Run the mapping for the selected enum of the model using the settings from
# the configuration cell. The result (yaml_mapped) is the input to
# get_best_acceptable and get_no_acceptable_mappings in the following cells.
# With print_enums=True the cell output shows the list of enum names in the
# model (presumably printed by map_from_yaml itself -- TODO confirm).
yaml_mapped = scoped_mapping.map_from_yaml(
    my_model,
    my_selected_enum,
    print_enums=True,
    bad_chars=my_bad_chars,
    cat_name=my_selected_enum,
    ontoprefix=my_onto_prefix,
    query_fields=my_query_fields,
    string_dist_arg=my_string_dist_arg,
    rr=my_row_request,
)

['Engineering_enum', 'FAO_enum', 'Taxon_enum']


In [7]:
# Select the best acceptable mapping per raw value from the mapping results,
# using my_max_string_dist as the max_string_dist cut-off, and display it.
my_best_acceptable = scoped_mapping.get_best_acceptable(
    yaml_mapped,
    max_string_dist=my_max_string_dist,
)
my_best_acceptable

Unnamed: 0,category,raw,query,name,string_dist_rank,string_dist,obo_id,label,search_rank,ontology_prefix,scope,type,iri,ontology_name
3,Taxon_enum,Aequorea.victoria,aequorea victoria,Aequorea victoria,2,0.0,NCBITaxon:6100,Aequorea victoria,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_6100,ncbitaxon
0,Taxon_enum,Aequorea_victoria,aequorea victoria,Aequorea victoria,1,0.0,NCBITaxon:6100,Aequorea victoria,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_6100,ncbitaxon
14,Taxon_enum,Arabidopsis thaliana,arabidopsis thaliana,Arabidopsis thaliana,1,0.0,NCBITaxon:3702,Arabidopsis thaliana,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_3702,ncbitaxon
25,Taxon_enum,Ashbya.gossypii,ashbya gossypii,Ashbya gossypii,1,0.0,NCBITaxon:33169,Eremothecium gossypii,1,NCBITAXON,hasRelatedSynonym,synonym,http://purl.obolibrary.org/obo/NCBITaxon_33169,ncbitaxon
72,Taxon_enum,Bacteriophage.Mu,bacteriophage mu,Bacteriophage Mu,1,0.0,NCBITaxon:2681603,Escherichia phage Mu,1,NCBITAXON,hasExactSynonym,equivalent name,http://purl.obolibrary.org/obo/NCBITaxon_2681603,ncbitaxon
135,Taxon_enum,Discosoma.sp,discosoma sp,Discosoma sp.,1,0.043,NCBITaxon:86600,Discosoma sp.,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_86600,ncbitaxon
164,Taxon_enum,Lentivirus.human-immunodeficiency-virus1,lentivirus human immunodeficiency virus1,Human immunodeficiency virus,1,0.12,NCBITaxon:12721,Human immunodeficiency virus,3,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_12721,ncbitaxon
172,Taxon_enum,Nepovirus.Tobacco-ringspot-virus,nepovirus tobacco ringspot virus,Tobacco ringspot virus,1,0.114,NCBITaxon:12282,Tobacco ringspot virus,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_12282,ncbitaxon
191,Taxon_enum,Salmonella enterica subsp. enterica serovar Ty...,salmonella enterica subsp enterica serovar typ...,Salmonella enterica subsp. enterica serovar Ty...,1,0.018,NCBITaxon:90371,Salmonella enterica subsp. enterica serovar Ty...,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_90371,ncbitaxon
221,Taxon_enum,Simian.virus.40,simian virus 40,Simian virus 41,1,0.071,NCBITaxon:2560766,Simian orthorubulavirus,2,NCBITAXON,hasExactSynonym,equivalent name,http://purl.obolibrary.org/obo/NCBITaxon_2560766,ncbitaxon


In [8]:
# Collect the candidate rows for values that did not get an acceptable
# mapping (derived from the full results and the accepted set), and display
# them for manual review.
no_acceptable_mappings = scoped_mapping.get_no_acceptable_mappings(
    yaml_mapped,
    my_best_acceptable,
)
no_acceptable_mappings

Unnamed: 0,category,raw,query,name,string_dist_rank,string_dist,obo_id,label,search_rank,ontology_prefix,scope,type,iri,ontology_name
138,Taxon_enum,herpes.simplex.virus-1,herpes simplex virus 1,Herpes simplex virus unknown type,1,0.201,NCBITaxon:126283,Herpes simplex virus unknown type,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_126283,ncbitaxon
139,Taxon_enum,herpes.simplex.virus-1,herpes simplex virus 1,Opheodrys herpes virus 1,2,0.326,NCBITaxon:2321065,Opheodrys herpes virus 1,2,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_2321065,ncbitaxon
140,Taxon_enum,herpes.simplex.virus-1,herpes simplex virus 1,Oyster herpes-like virus,3,0.541,NCBITaxon:72012,Oyster herpes-like virus,3,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_72012,ncbitaxon
169,Taxon_enum,,na,Bacillus sp. B4-WWTP-NA-D-NA-NA,1,0.548,NCBITaxon:2653216,Bacillus sp. B4-WWTP-NA-D-NA-NA,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_2653216,ncbitaxon
170,Taxon_enum,,na,Spinadesha sp. BNHM-NA,2,0.6,NCBITaxon:322292,Spinadesha sp. BNHM-NA,2,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_322292,ncbitaxon
171,Taxon_enum,,na,Pseudoshirakia sp. BNHM-NA,3,0.8,NCBITaxon:322290,Pseudoshirakia sp. BNHM-NA,3,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_322290,ncbitaxon
181,Taxon_enum,phage.lambda,phage lambda,Escherichia phage Lambda,1,0.337,NCBITaxon:2681611,Escherichia phage Lambda,1,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_2681611,ncbitaxon
182,Taxon_enum,phage.lambda,phage lambda,Bacillus phage lambda Ba03,2,0.362,NCBITaxon:229345,Bacillus phage lambda Ba03,2,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_229345,ncbitaxon
185,Taxon_enum,phage.lambda,phage lambda,Bacillus phage lambda Ba04,3,0.362,NCBITaxon:229346,Bacillus phage lambda Ba04,3,NCBITAXON,label,label,http://purl.obolibrary.org/obo/NCBITaxon_229346,ncbitaxon
183,Taxon_enum,phage.lambda,phage lambda,Bacillus anthracis phage lambda Ba03,4,0.494,NCBITaxon:229345,Bacillus phage lambda Ba03,2,NCBITAXON,hasExactSynonym,equivalent name,http://purl.obolibrary.org/obo/NCBITaxon_229345,ncbitaxon


In [9]:
# Apply the accepted mappings to the selected enum of the model, then dump the
# model again as block-style YAML for an "after mapping" view.
# NOTE(review): the return value of rewrite_yaml is discarded, so this
# presumably updates my_model in place -- confirm. If so, re-running this cell
# rewrites an already-rewritten model, and the earlier "before" dump can only
# be reproduced by re-reading the model from disk first.
scoped_mapping.rewrite_yaml(my_model, my_selected_enum, my_best_acceptable)
yaml.safe_dump(my_model, sys.stdout, default_flow_style=False)

classes:
  example:
    slot_usage: {}
    slots:
    - Taxon
    - FAO
    - Engineering
default_prefix: example
description: example
enums:
  Engineering_enum:
    permissible_values:
      Deletion:
        description: Deletion
      Insertion:
        description: Insertion
      plasmid:
        description: plasmid
  FAO_enum:
    permissible_values:
      Acrisols:
        description: Acrisols
      Andosols:
        description: Andosols
      Arenosols:
        description: Arenosols
      Cambisols:
        description: Cambisols
      Chernozems:
        description: Chernozems
      Ferralsols:
        description: Ferralsols
      Fluvisols:
        description: Fluvisols
      Gleysols:
        description: Gleysols
      Greyzems:
        description: Greyzems
      Gypsisols:
        description: Gypsisols
      Histosols:
        description: Histosols
      Kastanozems:
        description: Kastanozems
      Lithosols:
        description: Lithosols
      Luvisols:
