In [1]:
import json
import numpy as np
import pandas as pd
import requests

### ontologies.json
- traits annotated by Laurel Cooper

In [3]:
df_0 = pd.read_csv('data/interim/ontology_annotations.csv')
print(df_0.shape)
df_0.head()

(85, 13)


Unnamed: 0,Variable,Variable Description,Units,Method Name 1,Method Name 2,Trait Ontology Identifier (TO:ID),termId,TO label,Comments/Notes,CO_324 ID,CO_324 label,Comments/Notes.1,coPermalink
0,days_to_emergence,Planting date: 2017-04-20,days,,,TO:0006061,http://purl.obolibrary.org/obo/TO_0006061,days to germinate,related,,,,
1,emergence_count,number of plants counted within plot or subplo...,count,plant emergence counts,,TO:0000202,http://purl.obolibrary.org/obo/TO_0000202,germination ratio,related,CO_324:0000486,sorghum plant number - counting method,related,http://www.cropontology.org/terms/CO_324:00004...
2,seedling_emergence_rate,Seedling emergence rate,ratio,Seedling emergence rate count,,TO:0000430,http://purl.obolibrary.org/obo/TO_0000430,germination rate,related,,,,
3,flag_leaf_emergence_time,Number of days from sowing to the date when 50...,days,Visual classification of sorghum growth stages...,,TO:0001008,http://purl.obolibrary.org/obo/TO_0001008,days to flag leaf emergence,exact,CO_324:0000274,sorghum flag leaf ligulation time trait,exact,https://www.cropontology.org/terms/CO_324:0000...
4,aboveground_dry_biomass,Aboveground dry biomass,kg / ha,Whole above ground biomass at harvest,,TO:0000327\n,http://purl.obolibrary.org/obo/TO_0000327,shoot system yield trait\n,subclass,CO_324:0000553,sorghum whole above ground biomass yield at ha...,,https://www.cropontology.org/terms/CO_324:0000...


In [4]:
to_values = df_0['Trait Ontology Identifier (TO:ID)'].values
new_values = []

for v in to_values:

    if type(v) == str:

        if v.startswith('TO:'):
            new_values.append(v)

        elif v.startswith(' '):
            new_values.append(v.strip())

        elif v.str.endswith('\n'):
            new_values.append(v[:-1])

        else:
            print('Something is wrong.')

new_values[0:5]        

['TO:0006061', 'TO:0000202', 'TO:0000430', 'TO:0001008', 'TO:0000327\n']

In [5]:
to_urls = []

for ti in df_0.termId.values:
    
    if type(ti) == str:
        
        to_urls.append(ti)

In [6]:
unique_to_urls = list(set(to_urls))
len(unique_to_urls)

33

#### Elements needed 

- authors (leave out for now)
- copyright
- description
- documentationURL
- license
- ontologyDbId
- ontologyName
- version

In [7]:
to_terms = []

for to in unique_to_urls:
    
    term = to[-10:]
    to_terms.append(term)
    
to_terms[0:5]

['TO_0000446', 'TO_0001004', 'TO_0000370', 'TO_0000559', 'TO_0020115']

In [8]:
documentationURLs = unique_to_urls
ontologyDbIds = to_terms

df = pd.DataFrame(data = {'copyright': '2000-2008 Cold Spring Harbor Laboratory and Cornell University, USA',
                         'description': 'Plant Trait Ontology', 'documentationURL': unique_to_urls,
                         'license': 'CC-BY 4.0', 'ontologyDbId': to_terms, 'ontologyName': 'Plant Trait Ontology',
                         'version': '1.2'})

print(df.shape)
df.head(3)

(33, 7)


Unnamed: 0,copyright,description,documentationURL,license,ontologyDbId,ontologyName,version
0,2000-2008 Cold Spring Harbor Laboratory and Co...,Plant Trait Ontology,http://purl.obolibrary.org/obo/TO_0000446,CC-BY 4.0,TO_0000446,Plant Trait Ontology,1.2
1,2000-2008 Cold Spring Harbor Laboratory and Co...,Plant Trait Ontology,http://purl.obolibrary.org/obo/TO_0001004,CC-BY 4.0,TO_0001004,Plant Trait Ontology,1.2
2,2000-2008 Cold Spring Harbor Laboratory and Co...,Plant Trait Ontology,http://purl.obolibrary.org/obo/TO_0000370,CC-BY 4.0,TO_0000370,Plant Trait Ontology,1.2


In [9]:
record_dict = df.to_dict(orient='records')
record_dict[0:2]

[{'copyright': '2000-2008 Cold Spring Harbor Laboratory and Cornell University, USA',
  'description': 'Plant Trait Ontology',
  'documentationURL': 'http://purl.obolibrary.org/obo/TO_0000446',
  'license': 'CC-BY 4.0',
  'ontologyDbId': 'TO_0000446',
  'ontologyName': 'Plant Trait Ontology',
  'version': '1.2'},
 {'copyright': '2000-2008 Cold Spring Harbor Laboratory and Cornell University, USA',
  'description': 'Plant Trait Ontology',
  'documentationURL': 'http://purl.obolibrary.org/obo/TO_0001004',
  'license': 'CC-BY 4.0',
  'ontologyDbId': 'TO_0001004',
  'ontologyName': 'Plant Trait Ontology',
  'version': '1.2'}]

### methods.json
**Sources**
- betydb API `methods` [endpoint](https://terraref.ncsa.illinois.edu/bety/api/v1/methods)

In [10]:
url = 'https://terraref.ncsa.illinois.edu/bety/api/v1/methods'
response = requests.get(url)

In [11]:
response_json = response.json()
# response_json

In [12]:
type(response_json)

dict

In [13]:
len(response_json)

2

In [14]:
response_json.keys()

dict_keys(['metadata', 'data'])

In [15]:
len(response_json['data'])

89

In [16]:
response_json['data'][0]

{'method': {'id': 6000000001,
  'name': 'PlantCV',
  'description': '',
  'citation_id': 6000000005,
  'created_at': '2016-10-19 16:49:41 -0500',
  'updated_at': '2016-10-19 16:49:41 -0500',
  'number of associated traits': 5490,
  'number of associated yields': 0,
  'view_url': 'https://terraref.ncsa.illinois.edu/bety/methods/6000000001',
  'edit_url': 'https://terraref.ncsa.illinois.edu/bety/methods/6000000001/edit'}}

#### Compare with methods from ontology annotations

In [17]:
csv_methods = df_0['Method Name 1'].unique()
len(csv_methods)

28

In [18]:
betydb_methods = []

for i in range(len(response_json['data'])):

    betydb_method = response_json['data'][i]['method']['name']
    betydb_methods.append(betydb_method)

In [19]:
betydb_methods[:5]

['PlantCV',
 '95th quantiles height Estimation from 3D Scanner',
 'Manual canopy height',
 'Specific Leaf Area Leaf Punch',
 'Mature leaf length']

In [20]:
len(set(betydb_methods))

89

#### Methods response structure

In [21]:
# "result": {
#     "data": [
#       {
#         "additionalInfo": {
#           "additionalProp1": "string",
#           "additionalProp2": "string",
#           "additionalProp3": "string"
#         },
#         "bibliographicalReference": "Smith, 1893, Really Cool Paper, Popular Journal",
#         "description": "A measuring tape was used",
#         "externalReferences": [
#           {
#             "referenceID": "doi:10.155454/12341234",
#             "referenceSource": "DOI"
#           },
#           {
#             "referenceID": "http://purl.obolibrary.org/obo/ro.owl",
#             "referenceSource": "OBO Library"
#           },
#           {
#             "referenceID": "75a50e76",
#             "referenceSource": "Remote Data Collection Upload Tool"
#           }
#         ],
#         "formula": "a^2 + b^2 = c^2",
#         "methodClass": "Measurement",
#         "methodName": "Measuring Tape",
#         "ontologyReference": {
#           "documentationLinks": [
#             {
#               "URL": "http://purl.obolibrary.org/obo/ro.owl",
#               "type": "OBO"
#             }
#           ],
#           "ontologyDbId": "6b071868",
#           "ontologyName": "The Crop Ontology",
#           "version": "7.2.3"
#         },
#         "methodDbId": "0adb2764"
#       }
#     ]
#   }
# }

In [22]:
response_json['data'][1]['method']

{'id': 6000000004,
 'name': '95th quantiles height Estimation from 3D Scanner',
 'description': '1. remove the lowest points, assuming that these represent the ground, 2. compute the height below which 95% of the points occur',
 'citation_id': 6000000008,
 'created_at': '2016-11-08 16:45:58 -0600',
 'updated_at': '2016-11-08 16:46:18 -0600',
 'number of associated traits': 0,
 'number of associated yields': 0,
 'view_url': 'https://terraref.ncsa.illinois.edu/bety/methods/6000000004',
 'edit_url': 'https://terraref.ncsa.illinois.edu/bety/methods/6000000004/edit'}

In [23]:
def get_citation(citation_id):
    # Could make a request to an API, or look up in a dictionary
    citation = {
        'id': None,
        'author': None,
        'year': None
    }
    return citation

In [24]:
def generate_brapi_dict(betydb_dict):
    # print(betydb_dict)
    method_name = betydb_dict['method']['name']
    method_description = betydb_dict['method']['description']
    method = {
        'name': method_name,
        'description': method_description
    }
    
    citation_id = betydb_dict['method']['citation_id']
    citation = get_citation(citation_id)
    
    # print(method_name)
    brapi_dict = {
        'method': method,
        'citation': citation
    }
    return brapi_dict

In [25]:
def generate_brapi_list(betydb_list):
    brapi_list = []
    for betydb_dict in betydb_list:
        brapi_dict = generate_brapi_dict(betydb_dict)
        brapi_list.append(brapi_dict)
    return brapi_list

In [26]:
brapi_list = generate_brapi_list(response_json['data'])

In [27]:
brapi_list[0:2]

[{'method': {'name': 'PlantCV', 'description': ''},
  'citation': {'id': None, 'author': None, 'year': None}},
 {'method': {'name': '95th quantiles height Estimation from 3D Scanner',
   'description': '1. remove the lowest points, assuming that these represent the ground, 2. compute the height below which 95% of the points occur'},
  'citation': {'id': None, 'author': None, 'year': None}}]

In [28]:
with open('data/processed/brapi_methods.json', 'w') as outfile:
    json.dump(brapi_list, outfile)