### Breeding Analytics data gathering module

Below example show how to extract phenotype data from different data sources to be used for Anaytics pipeline.

The datasources provide REST API to extract following entities,

Plots - List of the plots under an experiment

Plot Measurements - List of measurements for traits in each plot

Occurrence - Corresponds to studies in Brapi specification

Experiment - Corresponds to trial in Brapi specification

Trait - Observation variables in Brapi

In [3]:
!pwd

/home/vg249/af-core


In [5]:
# DataReaderFactory is factory module which generates objects to read phenotype data for a given data source

from data_reader.data_reader_factory import DataReaderFactory

from models.enums import DataSource 

### Extract data from EBS datasource

In [41]:
api_url = "https://b4rapi-uat.ebsproject.org/v3/"

In [42]:
api_token = "eyJ4NXQiOiJaalJtWVRNd05USmpPV1U1TW1Jek1qZ3pOREkzWTJJeU1tSXlZMkV6TWpkaFpqVmlNamMwWmciLCJraWQiOiJaalJtWVRNd05USmpPV1U1TW1Jek1qZ3pOREkzWTJJeU1tSXlZMkV6TWpkaFpqVmlNamMwWmdfUlMyNTYiLCJhbGciOiJSUzI1NiJ9.eyJhdF9oYXNoIjoiLXNkVzI2OVlqREhta2lvVWVDbVlrUSIsImh0dHA6XC9cL3dzbzIub3JnXC9jbGFpbXNcL3VzZXJuYW1lIjoiMTA4OTQyNzQyODI2NzExNDc0MjQ0Iiwic3ViIjoidmcyNDlAY29ybmVsbC5lZHUiLCJodHRwOlwvXC93c28yLm9yZ1wvY2xhaW1zXC9waG90b3VybCI6Imh0dHBzOlwvXC9saDQuZ29vZ2xldXNlcmNvbnRlbnQuY29tXC8tSG1tSkJMRmRjZkFcL0FBQUFBQUFBQUFJXC9BQUFBQUFBQUFBQVwvQU1adXVjbF9yWTdQRFBJTnpZZ0NhMjZ2MVl6R3VIellkZ1wvczk2LWNcL3Bob3RvLmpwZyIsImFtciI6WyJTQU1MU1NPQXV0aGVudGljYXRvciJdLCJpc3MiOiJodHRwczpcL1wvc2cuZWJzcHJvamVjdC5vcmc6OTQ0M1wvb2F1dGgyXC90b2tlbiIsImh0dHA6XC9cL3dzbzIub3JnXC9jbGFpbXNcL2dpdmVubmFtZSI6IlZpc2hudSIsImh0dHA6XC9cL3dzbzIub3JnXC9jbGFpbXNcL2lkZW50aXR5XC9lbWFpbFZlcmlmaWVkIjoidHJ1ZSIsImF1ZCI6IlNONl96UUNxcnBwelp0bUVmVlhuRGpnVXlvd2EiLCJjX2hhc2giOiJKcUY3RWRScExNOVBreG43aWVkMzV3IiwibmJmIjoxNjE2MTcyMDU3LCJodHRwOlwvXC93c28yLm9yZ1wvY2xhaW1zXC9sb2NhbCI6ImVuIiwiaHR0cDpcL1wvd3NvMi5vcmdcL2NsYWltc1wvZnVsbG5hbWUiOiJWaXNobnUgR292aW5kYXJhaiIsImF6cCI6IlNONl96UUNxcnBwelp0bUVmVlhuRGpnVXlvd2EiLCJodHRwOlwvXC93c28yLm9yZ1wvY2xhaW1zXC9lbWFpbGFkZHJlc3MiOiJ2ZzI0OUBjb3JuZWxsLmVkdSIsImh0dHA6XC9cL3dzbzIub3JnXC9jbGFpbXNcL2xhc3RuYW1lIjoiR292aW5kYXJhaiIsImV4cCI6MTYxNjE3NTY1NywiaWF0IjoxNjE2MTcyMDU3fQ.aD8_2wyjDPl38pdnJEH2f3XsuhoegsJvVaQiDMB_1E9BQzHHtJ-gfPYd24s-8NfFrBFig30fqtEbFre6RS4iNbM6-oeq1JBVTkxKmKa318dk7yudN-JP4PaDh4NDyxyfotuz6WbzXeCmi_WUZTQFIEK6t2iP5a25yIdRkDUKgqhkVuAu03NxSK6hlmSPPrrVia0MpMtCTMGTDrLVTAvSwnlqlURZbTufwwZLycV4QWNvAMJ_XdNU4zYUQ93zbLZ1s6h28O4IElJ-3O9buYKG5QpHqd64fp0MPUjSw5L1hwj8AbdEbXglFePWoeP3mbbJ2uR4Z7J_r9bCXoIhShE-Jw"

In [43]:
# get phenotype data interface for Ebs
phenotype_data = DataReaderFactory(DataSource.EBS).get_pheotype_data(
    api_base_url=api_url, 
    api_bearer_token=api_token)

#### Get plot and plot measurements

In [44]:
phenotype_data.get_plots(occurrence_id=74)

Unnamed: 0,blk,pa_x,plot_id,pa_y,rep_factor,entry_id,plot_qc,occurrence_id,location_id,experiment_id
0,1,,59406,,1,72804,G,74,74,4
1,1,,285118,,1,280279,G,74,74,4
2,1,,285115,,1,280278,G,74,74,4
3,1,,431501,,1,407542,G,74,74,4
4,1,,285107,,1,280277,G,74,74,4
...,...,...,...,...,...,...,...,...,...,...
62,1,,285150,,1,283879,G,74,74,4
63,1,,285151,,2,283879,G,74,74,4
64,1,,285120,,1,280280,G,74,74,4
65,1,,285121,,1,280281,G,74,74,4


In [45]:
phenotype_data.get_plot_measurements(occurrence_id=74)

NotImplementedError: 

In [46]:
phenotype_data.get_occurrence(occurrence_id=74)

Occurrence(occurrence_id=74, occurrence_name='RGA2013WS-2', experiment_id=4, experiment_name='IRSEA-RGA-2013-WS-2', location_id=74, location='10001', rep_count=0, entry_count=32, plot_count=67)

### Extract data from Brapi data source

In [47]:
api_url = "https://solgenomics.net/brapi/v2/"

In [48]:
# get phenotype data interface for Brapi data soruce
phenotype_data = DataReaderFactory(DataSource.BRAPI).get_pheotype_data(api_base_url=api_url)

#### Get plot and plot measurements

In [49]:
phenotype_data.get_plots(occurrence_id=68)

Unnamed: 0,blk,location_id,pa_x,entry_id,pa_y,rep_factor,occurrence_id,plot_id,experiment_id,plot_qc
0,1,15,1,30826,1,1,68,31089,,G
1,1,15,1,30827,2,1,68,31016,,G
2,1,15,1,30828,4,1,68,31122,,G
3,1,15,1,30829,5,1,68,31129,,G
4,1,15,1,30830,6,1,68,31046,,G
...,...,...,...,...,...,...,...,...,...,...
157,1,15,2,29298,16,1,68,31113,,G
158,1,15,2,29298,18,1,68,31038,,G
159,1,15,4,29298,8,1,68,31039,,G
160,1,15,5,29298,1,1,68,30983,,G


In [50]:
phenotype_data.get_plot_measurements(occurrence_id=68)

Unnamed: 0,trait_value,trait_id,plot_id,trait_qc
0,2.76666666666667,39966,31089,G
1,3.56,39966,31016,G
2,7.55,39966,31122,G
3,3.08888888888889,39966,31129,G
4,7.77857142857143,39966,31046,G
...,...,...,...,...
995,24.261,56263,31180,G
996,33.2564,56252,31180,G
997,72.9655,56243,31180,G
998,46.8456,56254,31180,G


In [51]:
phenotype_data.get_occurrence(occurrence_id=68)

Occurrence(occurrence_id=68, occurrence_name='17S62', experiment_id=54, experiment_name='U Georgia', location_id=15, location='Blairsville GA', rep_count=None, entry_count=None, plot_count=None)