In [1]:
import requests

# Base URLs of the BrAPI v2 endpoints used in this notebook.
# url_barley / url_wheat are queried for "allelematrix" and "variants",
# url_all is queried for "callsets".
url_barley = "https://divbrowse.ipk-gatersleben.de/agent_barley/brapi/v2"
url_wheat = "https://divbrowse.ipk-gatersleben.de/agent_wheat/brapi/v2"
url_all = "https://agent.ipk-gatersleben.de/genotyping/brapi/v2"


In [2]:
def brapiRequest(baseUrl,call,params=None):
    """Send a GET request to a BrAPI call and return the decoded JSON answer.

    Parameters
    ----------
    baseUrl : str
        Base URL of the server; ``call`` is appended after a "/".
    call : str
        Name of the call (e.g. "callsets").
    params : dict, optional
        Query parameters. ``None`` (the default) means "no parameters".

    Returns
    -------
    tuple
        ``(data, statusCode, error, fullUrl)``:

        * request succeeded and body is JSON: ``(parsed JSON, status, None, fullUrl)``
        * server answered with an error status: ``(None, status, body text, fullUrl)``
        * body could not be decoded as JSON: ``(None, 500, body text, fullUrl)``
        * any other failure (e.g. the request could not be sent):
          ``(None, 500, "error: <message>", None)``

    Ordinary exceptions are reported through the returned tuple instead of
    being raised, so callers have to check whether ``data`` is ``None``.
    """
    try:
        # A None default avoids sharing one mutable dict between all calls.
        if params is None:
            params = {}
        headers = {"Accept": "application/json"}
        url = "{}/{}".format(baseUrl,call)
        # Readable (not URL-encoded) form of the request, returned for diagnostics only;
        # the real query string is built by requests from `params`.
        fullUrl = "{}?{}".format(url,"&".join(["{}={}".format(x,y) for x,y in params.items()]))
        response = requests.get(url, params=params, headers=headers)
        if not response.ok:
            return None, response.status_code, response.text, fullUrl
        try:
            return response.json(), response.status_code, None, fullUrl
        except ValueError:
            # JSON decoding errors are ValueError subclasses; catching only those
            # keeps KeyboardInterrupt/SystemExit from being swallowed here.
            return None, 500, response.text, fullUrl
    except Exception as e:
        return None, 500, "error: {}".format(str(e)), None


In [3]:
def _fetchAllelematrixDimension(url, dimension, pageParam, pageSizeParam, otherPageSizeParam, resultKey):
    """Page through one dimension of the "allelematrix" call and collect its identifiers."""
    identifiers = []
    page = 0
    totalPages = 1  # placeholder until the first response reports the real page count
    while page<totalPages:
        (response, responseCode, responseError, responseUrl) = brapiRequest(url, "allelematrix",
                                       {"preview":"true",
                                        pageParam: page,
                                        pageSizeParam: 10000,
                                        otherPageSizeParam: 1})
        if response is None:
            # brapiRequest reports failures through its return value; stop with the
            # real cause instead of failing on None["result"] below.
            raise RuntimeError("allelematrix request failed (url: {}, page: {}, status: {}): {}".format(
                responseUrl if responseUrl is not None else url, page, responseCode, responseError))
        for item in response["result"]["pagination"]:
            if item["dimension"]==dimension:
                totalPages = item["totalPages"]
        # extend() appends in place instead of copying the accumulated list per page
        identifiers.extend(response["result"][resultKey])
        page+=1
    return identifiers


def getAllelematrixVariantsCallsets(url):
    """Collect all variant and callset identifiers exposed by the "allelematrix" call.

    The call is paged twice: once over the VARIANTS dimension (reading
    "variantDbIds") and once over the CALLSETS dimension (reading
    "callSetDbIds"). The paged dimension uses a page size of 10000 while the
    other dimension is requested with a page size of 1.

    Parameters
    ----------
    url : str
        Base URL passed to ``brapiRequest``.

    Returns
    -------
    tuple
        ``(variants, callsets)``, two lists of identifiers in the order returned.

    Raises
    ------
    RuntimeError
        If a request fails, i.e. ``brapiRequest`` returns no data.
    """
    variants = _fetchAllelematrixDimension(url, "VARIANTS",
                                           "dimensionVariantPage",
                                           "dimensionVariantPageSize",
                                           "dimensionCallSetPageSize",
                                           "variantDbIds")
    callsets = _fetchAllelematrixDimension(url, "CALLSETS",
                                           "dimensionCallSetPage",
                                           "dimensionCallSetPageSize",
                                           "dimensionVariantPageSize",
                                           "callSetDbIds")
    return variants,callsets

# Variant and callset identifiers reported by the allelematrix call of each server.
(allelematrix_variants_barley,
 allelematrix_callsets_barley) = getAllelematrixVariantsCallsets(url_barley)
(allelematrix_variants_wheat,
 allelematrix_callsets_wheat) = getAllelematrixVariantsCallsets(url_wheat)


In [4]:
def getAllIdentifiers(url,call,identifier):
    """Collect one field from every entry of a paged BrAPI list call.

    Pages are requested with a page size of 10000 until the page count
    reported in ``metadata.pagination.totalPages`` is reached.

    Parameters
    ----------
    url : str
        Base URL passed to ``brapiRequest``.
    call : str
        Name of the call to page through (e.g. "variants").
    identifier : str
        Key read from every item of ``result.data`` (e.g. "variantDbId").

    Returns
    -------
    list
        The collected values, in the order returned.

    Raises
    ------
    RuntimeError
        If a request fails, i.e. ``brapiRequest`` returns no data.
    """
    page = 0
    totalPages = 1  # placeholder until the first response reports the real page count
    results = []
    while page<totalPages:
        (response, responseCode, responseError, responseUrl) = brapiRequest(url, call,
                                       {"page": page, "pageSize": 10000})
        if response is None:
            # brapiRequest reports failures through its return value; stop with the
            # real cause instead of failing on None["result"] below.
            raise RuntimeError("{} request failed (url: {}, page: {}, status: {}): {}".format(
                call, responseUrl if responseUrl is not None else url, page, responseCode, responseError))
        results.extend(item[identifier] for item in response["result"]["data"])
        totalPages = response["metadata"]["pagination"]["totalPages"]
        page+=1
    return results

In [5]:
# Identifiers returned by the "variants" call of each server.
variants_barley = getAllIdentifiers(url=url_barley, call="variants", identifier="variantDbId")
variants_wheat = getAllIdentifiers(url=url_wheat, call="variants", identifier="variantDbId")

# Compare allelematrix and "variants" identifiers in both directions;
# all four differences are expected to be empty sets.
print(set(allelematrix_variants_barley) - set(variants_barley))
print(set(allelematrix_variants_wheat) - set(variants_wheat))
print(set(variants_barley) - set(allelematrix_variants_barley))
print(set(variants_wheat) - set(allelematrix_variants_wheat))

set()
set()
set()
set()


In [6]:
# Every callSetDbId returned by the "callsets" call of the url_all server.
callsets = getAllIdentifiers(url=url_all, call="callsets", identifier="callSetDbId")

In [7]:
# Per crop: number of distinct allelematrix callsets, and how many of them
# are present in / missing from the "callsets" call.
print("wheat: allelematrix %s callsets, %s are in callsets, %s not in callsets" % (
    len(set(allelematrix_callsets_wheat)),
    len(set(allelematrix_callsets_wheat) & set(callsets)),
    len(set(allelematrix_callsets_wheat) - set(callsets)),
))
print("barley: allelematrix %s callsets, %s are in callsets, %s not in callsets" % (
    len(set(allelematrix_callsets_barley)),
    len(set(allelematrix_callsets_barley) & set(callsets)),
    len(set(allelematrix_callsets_barley) - set(callsets)),
))
# Opposite direction: callsets that appear in neither allelematrix.
print("%s callsets not in allelematrix wheat or barley" % (
    len(set(callsets) - set(allelematrix_callsets_wheat) - set(allelematrix_callsets_barley)),
))

wheat: allelematrix 15225 callsets, 6957 are in callsets, 8268 not in callsets
barley: allelematrix 26737 callsets, 5253 are in callsets, 21484 not in callsets
59 callsets not in allelematrix wheat or barley


In [11]:
# First ten (sorted) wheat allelematrix callsets that the "callsets" call does not return.
sorted(set(allelematrix_callsets_wheat) - set(callsets))[:10]

['SAMEA10922514',
 'SAMEA10922515',
 'SAMEA10922516',
 'SAMEA10922517',
 'SAMEA10922518',
 'SAMEA10922519',
 'SAMEA10922520',
 'SAMEA10922521',
 'SAMEA10922522',
 'SAMEA10922523']

In [12]:
# First ten (sorted) barley allelematrix callsets that the "callsets" call does not return.
sorted(set(allelematrix_callsets_barley) - set(callsets))[:10]

['SAMEA10401827',
 'SAMEA10401851',
 'SAMEA10401852',
 'SAMEA10401853',
 'SAMEA10401854',
 'SAMEA10401855',
 'SAMEA10401856',
 'SAMEA10401857',
 'SAMEA10401862',
 'SAMEA10401865']

In [13]:
# First ten (sorted) callsets that appear in neither the wheat nor the barley allelematrix.
sorted(set(callsets) - set(allelematrix_callsets_wheat) - set(allelematrix_callsets_barley))[:10]

['None',
 'SAMEA10402005',
 'SAMEA10402105',
 'SAMEA10402112',
 'SAMEA10402115',
 'SAMEA10402138',
 'SAMEA10402208',
 'SAMEA10402275',
 'SAMEA10402285',
 'SAMEA10402349']

In [17]:
# Check the examples from the e-mail: for each identifier print whether it is
# in the wheat allelematrix callsets and whether it is in the "callsets" list.
entries = [
    "SAMEA7824966",
    "SAMEA7825972",
    "SAMEA7828512",
    "SAMEA7826208",
    "SAMEA7821926",
    "SAMEA7827342",
]
for item in entries:
    print(item in allelematrix_callsets_wheat, item in callsets)

True False
True False
True False
True False
True False
True False
