In [None]:
!pip install --upgrade pip
!pip install fhir.resources
#https://mybinder.org/
#https://realpython.com/python-json/



In [None]:
import requests

# Terminology server base URL and the query prefix for expanding an implicit ValueSet.
endpoint = "https://ontoserver.csiro.au/stu3-latest"
# TODO: this prefix should be shorter / broken down into smaller pieces
valueSetExpand = "/ValueSet/$expand?url=http://snomed.info/sct?fhir_vs="
expand_API = f"{endpoint}{valueSetExpand}"

# List of simple "animal" tuples: (integer code, display name)
menagerie = [
    (406675000, 'Crocodile'),
    (33612001, 'Dolphin'),
    (12978006, 'Goldfish'),
    (395556009, 'Platypus'),
    (9354008, 'Zebra'),
    (57013002, 'Koala'),
    (18875000, 'Cobra'),
    (46783000, 'Wallaby'),
]

# Some classes to play with: (ECL suffix appended to expand_API, display name)
eclClasses = [
    ('ecl/<388002008', 'Monotremes'),
    ('ecl/<388006006', 'Marsupials'),
    ('ecl/<387976007', 'Mammals'),
    ('ecl/<90580008', 'Fish'),
    ('ecl/<107241004', 'Reptiles'),
]

print('classes and buckets created')

In [None]:
# Retrieve all the codes in each ValueSet expansion.
# Each expansion is stored as a set so membership tests are O(1) on average.

# Convert the response into a ValueSet object
from fhir.resources.valueset import ValueSet 

buckets=[]

for ecl in eclClasses:
    # A timeout keeps the cell from hanging indefinitely if the server stops answering.
    response=requests.get(expand_API+ecl[0], timeout=60)
    # Surface HTTP failures as a clear error instead of trying to parse the
    # error response as a ValueSet.
    response.raise_for_status()
    j=response.json()
    vs=ValueSet(j)
    # An expansion without any matches may have no `contains` list; treat it as empty.
    contains=(vs.expansion.contains if vs.expansion is not None else None) or []
    # add every code to a set()
    newSet={int(e.code) for e in contains}
    # add a "bucket" tuple with the name and set
    buckets.append((ecl[1],newSet))
    # The status is 2xx here. If the set is too big, pagination would also be
    # needed; that is not handled in this cell.
    print(response.status_code,' ',ecl[1],' bucket created')

print('all buckets have been created')

In [None]:

# Walk every bucket and print, indented under the bucket's name,
# each animal from the menagerie whose code is in that bucket.
for bucket_name, bucket_codes in buckets:
    print(bucket_name)
    for animal_code, animal_name in menagerie:
        if animal_code in bucket_codes:
            print('\t',animal_name)
        

In [None]:
# So what if we only want each animal in a single bucket?
# One option is to keep the buckets in priority order and drop an animal from
# the candidate list once it has been classified.
# O(m*n)

tempAnimals = menagerie.copy()

# Cycle through the buckets and list all the animals that can sit in that bucket.
for bucket in buckets:
    print(bucket[0])
    # Build the list of still-unclassified animals separately: removing items from
    # a list while iterating over it makes the loop skip the element that follows
    # each removal, so some animals would never be tested against the bucket.
    unclassified = []
    for animal in tempAnimals:
        if animal[0] in bucket[1]:
            print('\t',animal[1])
        else:
            unclassified.append(animal)
    # Only animals that no bucket has claimed so far stay available for the next bucket.
    tempAnimals = unclassified

In [None]:
#OR
#Go animal by animal instead: look for the first bucket that holds the animal
#and stop searching as soon as it has been classified.
#O(n*m)
tempAnimals = menagerie.copy()
for animal_code, animal_name in tempAnimals:
    for bucket_name, bucket_codes in buckets:
        if animal_code not in bucket_codes:
            continue
        print(animal_name,' is in ',bucket_name)
        break

In [None]:
#OR a third approach is to populate the buckets under the requirement that they be disjoint:
# each bucket only receives codes that have not already been "bucketed" by an earlier one.

def expand_codes(ecl_path, timeout=60):
    """Expand one implicit ValueSet and collect the codes of its expansion.

    Args:
        ecl_path: ECL suffix appended to ``expand_API`` (e.g. ``'ecl/<387961004'``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(status_code, codes)`` tuple, where ``codes`` is a set holding every
        code of the expansion converted to ``int``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response=requests.get(expand_API+ecl_path, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error response as a ValueSet.
    response.raise_for_status()
    vs=ValueSet(response.json())
    # An expansion without any matches may have no `contains` list; treat it as empty.
    contains=(vs.expansion.contains if vs.expansion is not None else None) or []
    return response.status_code, {int(e.code) for e in contains}

#Super Class for all animals
Animals=('ecl/<387961004','Animals')

#SuperSet containing all available animals
_, animalCodes=expand_codes(Animals[0])
AvailableAnimals=(Animals[1],animalCodes)

#now we repopulate our buckets
buckets=[]

for ecl in eclClasses:
    status,newSet=expand_codes(ecl[0])
    #limit newSet to those also in AvailableAnimals
    newSet=newSet.intersection(AvailableAnimals[1])
    #remove this newSet from AvailableAnimals so later buckets cannot claim the same codes
    AvailableAnimals[1].difference_update(newSet)

    buckets.append((ecl[1],newSet))
    # The status is 2xx here. If the set is too big, pagination would also be
    # needed; that is not handled in this cell.
    print(status,' ',ecl[1],' bucket created')

In [None]:
# For each bucket, print its name followed by the (tab-indented) names of the
# menagerie animals whose code belongs to that bucket.
for label, members in buckets:
    print(label)
    residents = [name for code, name in menagerie if code in members]
    for name in residents:
        print('\t',name)

In [None]:
#All of these methods still have O(n*m) performance.
#In the worst case we must cycle through every bucket (m) and, for each one, every item in the menagerie (n).
#Vectors would probably improve performance, e.g. by storing the results as a matrix.