In [3]:
from SPARQLWrapper import SPARQLWrapper, JSON
import re
import nltk
from nltk import wordnet as wn
from nltk.stem import *
# Build the set of known English words from the NLTK `words` corpus.
try:
    words = set(nltk.corpus.words.words())
except LookupError:
    # Fetch only the missing corpus. A bare nltk.download() opens the
    # interactive downloader, which stalls a "Restart & Run All".
    nltk.download('words')
    words = set(nltk.corpus.words.words())

In [4]:
def query_allergies(api_key=None):
    """Query the sparql.bioontology.org endpoint for allergy/allergen label pairs.

    The query selects every distinct (allergy_label, allergy, allergen,
    allergen_label) combination where the allergy has some superclass, has a
    `has_causative_agent` link to the allergen, and its lower-cased alternative
    label matches the pattern 'llerg'.

    Args:
        api_key: API key sent as the ``apikey`` request parameter. When omitted,
            the ``BIOPORTAL_API_KEY`` environment variable is used if set and
            non-empty; otherwise the key previously hard-coded here is used so
            existing calls keep working.

    Returns:
        The converted JSON result of the query; callers read the rows from
        ``result['results']['bindings']``.
    """
    import os  # local import so this cell does not depend on the import cell

    if api_key is None:
        # Prefer configuration from the environment over a credential in source.
        api_key = os.environ.get('BIOPORTAL_API_KEY') or 'fernando'

    sparql = SPARQLWrapper("http://sparql.bioontology.org/sparql")
    sparql.addCustomParameter("apikey", api_key)
    sparql.addCustomParameter("soft-limit", "-1")
    sparql.setQuery("""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX snomed-term: <http://purl.bioontology.org/ontology/SNOMEDCT/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX food: <http://data.lirmm.fr/ontologies/food#>

        SELECT DISTINCT ?allergy_label ?allergy ?allergen ?allergen_label
        WHERE
        {
            {
                ?allergy rdfs:subClassOf ?a .
                ?allergy snomed-term:has_causative_agent ?allergen .
                ?allergy skos:altLabel ?allergy_label .
                ?allergen skos:altLabel ?allergen_label.
                filter regex(lcase(str(?allergy_label)), 'llerg').
            }
        }
    """)
    sparql.setReturnFormat(JSON)
    results_allergies = sparql.query().convert()
    return results_allergies

In [6]:
# Run the allergy query once; the raw result is reused by later cells.
all_allergies = query_allergies()

# Lower-cased allergen labels. A set comprehension keeps the loop variable out
# of the notebook namespace, so no `del` is needed afterwards (the former
# `del(allergy)` raised NameError whenever the query returned no bindings).
all_allergens = {
    binding['allergen_label']['value'].lower()
    for binding in all_allergies['results']['bindings']
}

In [7]:
def query_drugs(drug, api_key=None):
    """Query the sparql.bioontology.org endpoint for substances matching ``drug``.

    The query walks ``rdfs:subClassOf`` from ``snomed-term:105590001`` down to
    seven levels deep (one UNION branch per depth). For each descendant it
    returns the alternative label of the substance together with the preferred
    label of its direct parent, keeping only rows whose lower-cased substance
    label matches ``drug``.

    Args:
        drug: Regular expression (or plain text) to search for. It is
            lower-cased because it is matched against ``lcase(...)`` of the
            label, and backslashes / single quotes are escaped so the text is
            a valid SPARQL string literal.
        api_key: API key sent as the ``apikey`` request parameter. When omitted,
            the ``BIOPORTAL_API_KEY`` environment variable is used if set and
            non-empty; otherwise the key previously hard-coded here is used.

    Returns:
        The converted JSON result of the query; callers read the rows from
        ``result['results']['bindings']``.
    """
    import os  # local import so this cell does not depend on the import cell

    if api_key is None:
        # Prefer configuration from the environment over a credential in source.
        api_key = os.environ.get('BIOPORTAL_API_KEY') or 'fernando'

    # Encode the search term as the content of a single-quoted SPARQL literal:
    # backslashes first, then quotes, so an apostrophe cannot end the literal.
    pattern = drug.lower().replace('\\', '\\\\').replace("'", "\\'")

    sparql = SPARQLWrapper("http://sparql.bioontology.org/sparql")
    sparql.addCustomParameter("apikey", api_key)
    sparql.addCustomParameter("soft-limit", "-1")
    sparql.setQuery("""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX snomed-term: <http://purl.bioontology.org/ontology/SNOMEDCT/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX food: <http://data.lirmm.fr/ontologies/food#>

        SELECT DISTINCT ?super_substance_label ?substance ?substance_label
        WHERE
        {
            {
                ?substance rdfs:subClassOf snomed-term:105590001 .
                snomed-term:105590001 skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?substance rdfs:subClassOf ?a.
                ?a skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?b rdfs:subClassOf ?a .
                ?substance rdfs:subClassOf ?b .
                ?b skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?b rdfs:subClassOf ?a .
                ?c rdfs:subClassOf ?b .
                ?substance rdfs:subClassOf ?c .
                ?c skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?b rdfs:subClassOf ?a .
                ?c rdfs:subClassOf ?b .
                ?d rdfs:subClassOf ?c .
                ?substance rdfs:subClassOf ?d .
                ?d skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?b rdfs:subClassOf ?a .
                ?c rdfs:subClassOf ?b .
                ?d rdfs:subClassOf ?c .
                ?e rdfs:subClassOf ?d .
                ?substance rdfs:subClassOf ?e .
                ?e skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            union
            {
                ?a rdfs:subClassOf snomed-term:105590001 .
                ?b rdfs:subClassOf ?a .
                ?c rdfs:subClassOf ?b .
                ?d rdfs:subClassOf ?c .
                ?e rdfs:subClassOf ?d .
                ?f rdfs:subClassOf ?e .
                ?substance rdfs:subClassOf ?f .
                ?f skos:prefLabel ?super_substance_label .
                ?substance skos:altLabel ?substance_label.
            }
            FILTER (regex(lcase(str(?substance_label)), '%s'))
        }
    """ % pattern)
    sparql.setReturnFormat(JSON)
    results_drugs = sparql.query().convert()
    return results_drugs

In [8]:
def query_ingredient(ingredient):
    """Find products on the local SPARQL endpoint whose ingredient text matches.

    Args:
        ingredient: Regular expression (or plain text) to search for in the
            lower-cased ``food:IngredientListAsText`` value. It is lower-cased
            before matching, and backslashes / single quotes are escaped so
            terms such as "baker's yeast" produce a valid SPARQL string literal.

    Returns:
        The converted JSON result of the query, with ``name`` and
        ``ingredients`` bindings under ``result['results']['bindings']``.
    """
    # Encode the search term as the content of a single-quoted SPARQL literal:
    # backslashes first, then quotes, so an apostrophe cannot end the literal.
    pattern = ingredient.lower().replace('\\', '\\\\').replace("'", "\\'")

    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    sparql.setQuery("""
        PREFIX food: <http://data.lirmm.fr/ontologies/food#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>

        SELECT ?name ?ingredients
        WHERE {
                ?product food:IngredientListAsText ?ingredients .
                ?product food:name ?name .
                FILTER regex(str(lcase(?ingredients)), '%s')
        }
    """ % pattern)
    sparql.setReturnFormat(JSON)
    results_food = sparql.query().convert()
    return results_food


In [9]:
def query_dish(dish_name):
    """Find products on the local SPARQL endpoint whose name matches.

    Args:
        dish_name: Regular expression (or plain text) to search for in the
            lower-cased ``food:name`` value. It is lower-cased here, as
            ``query_ingredient`` does, because the comparison is made against
            ``lcase(?name)`` and an uppercase letter could never match.
            Backslashes and single quotes are escaped so the term is a valid
            SPARQL string literal.

    Returns:
        The converted JSON result of the query, with ``name`` and
        ``ingredients`` bindings under ``result['results']['bindings']``.
    """
    # Encode the search term as the content of a single-quoted SPARQL literal:
    # backslashes first, then quotes, so an apostrophe cannot end the literal.
    pattern = dish_name.lower().replace('\\', '\\\\').replace("'", "\\'")

    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    sparql.setQuery("""
        PREFIX food: <http://data.lirmm.fr/ontologies/food#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>

        SELECT ?name ?ingredients
        WHERE {
                ?product food:IngredientListAsText ?ingredients .
                ?product food:name ?name .
                FILTER regex(str(lcase(?name)), '%s')
        }
    """ % pattern)
    sparql.setReturnFormat(JSON)
    results_food = sparql.query().convert()
    return results_food

In [10]:
def parse_ingredients(results_ingredient):
    """Split the ingredient texts of a query result into individual ingredients.

    For every binding, the ``ingredients`` text is stripped of all characters
    other than letters, digits, spaces and ``( ) , ;``, then split on those
    four separators. A fragment is kept when it

    * has more than two non-space characters,
    * is not just a quantity (digits/spaces optionally followed by o/z), and
    * contains at least one word found in the module-level ``words`` set.

    The input is not modified.

    Args:
        results_ingredient: Query result with rows under
            ``['results']['bindings']``, each holding ``['ingredients']['value']``.

    Returns:
        A set of lower-cased ingredient strings with surrounding spaces removed.
    """
    ingredients = set()

    for row in results_ingredient['results']['bindings']:
        # Clean into a local so the caller's result dict is left untouched.
        cleaned = re.sub(r"[^a-zA-Z0-9(),; ]+", '', row['ingredients']['value'])
        for fragment in re.split(r"[;(),]", cleaned):
            if len(fragment.replace(' ', '')) <= 2:
                continue
            # Skip fragments that are only a quantity such as " 12 oz".
            if not re.sub('[0-9 ]+[oz]*', '', fragment):
                continue
            lowered = fragment.lower()
            if any(token in words for token in lowered.split(' ')):
                ingredients.add(lowered.strip())

    # No '' or '.' clean-up is needed: every kept fragment has at least three
    # non-space characters and '.' was already removed by the cleaning regex.
    return ingredients

In [11]:
def parse_dish(results_ingredient):
    """Extract cleaned product-name fragments from a query result.

    For every binding, the ``name`` text is stripped of all characters other
    than letters, digits, spaces and ``( ) , ;``, then split on those four
    separators. A fragment is kept when it has more than two non-space
    characters and is not just a quantity (digits/spaces optionally followed
    by o/z).

    The input is not modified.

    Args:
        results_ingredient: Query result with rows under
            ``['results']['bindings']``, each holding ``['name']['value']``.

    Returns:
        A set of lower-cased name fragments with surrounding spaces removed.
    """
    names = set()

    for row in results_ingredient['results']['bindings']:
        # Clean into a local so the caller's result dict is left untouched.
        cleaned = re.sub(r"[^a-zA-Z0-9(),; ]+", '', row['name']['value'])
        for fragment in re.split(r"[;(),]", cleaned):
            if len(fragment.replace(' ', '')) <= 2:
                continue
            # Skip fragments that are only a quantity such as " 16 oz".
            if not re.sub('[0-9 ]+[oz]*', '', fragment):
                continue
            names.add(fragment.lower().strip())

    # No '' or '.' clean-up is needed: every kept fragment has at least three
    # non-space characters and '.' was already removed by the cleaning regex.
    return names

In [12]:
def possible_allergens(ingredients):
    """Print a table of allergens whose label contains one of the ingredients.

    Each ingredient is lower-cased and compared, as a substring, against the
    lower-cased ``allergen_label`` of every binding in the module-level
    ``all_allergies`` result. One row (ingredient, allergen, allergy) is
    printed per match, below a header line.

    Args:
        ingredients: Iterable of ingredient strings.

    Returns:
        None; the table is written to standard output.
    """
    header = '{:<10}\t{:<25}\t{:<20}\n'.format('Ingredient', 'Allergen', 'Allergy')
    print(header)

    for ingredient in ingredients:
        for binding in all_allergies['results']['bindings']:
            needle = ingredient.lower()
            allergen_name = binding['allergen_label']['value'].lower()
            if needle not in allergen_name:
                continue
            allergy_name = binding['allergy_label']['value'].lower()
            print('{:<10}\t{:<25}\t{:<20}'.format(needle, allergen_name, allergy_name))

In [13]:
# Number of allergy/allergen rows returned by query_allergies().
len(all_allergies['results']['bindings'])

3811

### Testing some dishes, ingredients and checking allergens

In [14]:
# Find the products whose ingredient text matches 'chayote', then use
# parse_dish to print the cleaned product names from that result.
results_ingredient = query_ingredient('chayote')
for i in parse_dish(results_ingredient):
    print(i)

caldo soup kit
nona lim
papaya curry sauce
tropical pepper co
mexican stir fry
spicy rice soup


In [15]:
# Look up products whose name matches 'pad thai', split their ingredient
# texts into individual ingredients, and print the allergens whose labels
# contain any of them.
results_dish = query_dish('pad thai')
all_ingredients= parse_ingredients(results_dish)
possible_allergens(all_ingredients)

Ingredient	Allergen                 	Allergy             

flavoring 	flavoring agent (substance)	allergy to flavour  
flavoring 	flavoring agent (substance)	allergy to flavor (disorder)
shrimp    	shrimp                   	allergy to shrimp (disorder)
shrimp    	shrimp agent (substance) 	allergy to shrimp (disorder)
shrimp    	shrimp agent             	allergy to shrimp (disorder)
shrimp    	shrimp product (substance)	allergy to shrimp (disorder)
peanut oil	peanut oil               	arachis oil allergy (disorder)
salt      	borate salt (substance)  	borate allergy (disorder)
salt      	epsom salt               	magnesium sulphate allergy
salt      	ferrous salt agent (substance)	ferrous salt allergy (disorder)
salt      	epsom salt               	magnesium sulfate allergy (disorder)
extract   	allergen extract vaccine (substance)	allergen extract vaccine allergy (disorder)
extract   	allergen extract vaccine (substance)	allergen extract vaccine poisoning of undetermined intent (disord

### Listing allergies to a drug (which can in fact be done with any substance)

In [16]:
# Collect the lower-cased substance labels that match 'paracetamol' and print
# the allergens whose labels contain any of them.
drug_substances = set()
results_drugs = query_drugs('paracetamol')
for drug in results_drugs['results']['bindings']:
    drug_substances.add(drug['substance_label']['value'].lower())
possible_allergens(drug_substances)
# print(drug_substances)

Ingredient	Allergen                 	Allergy             

paracetamol	paracetamol              	paracetamol and dextropropoxyphene allergy
paracetamol	paracetamol              	paracetamol allergy (disorder)
paracetamol	paracetamol              	paracetamol and dextropropoxyphene allergy (disorder)
paracetamol	paracetamol              	paracetamol allergy 
paracetamol	paracetamol              	acetaminophen allergy (disorder)
paracetamol	paracetamol              	acetaminophen and dextropropoxyphene allergy (disorder)
