In [None]:
import requests
from bs4 import BeautifulSoup
import json
from rdflib import Graph, Literal, RDF, URIRef,BNode,Namespace
from rdflib.namespace import XSD,RDF,RDFS
from ingredients_partition import *
from ingredient_class_match import find_ingredient_URI
import urllib.parse
from scraping_functions import * 
from USDA_score import *
from FSA_color import *
import glob
import nltk

In [None]:
# Vocabulary base IRIs, kept as plain strings; rdflib Namespace objects are
# built inside create_json_to_ttl on every call.
_SCHEMA_ORG_IRI = "https://schema.org/"
_PROJECT_IRI = "http://purl.org/recipekg/"
_RECIPE_IRI = "http://purl.org/recipekg/recipe/"
_FOOD_IRI = "http://purl.org/heals/food/"

# nutrition key -> (property local name, class local name), both appended to
# the project base IRI.
_NUTRIENT_TERMS = {
    "fatContent": ("hasFatData", "FatData"),
    "saturatedFatContent": ("hasSaturatedFatData", "SaturatedFatData"),
    "sugarContent": ("hasSugarData", "SugarData"),
    "sodiumContent": ("hasSodiumData", "SodiumData"),
    "carbohydrateContent": ("hasCarbohydrateData", "CarbohydrateData"),
    "cholesterolContent": ("hasCholesterolData", "CholesterolData"),
    "fiberContent": ("hasFiberData", "FiberData"),
    "proteinContent": ("hasProteinData", "ProteinData"),
    "calories": ("hasCalorificData", "CalorificData"),
    # NOTE(review): "hasTransFataData" looks like a typo for "hasTransFatData".
    # It is kept byte-for-byte so emitted IRIs do not change; confirm against
    # the ontology before renaming.
    "transFatContent": ("hasTransFataData", "TransFatData"),
    "unsaturatedFatContent": ("hasUnsaturatedFatData", "UnsaturatedFatData"),
}


def _add_categories(g, recipe_node, breadcrumb, project_ns):
    """Add the category hierarchy read from the last breadcrumb item's "@id" URL."""
    path = breadcrumb["itemListElement"][-1]["item"]["@id"]
    segments = path.split("/")

    # Only URLs that split into more than 6 segments carry categories; the
    # category names are the segments from index 5 up to (excluding) the last.
    if len(segments) <= 6:
        return
    categories = segments[5:-1]

    category_iri = _PROJECT_IRI + "categories/" + categories[0] + "/"
    parent = URIRef(category_iri)
    # The top-level category is typed as a RecipeCategory.
    g.add((parent, RDF.type, project_ns.RecipeCategory))

    # Every deeper category becomes a subclass of the one before it.
    for category in categories[1:]:
        category_iri = category_iri + category + "/"
        child = URIRef(category_iri)
        g.add((child, RDFS.subClassOf, parent))
        parent = child

    # The recipe belongs to the deepest category.
    g.add((recipe_node, project_ns.belongsTo, parent))


def _add_aggregate_rating(g, recipe_node, recipe, schema_ns):
    """Add the AggregateRating blank node and one triple per rating field."""
    rating_node = BNode()
    # The rating node is attached even when the recipe has no rating data,
    # matching the behaviour this function has always had.
    g.add((rating_node, RDF.type, schema_ns.AggregateRating))
    g.add((recipe_node, schema_ns.aggregateRating, rating_node))

    rating = recipe.get("aggregateRating")
    if not rating:
        return
    for key, value in rating.items():
        if key in ("@type", "itemReviewed"):
            continue
        # ratingValue is typed as float; every other field as integer.
        datatype = XSD.float if key == "ratingValue" else XSD.integer
        predicate = URIRef(_SCHEMA_ORG_IRI + str(key))
        g.add((rating_node, predicate, Literal(value, datatype=datatype)))


def _add_ingredients(g, recipe_node, recipe, project_ns, food_ns):
    """Add one blank node per ingredient line, described when a product name is parsed."""
    ingredients = recipe.get("recipeIngredient") or []
    if not ingredients:
        return

    # One lemmatizer is enough for all ingredient lines.
    lemmatizer = nltk.WordNetLemmatizer()
    for ingredient in ingredients:
        node = BNode()
        # The hasIngredient link is added even if no product name is found below.
        g.add((recipe_node, food_ns.hasIngredient, node))

        # ingredient_partition (ingredients_partition.py) yields entities that
        # expose .label_ and .text; a later entity with the same label wins.
        fields = {"QUANTITY": None, "UNIT": None, "PRODUCT": None}
        for entity in ingredient_partition(ingredient):
            if entity.label_ in fields:
                fields[entity.label_] = lemmatizer.lemmatize(entity.text)

        name = fields["PRODUCT"]
        if not name:
            continue

        # find_ingredient_URI (ingredient_class_match.py): index 0 is used as
        # the ingredient name, index 1 as the class IRI.
        info = find_ingredient_URI(name)
        g.add((node, RDF.type, URIRef(info[1])))
        g.add((node, project_ns.ingredientName, Literal(info[0])))
        if fields["UNIT"]:
            g.add((node, project_ns.hasUnit, Literal(fields["UNIT"])))
        if fields["QUANTITY"]:
            g.add((node, project_ns.hasQuantity, Literal(fields["QUANTITY"])))


def _add_nutrition(g, recipe_node, recipe, project_ns):
    """Add nutrition data nodes, FSA colours, and the FSA and USDA scores."""
    nutrition = recipe.get("nutrition")
    if not nutrition:
        return

    info_node = BNode()
    g.add((recipe_node, project_ns.hasNutritionalInformation, info_node))
    g.add((info_node, RDF.type, project_ns.NutritionalInformation))

    # Only these four nutrients get an FSA colour (functions from FSA_color.py).
    fsa_rules = {
        "fatContent": FSA_fat,
        "saturatedFatContent": FSA_saturated_fat,
        "sugarContent": FSA_sugar,
        "sodiumContent": FSA_sodium,
    }
    fsa_score = 0

    for key in nutrition:
        terms = _NUTRIENT_TERMS.get(key)
        value = nutrition[key]
        if terms is None or not value:
            continue

        # Values look like "<amount> <unit>". A value without a unit, or a
        # non-string value, must not abort the whole recipe.
        parts = str(value).split()
        if not parts:
            continue
        amount = parts[0]
        property_name, class_name = terms

        data_node = BNode()
        g.add((info_node, URIRef(_PROJECT_IRI + property_name), data_node))
        g.add((data_node, RDF.type, URIRef(_PROJECT_IRI + class_name)))
        g.add((data_node, project_ns.hasAmount, Literal(amount, datatype=XSD.float)))
        if len(parts) > 1:
            g.add((data_node, project_ns.hasUnit, Literal(parts[1])))

        fsa_color_of = fsa_rules.get(key)
        if fsa_color_of is None:
            continue

        color = fsa_color_of(amount)
        # Green scores 2, Amber scores 1, anything else scores 0.
        if color == "Green":
            fsa_score += 2
        elif color == "Amber":
            fsa_score += 1

        color_node = URIRef(_PROJECT_IRI + "FSA" + color)
        g.add((data_node, project_ns.hasFSAColor, color_node))
        g.add((color_node, RDF.type, project_ns.FSAColor))

    g.add((recipe_node, project_ns.hasFSAScore, Literal(int(fsa_score), datatype=XSD.integer)))

    # calculate_USDA_score comes from USDA_score.py; a None result adds no triple.
    usda_score = calculate_USDA_score(recipe)
    if usda_score is not None:
        g.add((recipe_node, project_ns.hasUSDAScore, Literal(int(usda_score), datatype=XSD.integer)))


def create_json_to_ttl(dictionary):
    """Build an rdflib Graph describing one scraped recipe.

    Parameters
    ----------
    dictionary
        Indexable pair: ``dictionary[0]`` is the breadcrumb data (read via
        ``["itemListElement"][-1]["item"]["@id"]``) and ``dictionary[1]`` is
        the recipe data (keys such as ``mainEntityOfPage``, ``name``,
        ``datePublished``, ``recipeIngredient``, ``nutrition``).
        Presumably these are the JSON-LD objects of a recipe page - TODO confirm
        against ``open_recipe_jsonfile``.

    Returns
    -------
    rdflib.Graph or None
        The populated graph, or ``None`` when ``dictionary[1]`` is empty or None.

    Raises
    ------
    KeyError
        If the recipe data lacks ``mainEntityOfPage``, ``name`` or
        ``datePublished``.
    """
    recipe = dictionary[1]
    if not recipe:
        return None

    g = Graph()
    schema_ns = Namespace(_SCHEMA_ORG_IRI)
    project_ns = Namespace(_PROJECT_IRI)
    food_ns = Namespace(_FOOD_IRI)

    # The recipe slug is the second-to-last "/"-separated piece of the page
    # URL (i.e. the last path segment when the URL ends with a slash).
    name_from_url = recipe["mainEntityOfPage"].split("/")[-2]
    recipe_node = URIRef(_RECIPE_IRI + name_from_url)

    g.add((recipe_node, RDF.type, schema_ns.Recipe))
    g.add((recipe_node, schema_ns.name, Literal(recipe["name"], datatype=XSD.string)))
    # NOTE(review): the datatype IRI is https://schema.org/date (lower-case).
    # schema.org names its data type "Date"; kept unchanged so existing output
    # stays identical - confirm which IRI the ontology expects.
    g.add((recipe_node, schema_ns.datePublished, Literal(recipe["datePublished"], datatype=schema_ns.date)))

    if recipe.get("recipeYield"):
        g.add((recipe_node, schema_ns.recipeYield, Literal(recipe["recipeYield"], datatype=XSD.string)))

    breadcrumb = dictionary[0]
    if breadcrumb:
        _add_categories(g, recipe_node, breadcrumb, project_ns)

    _add_aggregate_rating(g, recipe_node, recipe, schema_ns)
    _add_ingredients(g, recipe_node, recipe, project_ns, food_ns)
    _add_nutrition(g, recipe_node, recipe, project_ns)

    return g

In [None]:
# Collect the recipe JSON files and convert them to Turtle files according to
# the ontology, using create_json_to_ttl.

# put here your json file path!
JSON_GLOB = "/example/json/*.json"
# put here your rdf file path!
RDF_DIR = "/example/rdf/"
# Upper bound on the number of files converted in one run.
MAX_FILES = 10

# glob returns files in an OS-dependent order; sorting keeps the
# index -> input-file mapping stable between runs.
json_files = sorted(glob.glob(JSON_GLOB))

# Slicing (instead of range(0, 10)) avoids an IndexError when fewer than
# MAX_FILES files are present.
for a, json_path in enumerate(json_files[:MAX_FILES]):
    # (function is taken from scraping_functions.py)
    recipe_data = open_recipe_jsonfile(json_path)
    g = create_json_to_ttl(recipe_data)
    if g is None:
        # create_json_to_ttl returns None when the file holds no recipe data.
        print(a, "skipped (no recipe data):", json_path)
        continue
    print(a)
    # Request Turtle explicitly so the content matches the .ttl extension
    # regardless of the rdflib version's default format.
    g.serialize(destination=RDF_DIR + str(a) + ".ttl", format="turtle")