In [5]:
from owlready2 import *
import csv
from rdflib import Graph
import owlrl
from rdflib.namespace import OWL, URIRef
from rdflib.util import guess_format
from TripleGenerator import TripleGeneratingClass

# Task 2.3

### Hawaiian Pizza 

In [52]:
def SPARQLQuery_SubTask_1(file_query_out):
    """Export restaurants whose item name mentions "hawaii" to a CSV file.

    Runs a SPARQL query against the module-level ``dependecies_task.g`` graph
    selecting resources typed ``cw:Restaurant`` whose ``cw:itemName`` matches
    the case-insensitive regex "hawaii", prints the number of result rows and
    each row, and writes the rows below a header line to ``file_query_out``.

    Args:
        file_query_out: Path of the CSV file to create (overwritten if present).
    """
    qres = dependecies_task.g.query(
        """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX cw: <http://www.semanticweb.org/in3067-inm713/restaurants#>
        SELECT DISTINCT  ?name ?itemName ?address ?postcode ?city ?state ?country WHERE {
            ?Restaurant rdf:type cw:Restaurant .
            ?Restaurant cw:itemName ?itemName .
            ?Restaurant cw:restaurantName ?name.
            ?Restaurant cw:firstLineAddress ?address .
            ?Restaurant cw:postCode ?postcode .
            ?Restaurant cw:City ?city .
            ?Restaurant cw:State ?state .
            ?Restaurant cw:Country ?country

            FILTER REGEX(?itemName, "hawaii", "i")
        }
        """
    )
    print("%s capitals satisfying the query." % (str(len(qres))))

    header = ['Restaurant Name', 'Item', 'Address', 'Post Code', 'City', 'State', 'Country']

    # newline="" is required by the csv module; without it every row is followed
    # by an empty line on platforms that translate "\n" to "\r\n".
    with open(file_query_out, "w", encoding="UTF-8", newline="") as f:
        writeQuery = csv.writer(f)
        writeQuery.writerow(header)

        for row in qres:
            # Each row exposes the selected variables as attributes.
            line_str = [row.name, row.itemName, row.address, row.postcode, row.city, row.state, row.country]
            print(line_str)

            writeQuery.writerow(line_str)

In [13]:
# Export the Hawaiian-pizza query results to CSV. The output path is built by
# replacing ".csv" in `queryfile` with "-" + `task` and appending the suffix below.
# NOTE(review): `queryfile` and `task` are not defined in the visible part of
# this notebook — confirm they are set before this cell runs.
SPARQLQuery_SubTask_1(queryfile.replace(".csv", "-"+task)+"query-hawaiianpizza.csv")

7 capitals satisfying the query.
[rdflib.term.Literal('Bravo Pizza Hollywood', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Hawaiian Pizza', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('5142 Hollywood Blvd', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('90027.0', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Los Angeles', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Los Feliz', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('US', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string'))]
[rdflib.term.Literal("Giovanni's Pizza and Pasta", datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Hawaiin Pizza', datatype=rdflib.term.URIRef('http://www.w3.

### Average Price without Tomatoes

In [8]:
def SPARQLQuery_SubTask_2(file_query_out):
    """Export the average ``cw:amount`` of menu items not mentioning "tomat".

    Runs a SPARQL query against the module-level ``dependecies_task.g`` graph
    that averages ``cw:amount`` over resources typed ``cw:MenuItem`` whose
    ``cw:name`` value does not match the case-insensitive regex "tomat",
    prints the number of result rows and each row, and writes the result
    below a header line to ``file_query_out``.

    Args:
        file_query_out: Path of the CSV file to create (overwritten if present).
    """
    qres = dependecies_task.g.query(
        """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX cw: <http://www.semanticweb.org/in3067-inm713/restaurants#>
        SELECT DISTINCT (AVG(?price) AS ?value_avg) WHERE {

            ?food rdf:type cw:MenuItem .
            ?food cw:amount ?price .
            ?food cw:itemName ?pizzaName .
            ?food cw:name ?itemDesc

            FILTER NOT EXISTS {
                FILTER regex(?itemDesc, "tomat", "i")
                #FILTER regex(?price, "NaN", "i")
            }
        }
        """
    )
    print("%s capitals satisfying the query." % (str(len(qres))))

    header = ['Average Value']

    # newline="" is required by the csv module; without it every row is followed
    # by an empty line on platforms that translate "\n" to "\r\n".
    with open(file_query_out, "w", encoding="UTF-8", newline="") as f:
        writeQuery = csv.writer(f)
        writeQuery.writerow(header)

        for row in qres:
            # Each row exposes the selected variables as attributes.
            line_str = [row.value_avg]
            print(line_str)

            writeQuery.writerow(line_str)

In [9]:
# Export the average-price (no tomato) query result to CSV. The output path is
# built by replacing ".csv" in `queryfile` with "-" + `task` and appending the
# suffix below.
# NOTE(review): `queryfile` and `task` are not defined in the visible part of
# this notebook — confirm they are set before this cell runs.
SPARQLQuery_SubTask_2(queryfile.replace(".csv", "-"+task)+"query-results-averagevalue.csv")

1 capitals satisfying the query.
[rdflib.term.Literal('12.434967234600204', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]


### Number of restaurants by City

In [10]:
def SPARQLQuery_SubTask_3(file_query_out):
    """Export the number of restaurant rows per (city, state) to a CSV file.

    Runs a SPARQL query against the module-level ``dependecies_task.g`` graph
    that groups resources typed ``cw:Restaurant`` by ``cw:City`` and
    ``cw:State`` and counts the solutions in each group, ordered by state and
    then by count. Prints the number of result rows and each row, and writes
    the rows below a header line to ``file_query_out``.

    NOTE(review): two later cells of this notebook define functions with this
    same name, so after a full run the name refers to the last definition.

    Args:
        file_query_out: Path of the CSV file to create (overwritten if present).
    """
    # The cw:hasCity object is bound to ?cityNode rather than ?count_city:
    # SPARQL does not allow "AS ?count_city" to reuse a variable that is already
    # in scope in the WHERE clause, and stricter engines reject such a query.
    qres = dependecies_task.g.query(
        """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX cw: <http://www.semanticweb.org/in3067-inm713/restaurants#>
        SELECT DISTINCT ?city ?state (COUNT(*) AS ?count_city)  WHERE {
            ?Restaurant rdf:type cw:Restaurant .

            ?Restaurant cw:City ?city .
            ?Restaurant cw:State ?state .
            ?Restaurant cw:hasCity ?cityNode .
            ?Restaurant cw:hasState ?count_state

        }
        GROUP BY ?city ?state
        ORDER BY ASC(?state) ASC(?count_city)
        """
    )
    print("%s capitals satisfying the query." % (str(len(qres))))

    header = ['City', 'State', 'Number of Restaurants']

    # newline="" is required by the csv module; without it every row is followed
    # by an empty line on platforms that translate "\n" to "\r\n".
    with open(file_query_out, "w", encoding="UTF-8", newline="") as f:
        writeQuery = csv.writer(f)
        writeQuery.writerow(header)

        for row in qres:
            # Each row exposes the selected variables as attributes.
            line_str = [row.city, row.state, row.count_city]
            print(line_str)

            writeQuery.writerow(line_str)

In [15]:
# Export the restaurants-per-city query results to CSV. The output path is
# built by replacing ".csv" in `queryfile` with "-" + `task` and appending the
# suffix below.
# NOTE(review): SPARQLQuery_SubTask_3 is defined three times in this notebook;
# which query runs here depends on which definition cell was executed last.
SPARQLQuery_SubTask_3(queryfile.replace(".csv", "-"+task)+"query-numberofrestaurants.csv")

144 capitals satisfying the query.
[rdflib.term.Literal('Fairbanks', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('AK', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'))]
[rdflib.term.Literal('Phoenix', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('AZ', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'))]
[rdflib.term.Literal('Chandler', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('AZ', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer'))]
[rdflib.term.Literal('Hammonton', datatype=rdflib.ter

### Average Pizza price 

In [17]:
def SPARQLQuery_SubTask_3(file_query_out):
    """Export the average menu-item price per restaurant and city to a CSV file.

    Runs a SPARQL query against the module-level ``dependecies_task.g`` graph
    that averages ``cw:amount`` over the ``cw:MenuItem`` resources linked from
    each ``cw:Restaurant`` via ``cw:servesMenuItem``, grouped by restaurant
    name and city and ordered by city and then by average. Prints the number
    of result rows and each row, and writes the rows below a header line to
    ``file_query_out``.

    NOTE(review): other cells of this notebook define functions with this same
    name, so the name refers to whichever definition was executed last.

    Args:
        file_query_out: Path of the CSV file to create (overwritten if present).
    """
    qres = dependecies_task.g.query(
        '''
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX cw: <http://www.semanticweb.org/in3067-inm713/restaurants#>
        SELECT DISTINCT ?restaurant ?city (AVG(?price) AS ?value_avg) WHERE { 
            ?rest rdf:type cw:Restaurant ;
                cw:servesMenuItem ?menuItem .
                
            ?rest cw:restaurantName ?restaurant .
            ?rest cw:City ?city .
            
            ?menuItem rdf:type cw:MenuItem;
                    cw:amount ?price .
        } 
        GROUP BY ?restaurant ?city 
        ORDER BY ASC(?city) ASC(?value_avg)

        '''

    )
    print("%s capitals satisfying the query." % (str(len(qres))))

    header = ['Restaurant Name', 'City', 'Average price']

    # newline="" is required by the csv module; without it every row is followed
    # by an empty line on platforms that translate "\n" to "\r\n".
    with open(file_query_out, "w", encoding="UTF-8", newline="") as f:
        writeQuery = csv.writer(f)
        writeQuery.writerow(header)

        for row in qres:
            # Each row exposes the selected variables as attributes.
            line_str = [row.restaurant, row.city, row.value_avg]
            print(line_str)

            writeQuery.writerow(line_str)

In [19]:
# Export the average-price-per-restaurant query results to CSV. The output path
# is built by replacing ".csv" in `queryfile` with "-" + `task` and appending
# the suffix below.
# NOTE(review): SPARQLQuery_SubTask_3 is defined three times in this notebook;
# which query runs here depends on which definition cell was executed last.
SPARQLQuery_SubTask_3(queryfile.replace(".csv", "-"+task)+"query-results-averagepizzaprice.csv")

139 capitals satisfying the query.
[rdflib.term.Literal("Casino's Pizza", datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Addison', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('4.73', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rdflib.term.Literal('The Fire Den', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Alameda', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('17.635200000000005', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rdflib.term.Literal('La Hacienda', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Americus', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('6.275', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rd

### Pizzas without a price

In [15]:
def SPARQLQuery_SubTask_3(file_query_out):
    """Export the names of menu items selected by the "no price" query to CSV.

    Runs a SPARQL query against the module-level ``dependecies_task.g`` graph
    selecting the ``cw:itemName`` of resources typed ``cw:MenuItem`` whose
    ``cw:hasValue`` object equals the term ``cw:``. Prints the number of
    result rows and each row, and writes the rows below a header line to
    ``file_query_out``.

    NOTE(review): the filter compares ?price with the bare namespace IRI
    ``cw:`` — confirm that this is how a missing price is represented in the
    generated data.
    NOTE(review): other cells of this notebook define functions with this same
    name, so the name refers to whichever definition was executed last.

    Args:
        file_query_out: Path of the CSV file to create (overwritten if present).
    """
    qres = dependecies_task.g.query(
        """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX cw: <http://www.semanticweb.org/in3067-inm713/restaurants#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        SELECT DISTINCT ?itemName WHERE {

            ?food rdf:type cw:MenuItem .
            ?food cw:itemName ?itemName .
            ?food cw:hasValue ?price.

            FILTER ( ?price = cw: )
        }
        """
    )
    print("%s capitals satisfying the query." % (str(len(qres))))

    header = ['Pizza Name']

    # newline="" is required by the csv module; without it every row is followed
    # by an empty line on platforms that translate "\n" to "\r\n".
    with open(file_query_out, "w", encoding="UTF-8", newline="") as f:
        writeQuery = csv.writer(f)
        writeQuery.writerow(header)

        for row in qres:
            # Each row exposes the selected variables as attributes.
            line_str = [row.itemName]
            print(line_str)

            writeQuery.writerow(line_str)

In [22]:
# Export the pizzas-without-price query results to CSV. The output path is
# built by replacing ".csv" in `queryfile` with "-" + `task` and appending the
# suffix below.
# NOTE(review): the output recorded below this cell is identical to the
# average-price output (restaurant, city, average), which suggests the cell was
# run while SPARQLQuery_SubTask_3 was bound to a different definition. Re-run
# the definition cell directly above before executing this call.
SPARQLQuery_SubTask_3(queryfile.replace(".csv", "-"+task)+"query-withoutprice.csv")

139 capitals satisfying the query.
[rdflib.term.Literal("Casino's Pizza", datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Addison', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('4.73', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rdflib.term.Literal('The Fire Den', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Alameda', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('17.635200000000005', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rdflib.term.Literal('La Hacienda', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('Americus', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#string')), rdflib.term.Literal('6.275', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#double'))]
[rd

In [29]:
#In reference of Lab 8

def getClasses(onto):
    """Return whatever ``onto.classes()`` yields for the given ontology object."""
    ontology_classes = onto.classes()
    return ontology_classes

def getDataProperties(onto):
    """Return whatever ``onto.data_properties()`` yields for the given ontology object."""
    ontology_data_properties = onto.data_properties()
    return ontology_data_properties

def getObjectProperties(onto):
    """Return whatever ``onto.object_properties()`` yields for the given ontology object."""
    ontology_object_properties = onto.object_properties()
    return ontology_object_properties

def getIndividuals(onto):
    """Return whatever ``onto.individuals()`` yields for the given ontology object."""
    ontology_individuals = onto.individuals()
    return ontology_individuals


In [39]:
# Create the rdflib graph that collects the alignment triples; later cells add
# to it and also parse ontology and data files into it.
graph_oa = Graph()

# File names of the two ontologies to align (relative to the working directory).
load_pz_onto = "pizza.owl"
load_cw_onto = "cw_onto.owl"

# Load both ontologies with owlready2; `p_onto` and `c_onto` are used by the
# alignment loops in the following cells.
# NOTE(review): the output recorded for this cell is a FileNotFoundError for
# 'pizza.owl#', so the later cells only work from leftover kernel state.
# Presumably the bare relative name is not resolved as a local file — confirm
# against the owlready2 documentation (e.g. an absolute path or file:// IRI).
p_onto = get_ontology(load_pz_onto).load()
c_onto = get_ontology(load_cw_onto).load()



FileNotFoundError: [WinError 2] The system cannot find the file specified: 'pizza.owl#'

In [37]:
# Brute-force class alignment: compare every class of cw_onto with every class
# of the pizza ontology and add an owl:equivalentClass triple to graph_oa when
# their local names are equal.
# The pizza classes are listed once up front because that list does not change
# between iterations of the outer loop.
pz_classes = list(getClasses(p_onto))

for cls_cw_onto in list(getClasses(c_onto)):
    subject = URIRef(cls_cw_onto.iri)

    for cls_pz_onto in pz_classes:
        # checking whether the name matches
        if cls_cw_onto.name == cls_pz_onto.name:
            # Named `pz_class_iri` rather than `object` so the builtin `object`
            # is not shadowed for the rest of the notebook.
            pz_class_iri = URIRef(cls_pz_onto.iri)
            graph_oa.add((subject, OWL.equivalentClass, pz_class_iri))
            print()
            # (author's note) gives us 6 triples
            print(str((subject, OWL.equivalentClass, pz_class_iri)))

NameError: name 'c_onto' is not defined

In [38]:
# Data-property alignment: compare every data property of cw_onto with every
# data property of the pizza ontology and add an owl:equivalentProperty triple
# to graph_oa when their local names are equal.
# The pizza properties are listed once up front because that list does not
# change between iterations of the outer loop.
pz_data_properties = list(getDataProperties(p_onto))

for cls_cw_onto in list(getDataProperties(c_onto)):
    subject = URIRef(cls_cw_onto.iri)

    for cls_pz_onto in pz_data_properties:
        # Diagnostic: shows every pizza data property that is being compared.
        print(cls_pz_onto.iri)
        # Compare local names, as the class and object-property cells do;
        # comparing the entity objects themselves can never match across the
        # two ontologies because their IRIs differ.
        if cls_cw_onto.name == cls_pz_onto.name:
            # Named `pz_property_iri` rather than `object` so the builtin
            # `object` is not shadowed for the rest of the notebook.
            pz_property_iri = URIRef(cls_pz_onto.iri)
            graph_oa.add((subject, OWL.equivalentProperty, pz_property_iri))
            print()
            print(str((subject, OWL.equivalentProperty, pz_property_iri)))

NameError: name 'c_onto' is not defined

pizza.owl defines no data properties, so this step produces no `owl:equivalentProperty` triples.

In [63]:
# Object-property alignment: compare every object property of cw_onto with
# every object property of the pizza ontology and add an
# owl:equivalentProperty triple to graph_oa when their local names are equal.
# The pizza properties are listed once up front because that list does not
# change between iterations of the outer loop.
pz_object_properties = list(getObjectProperties(p_onto))

for cls_cw_onto in list(getObjectProperties(c_onto)):
    subject = URIRef(cls_cw_onto.iri)

    for cls_pz_onto in pz_object_properties:
        if cls_cw_onto.name == cls_pz_onto.name:
            # Named `pz_property_iri` rather than `object` so the builtin
            # `object` is not shadowed for the rest of the notebook.
            pz_property_iri = URIRef(cls_pz_onto.iri)
            graph_oa.add((subject, OWL.equivalentProperty, pz_property_iri))
            print()
            print(str((subject, OWL.equivalentProperty, pz_property_iri)))


(rdflib.term.URIRef('http://www.semanticweb.org/in3067-inm713/restaurants#hasIngredient'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentProperty'), rdflib.term.URIRef('http://www.co-ode.org/ontologies/pizza/pizza.owl#hasIngredient'))

(rdflib.term.URIRef('http://www.semanticweb.org/in3067-inm713/restaurants#isIngredientOf'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentProperty'), rdflib.term.URIRef('http://www.co-ode.org/ontologies/pizza/pizza.owl#isIngredientOf'))


In [64]:
# Individual alignment: compare every individual of cw_onto with every
# individual of the pizza ontology and link those whose local names are equal.
# The pizza individuals are listed once up front because that list does not
# change between iterations of the outer loop.
pz_individuals = list(getIndividuals(p_onto))

for cls_cw_onto in list(getIndividuals(c_onto)):
    subject = URIRef(cls_cw_onto.iri)
    # Diagnostic: shows each cw_onto individual that is being compared.
    print(cls_cw_onto.name)

    for cls_pz_onto in pz_individuals:
        if cls_cw_onto.name == cls_pz_onto.name:
            # Named `pz_individual_iri` rather than `object` so the builtin
            # `object` is not shadowed for the rest of the notebook.
            pz_individual_iri = URIRef(cls_pz_onto.iri)
            # Two individuals denoting the same thing are related with
            # owl:sameAs; owl:equivalentProperty only applies to properties.
            graph_oa.add((subject, OWL.sameAs, pz_individual_iri))
            print()
            print(str((subject, OWL.sameAs, pz_individual_iri)))

DaiyaCheese
EUR
GBP
USD
bianca_pizza_little_pizza_paradise_bend
capers
little_Pizza_Paradise_Bend
olive
onion
pizza_putanesca_the_brentwood_los_angeles
tomato
the_brentwood_los_angeles


No individuals share a name across the two ontologies, so no alignment triples are added for individuals.

In [43]:
def performReasoning_OA(ontology_file):
    """Parse a file into the shared ``graph_oa`` and expand it with OWL 2 RL reasoning.

    Prints the size of the module-level ``graph_oa`` before parsing, after
    parsing ``ontology_file`` into it, and after the owlrl deductive closure
    has been applied. The graph is modified in place, so repeated calls
    accumulate triples.

    Args:
        ontology_file: Path of the RDF/OWL file to add; its serialization
            format is taken from ``guess_format(ontology_file)``.
    """
    print("Data triples from CSV: '" + str(len(graph_oa)) + "'.")
    # Graph.load() was only a deprecated alias of Graph.parse() and is not
    # available in rdflib 6+, so parse() is called directly.
    graph_oa.parse(ontology_file, format=guess_format(ontology_file))  # e.g., format=ttl
    print("Triples including ontology: '" + str(len(graph_oa)) + "'.")
    # Applying reasoning and expand the graph with new triples
    owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples=False, datatype_axioms=False).expand(graph_oa)

    print("Triples after OWL 2 RL reasoning: '" + str(len(graph_oa)) + "'.")

In [44]:
def saveGraph_OA(file_output):
    """Serialize the module-level ``graph_oa`` to ``file_output`` using the 'ttl' format."""
    serialize_options = {"destination": file_output, "format": 'ttl'}
    graph_oa.serialize(**serialize_options)

In [45]:
# Save the current contents of graph_oa as the ontology alignment result
# (Turtle format). Run this before any performReasoning_OA call: those calls
# parse further files into the same graph, so a later save would no longer
# contain only the alignment triples.
saveGraph_OA("onto_alignment_1.ttl")

#### Subtask 2

In [50]:
# i
# Parse cw_onto.owl into the shared graph_oa, apply OWL 2 RL reasoning, and
# save the expanded graph (Turtle format).
# NOTE(review): the recorded output starts at 45330 triples, which is the count
# the output of step iv ends with — this cell appears to have been run after
# step iv, so a top-to-bottom run will not reproduce these numbers.
performReasoning_OA("cw_onto.owl")
saveGraph_OA("cw_onto_reasoner.ttl")

Data triples from CSV: '45330'.
Triples including ontology: '45592'.
Triples after OWL 2 RL reasoning: '56288'.


In [47]:
# ii
# Parse pizza.owl into the shared graph_oa, apply OWL 2 RL reasoning, and save
# the expanded graph (Turtle format). graph_oa keeps everything added by
# earlier cells, so the result depends on which cells ran before this one.
performReasoning_OA("pizza.owl")
saveGraph_OA("pizza_onto_OA_reasoner.ttl")

Data triples from CSV: '3535'.
Triples including ontology: '5478'.
Triples after OWL 2 RL reasoning: '13414'.


In [48]:
# iii
# Parse the saved alignment file into the shared graph_oa, apply OWL 2 RL
# reasoning, and save the expanded graph (Turtle format).
# NOTE(review): in the recorded output the triple count is unchanged by the
# parse step (13414 before and after), i.e. the alignment triples were already
# in graph_oa when this cell ran.
performReasoning_OA("onto_alignment_1.ttl")
saveGraph_OA("onto_alignment_1_reasoner.ttl")

Data triples from CSV: '13414'.
Triples including ontology: '13414'.
Triples after OWL 2 RL reasoning: '13420'.


In [49]:
# iv
# Parse the generated task-1 data file into the shared graph_oa, apply OWL 2 RL
# reasoning, and save the expanded graph (Turtle format). graph_oa keeps
# everything added by earlier cells, so the result depends on which cells ran
# before this one.
performReasoning_OA("IN3067-INM713_coursework_data_pizza_500-task1.ttl")
saveGraph_OA("generated_data_OA.ttl")

Data triples from CSV: '13420'.
Triples including ontology: '19903'.
Triples after OWL 2 RL reasoning: '45330'.


In [45]:
def compareWithReference(reference_mappings_file, system_mappings_file):
    """Print precision, recall and F-score of system mappings against a reference.

    Both files are parsed as Turtle into separate graphs. A system triple that
    also occurs in the reference is a true positive, any other system triple
    is a false positive, and a reference triple missing from the system
    mappings is a false negative.

    A metric whose denominator would be zero (no system triples, no reference
    triples, or no overlap at all) is reported as 0.0 instead of raising
    ZeroDivisionError.

    Args:
        reference_mappings_file: Path of the Turtle file with the reference mappings.
        system_mappings_file: Path of the Turtle file with the mappings to evaluate.
    """
    ref_mappings = Graph()
    ref_mappings.parse(reference_mappings_file, format="ttl")

    system_mappings = Graph()
    system_mappings.parse(system_mappings_file, format="ttl")

    # We calculate precision and recall via true positives, false positives and false negatives
    # https://en.wikipedia.org/wiki/Precision_and_recall
    tp = sum(1 for t in system_mappings if t in ref_mappings)
    fp = sum(1 for t in system_mappings if t not in ref_mappings)
    fn = sum(1 for t in ref_mappings if t not in system_mappings)

    # Guard every division: an empty or completely disjoint mapping set would
    # otherwise make the denominator zero.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if precision + recall:
        f_score = (2 * precision * recall) / (precision + recall)
    else:
        f_score = 0.0

    print("Comparing '" + system_mappings_file + "' with '" + reference_mappings_file + "'")
    print("\tPrecision: " + str(precision))
    print("\tRecall: " + str(recall))
    print("\tF-Score: " + str(f_score))