In [91]:
'''
Adapted on 2 May 2022

@author: Kim Ushe Mupfumira
'''
import sys
sys.path.append("../")
from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib import Namespace
from rdflib.namespace import OWL, RDF, RDFS, FOAF, XSD
from rdflib.util import guess_format
import pandas as pd
from isub import isub
from lookup import DBpediaLookup
import csv
import owlrl                



In [9]:
# Read the coursework CSV into the notebook-level DataFrame `d`.
# The path has no directory part, so it is resolved against the current working directory.
d = pd.read_csv("IN3067-INM713_coursework_data_pizza_500.csv")

# Print the column labels of the loaded frame.
print(d.columns)

# Trailing expression of the cell: displays the first rows of `d`.
d.head()

Index(['name', 'address', 'city', 'country', 'postcode', 'state', 'categories',
       'menu item', 'item value', 'currency', 'item description'],
      dtype='object')


Unnamed: 0,name,address,city,country,postcode,state,categories,menu item,item value,currency,item description
0,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Bianca Pizza,22.5,USD,
1,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Cheese Pizza,18.95,USD,
2,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Margherita",12.0,USD,
3,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Mushroom",13.0,USD,
4,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Puttenesca",13.0,USD,"Olives, onions, capers, tomatoes"


In [71]:
# Boolean mask over the rows of `d`: True where "item description" is missing.
NuloFildo = d["item description"].isnull()

# Null counts noted while exploring the data:
#   postcode           10
#   item value         78
#   currency           75
#   item description  325
#
# Example restaurant URI:
# http://www.semanticweb.org/in3067-inm713/restaurants#little_Pizza_Paradise_Bend

# Number of rows selected by the mask, then the total number of rows.
print('Nulls: ', int(NuloFildo.sum()))
print('Tota Dataframe: ', d.shape[0])

# Trailing expression of the cell: displays the first 20 rows of `d`.
d.head(20)

Nulls:  325
Tota Dataframe:  501


Unnamed: 0,name,address,city,country,postcode,state,categories,menu item,item value,currency,item description
0,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Bianca Pizza,22.5,USD,
1,Little Pizza Paradise,Cascade Village Mall Across From Target,Bend,US,97701.0,OR,Pizza Place,Cheese Pizza,18.95,USD,
2,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Margherita",12.0,USD,
3,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Mushroom",13.0,USD,
4,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Puttenesca",13.0,USD,"Olives, onions, capers, tomatoes"
5,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, Salami Piccante",15.0,USD,
6,The Brentwood,148 S Barrington Ave,Los Angeles,US,90049.0,Brentwood,"American Restaurant,Bar,Bakery","Pizza, White Truffle Meat Sauce",15.0,USD,
7,Bravo Pizza Hollywood,5142 Hollywood Blvd,Los Angeles,US,90027.0,Los Feliz,Pizza Place,Cheese Pizza,10.99,USD,Choose a pizza size.
8,Bravo Pizza Hollywood,5142 Hollywood Blvd,Los Angeles,US,90027.0,Los Feliz,Pizza Place,Hawaiian Pizza,11.99,USD,"Canadian bacon, pineapple."
9,Bravo Pizza Hollywood,5142 Hollywood Blvd,Los Angeles,US,90027.0,Los Feliz,Pizza Place,Meat Lover Pizza,16.99,USD,


In [51]:

# Collect the distinct values of the "categories" column of `d`.
a = d['categories'].unique()
# Print them; .view() is called on the result of unique() before printing.
print (a.view())

['Pizza Place' 'American Restaurant,Bar,Bakery'
 'Bar,Beer Garden,Sports Bar,Sports Bar, Bar, and Beer Garden'
 'American Restaurant, Seafood Restaurant, and French Restaurant'
 'University,University University'
 'Sporting Goods Shop,Shoe Store,Sporting Goods Shop and Shoe Store Town Center,Sporting Goods Shop and Shoe Store,Sportswear,Shoe Stores,Running Stores,Clothing Stores,Sporting Goods'
 'Italian Restaurant,Restaurant,Italian Restaurant Streeterville'
 'Bagels,Bakeries,Restaurants,Restaurant,Breakfast Brunch & Lunch Restaurants,,Bagel Shop,Breakfast Spot,Coffee Shop'
 'Restaurant'
 'Italian Restaurant,Pizza Place,Take Out Restaurants,Restaurants,Italian Restaurant and Pizza Place,Pizza,doctor'
 'Restaurants,Pizza Place Southwest Dallas,Pizza Place' 'Sandwich Place'
 'Asian Restaurant,Asian Restaurant Forty Acres'
 'Family Style Restaurants,Restaurants,Restaurant'
 'Pizza,Delicatessens,Take Out Restaurants,Restaurants,Pizza Place'
 'American Restaurant'
 'Pizza Place,Pizza,Take 

In [217]:


class Lab5Solution(object):
    '''
    Example of a partial solution for Lab 5 
    '''
    def __init__(self, input_file):
        '''
        Prepare the graph, the namespace, the input data and the look-up client.

        input_file: path of the CSV file, read into ``self.data_frame``.
        '''
        #Author's note: the aim is to cover as much of the original csv file as possible, but for
        #the lab and coursework the ideas and the proposed implementation matter more than handling
        #every case in every row. Calling the look-up services may also be expensive, so a solution
        #tested over a reasonable percentage of the file is accepted.
        self.file = input_file

        #Cache of URIs keyed by string, meant to avoid repeated calls to a remote service
        self.stringToURI = {}

        #1. GRAPH INITIALIZATION
        #A new graph that is filled with the ontology file
        ontology_graph = Graph()
        ontology_graph.parse("pizza-restaurants-ontology.ttl")
        self.g = ontology_graph

        #Same namespace string as the one used in "pizza-restaurants-ontology.ttl"
        namespace_iri = "http://www.semanticweb.org/in3067-inm713/restaurants#"
        self.cw_ns_str = namespace_iri

        #Namespace object used to build URIRefs directly from attribute access
        self.cw = Namespace(namespace_iri)

        #Prefix used when the graph is serialized
        self.g.bind("cw", self.cw)

        #Load the CSV data into a dataframe
        self.data_frame = pd.read_csv(self.file, sep=',', quotechar='"', escapechar="\\")

        #KG look-up client
        self.dbpedia = DBpediaLookup()
    
    
    
    def Task1(self):
        '''Run CovertCSVToRDF with its useExternalURI argument set to False.'''
        use_external_uri = False
        self.CovertCSVToRDF(use_external_uri)
        
    def Task2(self):
        '''Run CovertCSVToRDF with its useExternalURI argument set to True.'''
        use_external_uri = True
        self.CovertCSVToRDF(use_external_uri)

    
    def SimpleUniqueMapping(self):
        '''
        Turn every row of ``self.data_frame`` into triples and add them to ``self.g``.

        This mapping creates all the transformations (i.e., triples) of a row in one go.
        Unlike the modular approach (see CovertCSVToRDF) it is less flexible to adaptations.

        For each row the method:
          * builds URIs inside the ``self.cw_ns_str`` namespace from the cell values,
          * types the restaurant, country, city and state, plus the item value and the
            currency when those cells are present,
          * attaches the literal values (names, first address line, postcode, amount),
          * types the ingredient, menu item and restaurant entities with the FIRST keyword
            rule that matches (tables ``_INGREDIENT_RULES``, ``_MENU_ITEM_RULES`` and
            ``_RESTAURANT_RULES`` at the end of this block),
          * links the entities with the cw object properties.

        Rows with a missing name, address, city, country, state or menu item are skipped,
        because those cells are needed to build the URIs every other triple refers to.
        Missing cells are detected with ``self.is_nan`` (defined outside this block; it is
        assumed to return True for NaN cells and False for text - TODO confirm).

        Optional cells (postcode, item value, currency, item description) only produce
        triples when they are present, so no entity is ever built from the text "nan".

        Returns None; the only effect is the triples added to ``self.g``.
        '''
        ns = self.cw_ns_str
        cw = self.cw
        add_triple = self.g.add
        is_nan = self.is_nan

        # NOTE(review): the bare namespace IRI is the subject of the postcode and amount
        # triples below, so every row writes onto the same node. This is kept so the
        # produced graph stays compatible; the postcode probably belongs on the address
        # entity - confirm against the ontology before changing it.
        namespace_ref = URIRef(ns)

        #Format:
        #0       1          2      3         4           5       6           7           8             9          10
        #name    address    city   country   postcode    state   categories  menu item   item value    currency   item description
        for row in self.data_frame.itertuples(index=False):
            (name, address, city, country, postcode, state,
             categories, menu_item, item_value, currency, description) = row[:11]

            # These six cells feed the URIs used by every other triple of the row.
            if any(is_nan(cell) for cell in (name, address, city, country, state, menu_item)):
                continue

            has_value = not is_nan(item_value)
            has_currency = not is_nan(currency)

            ###*************###
            # URI creation ***#
            ###*************###
            restaurant_ref = URIRef(ns + self._uri_fragment(name, "_") + "_" + self._uri_fragment(city, "_"))
            # The address entity also carries the first line of the address.
            address_ref = URIRef(ns + self._uri_fragment(address, "_"))
            city_ref = URIRef(ns + self._uri_fragment(city, "_"))
            country_ref = URIRef(ns + self._uri_fragment(country, "_"))
            state_ref = URIRef(ns + self._uri_fragment(state, "_"))
            # Blanks become "&" here: the author found that "_" failed to load in Protege
            # for the MenuItem / ItemName URI. The same entity carries the item name.
            menu_item_ref = URIRef(ns + self._uri_fragment(menu_item, "&"))

            #Types triples
            add_triple((restaurant_ref, RDF.type, cw.Restaurant))   #e.g. cw:<name>_<city> rdf:type cw:Restaurant
            add_triple((country_ref, RDF.type, cw.Country))         #e.g. cw:us rdf:type cw:Country
            add_triple((city_ref, RDF.type, cw.City))
            add_triple((state_ref, RDF.type, cw.State))

            # NOTE(review): owl:DatatypeProperty is used as a predicate in the four triples
            # below, exactly as in the previous version. They look like an attempt to
            # declare properties; decide whether they are needed before removing them.
            add_triple((namespace_ref, OWL.DatatypeProperty, cw.postCode))
            add_triple((namespace_ref, OWL.DatatypeProperty, cw.amount))
            add_triple((menu_item_ref, OWL.DatatypeProperty, cw.itemName))
            add_triple((address_ref, OWL.DatatypeProperty, cw.firstLineAddress))

            ####*****************####
            ### Literal values  ***###
            ####*****************####
            # NOTE(review): cw.Country, cw.City, cw.ItemValue and cw.Currency are used both
            # as classes and as predicates carrying a literal; kept as before.
            add_triple((restaurant_ref, cw.restaurantName, Literal(name, datatype=XSD.string)))
            add_triple((menu_item_ref, cw.itemName, Literal(menu_item, datatype=XSD.string)))
            add_triple((address_ref, cw.firstLineAddress, Literal(address, datatype=XSD.string)))
            add_triple((country_ref, cw.Country, Literal(country, datatype=XSD.string)))
            add_triple((city_ref, cw.City, Literal(city, datatype=XSD.string)))

            if not is_nan(postcode):
                add_triple((namespace_ref, cw.postCode,
                            Literal(self._postcode_text(postcode), datatype=XSD.string)))

            # cw:ItemValue entity, its amount and its link from the menu item.
            value_ref = None
            if has_value:
                value_ref = URIRef(ns + str(item_value).replace(" ", ""))
                amount = Literal(item_value, datatype=XSD.double)
                add_triple((value_ref, RDF.type, cw.ItemValue))
                add_triple((value_ref, cw.ItemValue, amount))
                add_triple((value_ref, cw.amount, amount))
                add_triple((namespace_ref, cw.amount, amount))
                # hasValue ##--> cw:MenuItem cw:hasValue cw:ItemValue
                add_triple((menu_item_ref, cw.hasValue, value_ref))

            # cw:Currency entity; linked to the item value only when both cells exist.
            if has_currency:
                currency_ref = URIRef(ns + self._uri_fragment(currency, "_"))
                add_triple((currency_ref, RDF.type, cw.Currency))
                add_triple((currency_ref, cw.Currency, Literal(currency, datatype=XSD.string)))
                if has_value:
                    # cw:amountCurrency ##--> cw:ItemValue cw:amountCurrency cw:Currency
                    add_triple((value_ref, cw.amountCurrency, currency_ref))

            ###********************************************###
            ## adding values to the cw:Ingredient classes **##
            ###********************************************###
            # The whole description is one entity; it gets the first matching class only.
            if not is_nan(description):
                ingredient_ref = URIRef(ns + self._uri_fragment(description, "+"))
                ingredient_class = self._first_rule_match(
                    self._keyword_tokens(description), self._INGREDIENT_RULES)
                if ingredient_class is not None:
                    add_triple((ingredient_ref, RDF.type, cw[ingredient_class]))
                # cw:isIngredientOf ##--> cw:Ingredient cw:isIngredientOf cw:MenuItem
                add_triple((ingredient_ref, cw.isIngredientOf, menu_item_ref))

            ###*******************************************###
            ## adding cw:MenuItem values into the graph  **##
            ###*******************************************###
            pizza_class = self._first_rule_match(
                self._keyword_tokens(menu_item), self._MENU_ITEM_RULES)
            if pizza_class is not None:
                add_triple((menu_item_ref, RDF.type, cw[pizza_class]))

            ###***************************###
            # Restaurant types triples ****##
            ###***************************###
            if not is_nan(categories):
                restaurant_class = self._first_rule_match(
                    self._keyword_tokens(categories), self._RESTAURANT_RULES)
                if restaurant_class is not None:
                    add_triple((restaurant_ref, RDF.type, cw[restaurant_class]))

            ####*************************####
            ### Object properties   ******###
            ####*************************####
            #locatedInCountry
            add_triple((city_ref, cw.locatedInCountry, country_ref))
            add_triple((state_ref, cw.locatedInCountry, country_ref))

            # cw:locatedInState ##--> cw:City cw:locatedInState cw:State
            add_triple((city_ref, cw.locatedInState, state_ref))

            # cw:locatedInAdress  ##--> Restaurant cw:locatedInAdress Address
            # (property name spelled as in the previous version)
            add_triple((restaurant_ref, cw.locatedInAdress, address_ref))

            # cw:locatedInCity ##--> cw:Restaurant cw:locatedInCity cw:City
            add_triple((restaurant_ref, cw.locatedInCity, city_ref))

            # cw:locatedInCity ##--> cw:Address cw:locatedInCity cw:City
            add_triple((address_ref, cw.locatedInCity, city_ref))

            # cw:serves ##--> cw:Restaurant cw:serves cw:MenuItem
            add_triple((restaurant_ref, cw.serves, menu_item_ref))

            # cw:servedInRestaurant ##--> cw:MenuItem cw:servedInRestaurant cw:Restaurant
            add_triple((menu_item_ref, cw.servedInRestaurant, restaurant_ref))

    @staticmethod
    def _uri_fragment(text, separator):
        '''Lower-case ``text`` and replace every blank with ``separator``.'''
        return str(text).lower().replace(" ", separator)

    @staticmethod
    def _postcode_text(postcode):
        '''
        Return the text stored as the postcode literal.

        A whole-number float such as 97701.0 is written as "97701" instead of "97701.0".
        Any other value is converted with str().
        '''
        if isinstance(postcode, float) and postcode.is_integer():
            return str(int(postcode))
        return str(postcode)

    @staticmethod
    def _keyword_tokens(text):
        '''
        Return the set of lower-cased words found in ``text``.

        Every non-alphanumeric character separates words, so "Olives, onions" gives
        {"olives", "onions"} rather than {"olives,", "onions"}.
        '''
        lowered = str(text).lower()
        return set("".join(ch if ch.isalnum() else " " for ch in lowered).split())

    @staticmethod
    def _first_rule_match(tokens, rules):
        '''
        Return the class name of the first rule in ``rules`` satisfied by ``tokens``.

        A rule is ``(class_name, alternatives)``. Each alternative is a string of
        blank-separated words and is satisfied when all its words are in ``tokens``.
        Returns None when no rule matches.
        '''
        for class_name, alternatives in rules:
            for alternative in alternatives:
                if all(word in tokens for word in alternative.split()):
                    return class_name
        return None

    # Keyword rules, tried top to bottom; only the first match is used. All keywords are
    # lower-case because the tokens are lower-cased. A two-word rule is listed before the
    # generic rule that would otherwise hide it (e.g. GoatCheese before Cheese).
    _INGREDIENT_RULES = (
        ("Artichokes", ("artichoke",)),
        ("BarbecueSauce", ("barbecue sauce",)),
        ("Basil", ("basil",)),
        ("Beans", ("beans",)),
        ("Beef", ("beef",)),
        ("BlackOlives", ("blackolives", "black olives")),
        ("BlueCheese", ("bluecheese", "blue cheese")),
        ("Broccoli", ("broccoli",)),
        ("ButternutSquash", ("butternutsquash", "butternut")),
        ("Capers", ("capers",)),
        ("Carrot", ("carrot",)),
        ("Cheddar", ("cheddar",)),
        ("GoatCheese", ("goatcheese", "goat cheese")),
        ("VeganCheese", ("vegancheese", "vegan cheese")),
        ("Cheese", ("cheese",)),
        ("CherryTomato", ("cherrytomato", "cherry tomato")),
        ("Chicken", ("chicken",)),
        ("Chorizo", ("chorizo",)),
        ("CrabMeat", ("crabmeat",)),
        ("Eggplant", ("eggplant",)),
        ("Feta", ("feta",)),
        ("Fig", ("fig",)),
        ("Fruit", ("fruit",)),
        ("Garlic", ("garlic",)),
        ("Gorgonzola", ("gorgonzola",)),
        ("GreenOlives", ("greenolives", "green olives")),
        ("GreenPepper", ("greenpepper", "green pepper")),
        ("Ham", ("ham",)),
        ("Herbs", ("herbs",)),
        ("HotSauce", ("hotsauce", "hot sauce")),
        ("JalapenoPepper", ("jalapenopepper", "jalapeno")),
        ("Marinara", ("marinara",)),
        ("Meat", ("meat",)),
        ("Meatballs", ("meatballs",)),
        ("Mortadella", ("mortadella",)),
        ("Mozzarella", ("mozzarella",)),
        ("Mushroom", ("mushroom",)),
        ("Olives", ("olives",)),
        ("Onion", ("onion",)),
        ("Oregano", ("oregano",)),
        ("Parmesan", ("parmesan",)),
        ("RedPepper", ("redpepper", "red pepper")),
        ("YellowPepper", ("yellowpepper", "yellow pepper")),
        ("Pepper", ("pepper",)),
        ("Pepperoni", ("pepperoni",)),
        ("Pesto", ("pesto",)),
        ("Pineapple", ("pineapple",)),
        ("PlumTomato", ("plumtomato", "plum tomato")),
        ("Potato", ("potato",)),
        ("Prosciutto", ("prosciutto",)),
        ("Provolone", ("provolone",)),
        ("Pumpkin", ("pumpkin",)),
        ("Ricotta", ("ricotta",)),
        ("RootVegetable", ("rootvegetable", "root vegetable")),
        ("Rosemary", ("rosemary",)),
        ("Salami", ("salami",)),
        ("Salmon", ("salmon",)),
        ("TomatoSauce", ("tomatosauce", "tomato sauce")),
        ("Sauce", ("sauce",)),
        ("Sausage", ("sausage",)),
        ("Scallops", ("scallops",)),
        ("SeaFood", ("seafood",)),
        ("Seeds", ("seeds",)),
        ("Shrimp", ("shrimp",)),
        ("Spinach", ("spinach",)),
        ("SweetPotato", ("sweetpotato",)),
        ("Tofu", ("tofu",)),
        ("Tomato", ("tomato",)),
        ("Tuna", ("tuna",)),
        ("VeganIngredient", ("veganingredient", "vegan ingredient")),
        ("Vegetable", ("vegetable",)),
        ("WinterSquash", ("wintersquash",)),
        ("Zucchini", ("zucchini",)),
    )

    _MENU_ITEM_RULES = (
        ("AmericanPizza", ("american",)),
        ("MargheritaPizza", ("margherita",)),
        ("PizzaSupreme", ("supreme",)),
        ("HawaiianPizza", ("hawaiian",)),
        ("PizzaMarinara", ("marinara",)),
        ("BarbecuePizza", ("barbecue", "bbq")),
        ("CalifornianPizza", ("californian",)),
        ("ChickenPizza", ("chicken",)),
        ("FruitPizza", ("fruit",)),
        ("MexicanPizza", ("mexican",)),
        ("PineapplePizza", ("pineapple",)),
        ("PizzaBianca", ("bianca",)),
        ("PizzaNapolitana", ("napolitana",)),
        ("PizzaNutella", ("nutella",)),
        ("PizzaRomana", ("romana",)),
        ("SweetPizza", ("sweet",)),
        ("VegetarianPizza", ("vegetarian",)),
        ("MushroomPizza", ("mushroom",)),
        ("MeatPizza", ("meat",)),
        ("JapanesePizza", ("japanese",)),
        ("FetaPizza", ("feta",)),
        ("BeansPizza", ("beans",)),
    )

    _RESTAURANT_RULES = (
        ("AmericanRestaurant", ("american",)),
        ("MexicanRestaurant", ("mexican",)),
        ("AsianRestaurant", ("asian",)),
        ("ChineseRestaurant", ("chinese",)),
        ("IndianRestaurant", ("indian",)),
        ("JapaneseRestaurant", ("japanese",)),
        ("SushiRestaurant", ("sushi",)),
        ("Bakery", ("bakery",)),
        ("BarAndGrill", ("bar", "grill")),
        ("CocktailBar", ("cocktail",)),
        ("KaraokeBar", ("karaoke",)),
        ("SportsBar", ("sports",)),
        ("BeerPlace", ("beer",)),
        ("Club", ("club",)),
        ("Pub", ("pub",)),
        ("Gastropub", ("gastropub",)),
        ("CoffeeShop", ("coffee",)),
        ("BurgerPlace", ("burger",)),
        ("GourmetRestaurants", ("gourmet",)),
        ("MediterraneanRestaurant", ("mediterranean",)),
        ("FrenchRestaurant", ("french",)),
        ("GreekRestaurant", ("greek",)),
        ("ItalianRestaurant", ("italian",)),
        ("SpanishRestaurant", ("spanish",)),
        ("Pizzeria", ("pizzeria", "pizza")),
        ("SeafoodRestaurant", ("seafood",)),
        ("DietaryRestaurant", ("dietary", "diet")),
    )
            
            
       
                       
                       
            
            
            
            
        


    def CovertCSVToRDF(self, useExternalURI):
        '''
        Convert the loaded table to RDF, one column-level mapping at a time.

        Only columns that exist in self.data_frame are mapped:
        - 'country': typed country entities plus name / ISO code literals.
        - 'city_ascii': typed city entities, their literals and, when a
          'capital' column exists, the city-to-country object triples.

        useExternalURI: forwarded unchanged to the type mappings, which use it
        when creating entity URIs.
        '''
        # The ontology is small, so its vocabulary is referenced directly by
        # name instead of being discovered through matching. The mappings are
        # tailored to this table, but each one is generic on its own: it takes
        # one or more columns and creates one or more triples per entity.
        table = self.data_frame

        def add_literals(subject_column, specs):
            # One literal mapping per (object column, property name, datatype)
            # whose object column is present in the table.
            for object_column, property_name, datatype in specs:
                if object_column in table:
                    self.mappingToCreateLiteralTriple(
                        subject_column,
                        object_column,
                        getattr(self.lab5, property_name),
                        datatype,
                    )

        if 'country' in table:
            # Subject column and target type
            self.mappingToCreateTypeTriple('country', self.lab5.Country, useExternalURI)

            # Subject and object column may be the same (the name literal)
            add_literals('country', [
                ('country', 'name', XSD.string),
                ('iso2', 'iso2code', XSD.string),
                ('iso3', 'iso3code', XSD.string),
            ])

        if 'city_ascii' in table:
            self.mappingToCreateTypeTriple('city_ascii', self.lab5.City, useExternalURI)

            add_literals('city_ascii', [
                ('city_ascii', 'name_ascii', XSD.string),
                ('city', 'name', XSD.string),
                ('admin_name', 'admin_name', XSD.string),
                ('lat', 'latitude', XSD.float),
                ('lng', 'longitude', XSD.float),
                ('population', 'population', XSD.long),
            ])

            if 'capital' in table:
                # Tailored mapping: subject column, object column and the
                # column holding the kind of capital.
                # A simpler alternative that ignores the capital information:
                #   self.mappingToCreateObjectTriple('city_ascii', 'country', self.lab5.cityIsLocatedIn)
                self.mappingToCreateCapitalTriple('city_ascii', 'country', 'capital')

        
        
        
        
          
    def createURIForEntity(self, name, useExternalURI):
        '''
        Register a URI for the entity called name and return it.

        A fresh URI (namespace string + name with spaces turned into
        underscores) is always stored in self.stringToURI first. When
        useExternalURI is truthy the external KG is consulted and a non-empty
        answer replaces the fresh URI.
        '''
        registry = self.stringToURI

        # Default option: fresh URI
        registry[name] = self.lab5_ns_str + name.replace(" ", "_")

        if not useExternalURI:
            return registry[name]

        # Try to reuse a URI from the online KG
        external_uri = self.getExternalKGURI(name)
        if external_uri != "":
            registry[name] = external_uri

        return registry[name]
    
    
        
    def getExternalKGURI(self, name):
        '''
        Return the identifier of the external KG entity whose label is
        lexically closest to name (isub score), or '' when none qualifies.

        Approximate solution: at most 5 candidates are requested and only the
        label is compared; context may be needed to disambiguate some names.
        '''
        best_uri, best_score = '', -1

        for candidate in self.dbpedia.getKGEntities(name, 5):
            score = isub(name, candidate.label)
            # Strict comparison: on ties the earliest candidate is kept
            if score > best_score:
                best_uri, best_score = candidate.ident, score

        return best_uri
            
    
    '''
    Mapping to create triples like lab5:London rdf:type lab5:City
    A mapping may create more than one triple
    column: columns where the entity information is stored
    useExternalURI: if URI is fresh or from external KG
    '''
    def mappingToCreateTypeTriple(self, subject_column, class_type, useExternalURI):
        '''
        Add one "<entity> rdf:type <class_type>" triple per cell of subject_column.

        subject_column: column holding the entity names.
        class_type: class (RDF term) used as the object of rdf:type.
        useExternalURI: passed to createURIForEntity when an entity has no URI yet.

        Entity names are lower-cased before being looked up in / added to
        self.stringToURI. Cells that are missing (NaN or None) are skipped,
        like the other mappings do for missing values, instead of failing on
        the call to .lower().
        '''
        for subject in self.data_frame[subject_column]:

            # A missing cell names no entity
            if subject is None or self.is_nan(subject):
                continue

            # The lower-cased name is the key for the URI cache
            key = subject.lower()
            if key in self.stringToURI:
                entity_uri = self.stringToURI[key]
            else:
                entity_uri = self.createURIForEntity(key, useExternalURI)

            # TYPE TRIPLE
            # Individuals: URIRef turns the string URI into an RDF term.
            # Concepts: taken from the ontology namespace by the caller.
            self.g.add((URIRef(entity_uri), RDF.type, class_type))
        

                        
            


    def is_nan(self, x):
        '''
        Return the result of comparing x with itself for inequality.

        A float NaN is unequal to itself, so this is truthy for NaN, while
        strings, None and ordinary numbers compare equal to themselves and
        give False. The mapping methods use it to detect missing table cells.
        '''
        return (x != x)
            
            
    '''
    Mappings to create triples of the form lab5:london lab5:name "London"
    '''    
    def mappingToCreateLiteralTriple(self, subject_column, object_column, predicate, datatype):
        '''
        Add one "<entity> <predicate> literal" triple per row with a value.

        subject_column: column with the entity names (their URIs must already
            be registered in self.stringToURI under the lower-cased name).
        object_column: column with the literal values.
        predicate: property used in the triple.
        datatype: datatype given to the literal.

        Rows whose value is NaN, None or the empty string are skipped.
        '''
        table = self.data_frame
        rows = zip(table[subject_column], table[object_column])

        for entity_name, raw_value in rows:

            value_missing = self.is_nan(raw_value) or raw_value == None or raw_value == ""
            if value_missing:
                continue

            # The URI was created earlier by the type mapping
            subject_node = URIRef(self.stringToURI[entity_name.lower()])
            typed_literal = Literal(raw_value, datatype=datatype)

            self.g.add((subject_node, predicate, typed_literal))
            
    '''
    Mappings to create triples of the form lab5:london lab5:cityIsLocatedIn lab5:united_kingdom
    '''
    def mappingToCreateObjectTriple(self, subject_column, object_column, predicate):
        '''
        Add one "<subject entity> <predicate> <object entity>" triple per row.

        Both entities are looked up in self.stringToURI by lower-cased name,
        so their URIs must have been created beforehand. Rows whose object
        cell is NaN are skipped.
        '''
        pairs = zip(self.data_frame[subject_column], self.data_frame[object_column])

        for source_name, target_name in pairs:

            if self.is_nan(target_name):
                continue

            # URIs were created earlier
            uri_of = self.stringToURI
            source_node = URIRef(uri_of[source_name.lower()])
            target_node = URIRef(uri_of[target_name.lower()])

            self.g.add((source_node, predicate, target_node))
            
    
    
    def mappingToCreateCapitalTriple(self, subject_column, object_column, capital_value_column):
        '''
        Add one city-to-country object triple per row, choosing the property
        from the value in capital_value_column.

        subject_column: column with the subject entity names.
        object_column: column with the object entity names.
        capital_value_column: column with the kind of capital; "admin",
            "primary" and "minor" select a dedicated property, anything else
            (including an empty cell) falls back to cityIsLocatedIn.

        Both entities are looked up in self.stringToURI by lower-cased name,
        so their URIs must have been created beforehand. Rows whose subject or
        object cell is missing (NaN or None) are skipped, in line with
        mappingToCreateObjectTriple, instead of failing on .lower().
        '''
        # Kind of capital -> name of the property in the lab5 namespace
        property_by_kind = {
            "admin": "isFirstLevelAdminCapitalOf",
            "primary": "isCapitalOf",
            "minor": "isSecondLevelAdminCapitalOf",
        }

        def missing(cell):
            return cell is None or self.is_nan(cell)

        rows = zip(
            self.data_frame[subject_column],
            self.data_frame[object_column],
            self.data_frame[capital_value_column],
        )

        for subject, target, value in rows:

            # No triple can be built without both ends
            if missing(subject) or missing(target):
                continue

            # URIs were created earlier
            subject_uri = self.stringToURI[subject.lower()]
            object_uri = self.stringToURI[target.lower()]

            # Default when the value is empty or not one of the expected kinds
            predicate = getattr(self.lab5, property_by_kind.get(value, "cityIsLocatedIn"))

            # The caller's ontology may declare a property hierarchy, domain/range
            # axioms and inverses, in which case reasoning derives further
            # triples from this one.
            self.g.add((URIRef(subject_uri), predicate, URIRef(object_uri)))
    
    
    
    def performReasoning(self, ontology_file):
        '''
        Load the ontology into self.g and expand the graph with the triples
        entailed under OWL 2 RL semantics (owlrl library).

        ontology_file: path of the ontology; its serialisation format is
            guessed from the file name (e.g. ttl, or owl for RDF/XML).

        The graph is modified in place; its size is printed before loading,
        after loading and after reasoning.
        '''
        print("Data triples from CSV: '" + str(len(self.g)) + "'.")

        # The ontology must be in the graph before reasoning.
        # Graph.parse is used rather than Graph.load: load() was deprecated in
        # rdflib 5 and is gone in rdflib 6, while parse() exists in both.
        self.g.parse(ontology_file, format=guess_format(ontology_file))

        print("Triples including ontology: '" + str(len(self.g)) + "'.")

        # Apply reasoning and expand the graph with the new triples
        owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples=False, datatype_axioms=False).expand(self.g)

        print("Triples after OWL 2 RL reasoning: '" + str(len(self.g)) + "'.")
    
    
    
    def performSPARQLQuery(self, file_query_out):
        '''
        Run the "restaurants per state" SPARQL query on self.g and write the
        result rows to file_query_out.

        file_query_out: path of the output file (created or overwritten).
            Each row is written as "<state>","<count>" followed by a newline,
            with both values wrapped in double quotes.

        The number of result rows is printed before writing.
        '''
        # Alternative query counting cities instead of restaurants:
        #   SELECT DISTINCT ?State (COUNT(?City) AS ?Num_Cities) WHERE {
        #     ?City rdf:type cw:City .
        #     ?State rdf:type cw:State .
        #     ?City cw:locatedInState ?State .
        #   }
        #   GROUP BY ?State
        #   ORDER BY DESC(?State)
        qres = self.g.query(
            """SELECT DISTINCT ?State (COUNT(?Restaurant) AS ?Num_Restaurants) WHERE {
              ?City rdf:type cw:City .
              ?State rdf:type cw:State .
              ?City cw:locatedInState ?State .
              ?Restaurant cw:locatedInAdress ?Adress .
              ?Adress cw:locatedInCity ?City
        }
        GROUP BY ?State
        ORDER BY DESC(?State)
        """)

        print("%s Restaurants satisfying the query." % (str(len(qres))))

        # The with-block closes the file even if formatting a row fails;
        # the explicit encoding keeps the output independent of the platform default.
        with open(file_query_out, "w+", encoding="utf-8") as f_out:
            for row in qres:
                # Row is a list of matched RDF terms: URIs, literals or blank nodes
                f_out.write('\"%s\",\"%s\"\n' % (row.State, row.Num_Restaurants))
        
        
    def performSPARQLQueryLab7(self):
        '''
        Run a SPARQL query that counts, per country, the cities linked to it
        through cw:hasCity, and print one "<country>","<count>" line per row,
        ordered by descending count.
        '''
        # SPARQL variable names are case-sensitive: the projected variable must
        # be the same ?country that is bound in the pattern and used in
        # GROUP BY, otherwise row.country is never bound.
        qres = self.g.query(
            """SELECT DISTINCT ?country (COUNT(?City) AS ?num_cities) WHERE {
              ?country cw:hasCity ?City .
        }
        GROUP BY ?country
        ORDER BY DESC(?num_cities)
        """)

        print('Running Query')
        for row in qres:
            # Row is a list of matched RDF terms: URIs, literals or blank nodes
            line_str = '\"%s\",\"%s\"' % (row.country, row.num_cities)
            print(line_str)

            
        
        
    
    
    def saveGraph(self, file_output):
        '''
        Serialise self.g to the file file_output using the 'ttl' format.
        '''
        # To inspect the serialisation instead of saving it, print the result
        # of self.g.serialize(format="turtle").
        output_format = 'ttl'
        self.g.serialize(destination=file_output, format=output_format)
        
        
    
    
    

if __name__ == '__main__':

    # Input table. The earlier lab used "worldcities-free-100.csv" with the columns:
    # city    city_ascii    lat    lng    country    iso2    iso3    admin_name    capital    population
    file = "IN3067-INM713_coursework_data_pizza_500.csv"

    solution = Lab5Solution(file)

    # Transformation to run:
    #   "task1"  -> fresh entity URIs
    #   "task2"  -> reusing URIs from DBPedia
    #   anything else -> simple and unique mapping/transformation
    task = "Simple_Mapping"

    # Create RDF triples. The method is resolved by name only after the task
    # is known, so only the selected transformation is looked up on the object.
    transformation = {"task1": "Task1", "task2": "Task2"}.get(task, "SimpleUniqueMapping")
    getattr(solution, transformation)()

    # Graph with only data
    solution.saveGraph(file.replace(".csv", "-" + task) + ".ttl")

    # Optional follow-up steps, disabled by default:
    #
    # OWL 2 RL reasoning (ttl or owl/RDF-XML ontology file)
    #solution.performReasoning("pizza-restaurants-ontology.ttl")
    #solution.performReasoning("pizza-restaurants-ontology.owl")
    #
    # Graph with ontology triples and entailed triples
    #solution.saveGraph(file.replace(".csv", "-"+task)+"-reasoning.ttl")
    #
    # SPARQL results into CSV
    #solution.performSPARQLQuery(file.replace(".csv", "-"+task)+"-query-results.csv")
    #
    # SPARQL for Lab 7 2021
    #solution.performSPARQLQueryLab7()
    
    
    
    
    
     



