In [10]:
from rdflib import URIRef, BNode, Literal, Graph, plugin, Namespace
from rdflib.serializer import Serializer 
from rdflib.namespace import FOAF, DC, RDF, RDFS, OWL, SKOS, XSD, NamespaceManager
import csv

In [11]:
# Start from the hand-written base vocabulary; everything generated below is
# added on top of this graph.
kgcBase = Graph()
kgcBase.parse(source='./base.ttl', format='ttl')

<Graph identifier=N2084c6c94ca348a5b51bf79f7119babe (<class 'rdflib.graph.Graph'>)>

In [12]:
# Namespaces used by the rest of the notebook. They are declared first and
# bound to prefixes afterwards so the prefix table is visible in one place.
kgc = Namespace("http://www.knowledgegraph.tech/")
kgcr = Namespace("http://www.knowledgegraph.tech/iri/")
schema = Namespace("http://schema.org/")
# Wikidata links currently use the /wiki/ namespace; the /entity/ variant is
# kept below, disabled, as the alternative.
# wd = Namespace("http://www.wikidata.org/entity/")
wd = Namespace("http://www.wikidata.org/wiki/")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

# Bind every prefix on the base graph, in the same order as declared above.
nmGraph = NamespaceManager(kgcBase)
for prefix, namespace in (
    ("kgc", kgc),
    ("kgcr", kgcr),
    ("schema", schema),
    ("wd", wd),
    ("owl", owl),
    ("skos", skos),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("xml", xml),
    ("xsd", xsd),
):
    nmGraph.bind(prefix, namespace)

## Initializations

In [13]:
# Initial state for this run (these values change in other versions of the
# notebook).

# The 2020 graph is the base graph itself (same object, not a copy).
kgcGraph2020 = kgcBase

# Maps a label (name, title, URL, ...) to the node already minted for it.
labelDict = dict()

# Numeric IRIs are minted sequentially; the first one issued is startFrom + 1.
startFrom = 0
lastId = startFrom + 1

## Conference Information

In [14]:

# The conference node that every talk is attached to via schema:superEvent.
KGC2020 = URIRef("http://www.knowledgegraph.tech/iri/conference-2020")

# (predicate, object) pairs describing the conference.
conference_statements = (
    (schema.sameAs, wd.Q76451254),
    (schema.name, Literal("Knowledge Graph Conference 2020")),
    (RDF.type, schema.EventSeries),
    (RDF.type, kgc.Conference),
    (schema.eventAttendanceMode, schema.OnlineEventAttendanceMode),
)
for predicate, value in conference_statements:
    kgcGraph2020.add((KGC2020, predicate, value))

<Graph identifier=N2084c6c94ca348a5b51bf79f7119babe (<class 'rdflib.graph.Graph'>)>

## Speakers

In [15]:
def get_or_create_entity(label, rdf_type, qids=None):
    """Return the node registered under ``label``, minting it on first use.

    If ``label`` is already a key of ``labelDict`` the cached node is returned
    and nothing is added to the graph. Otherwise a new IRI is built from
    ``kgcr`` and the zero-padded ``lastId`` counter (which is then advanced),
    the node is typed as ``rdf_type``, receives ``label`` as both
    ``schema:name`` and ``rdfs:label``, gets one ``schema:sameAs`` link per
    QID, and is stored in ``labelDict``.

    Parameters
    ----------
    label : str
        Human-readable label; also the cache key.
    rdf_type : rdflib term
        Class assigned through ``rdf:type``.
    qids : str or None
        Zero or more Wikidata QIDs separated by ';'. Surrounding whitespace
        is stripped and empty fragments are ignored. Every QID column is
        handled this way so multi-valued cells never end up as one IRI.

    Returns
    -------
    The cached or newly minted node.

    Note: ``labelDict`` is keyed by the bare label for every entity type, so
    two entities of different types with the same label share one node.
    """
    global lastId

    if label in labelDict:
        return labelDict[label]

    node = URIRef(kgcr + str(lastId).zfill(6))
    lastId += 1
    kgcGraph2020.add((node, RDF.type, rdf_type))
    kgcGraph2020.add((node, schema.name, Literal(label)))
    kgcGraph2020.add((node, RDFS.label, Literal(label)))
    for raw_qid in (qids or "").split(';'):
        qid = raw_qid.strip()
        if qid:
            kgcGraph2020.add((node, schema.sameAs, URIRef(wd + qid)))
    labelDict[label] = node
    return node


# NOTE(review): `speaker`, `name`, `co` and `coName` are deliberately NOT reset
# for rows without a Name, so such rows keep describing the previous speaker /
# company. This looks like support for continuation rows in the CSV export --
# confirm against the data before changing it.
with open('KGC-2020-Speakers-Recon-1.csv', mode='r', encoding="utf-8") as csv_file:
    for row in csv.DictReader(csv_file):

        # Speaker ----------------------------------------------------------
        if row['Name']:
            name = row['Name']
            co = ""  # a new speaker starts without a company
            speaker = get_or_create_entity(name, schema.Person, row["personQID"])

        if row["LinkedIn"]:
            kgcGraph2020.add((speaker, kgc.linkedIn, URIRef(row["LinkedIn"])))

        if row["Final Bios"]:
            kgcGraph2020.add((speaker, schema.description, Literal(row["Final Bios"])))

        if row["Title"]:
            kgcGraph2020.add((speaker, schema.jobTitle, Literal(row["Title"])))

        # Speaker's country of citizenship ----------------------------------
        if row["countryOfCitizenship"]:
            citizenship = get_or_create_entity(
                row["countryOfCitizenship"], schema.Country, row["countryOfCitizenshipQID"])
            kgcGraph2020.add((speaker, schema.nationality, citizenship))

        # Speaker's school ---------------------------------------------------
        if row["educatedAt"]:
            school = get_or_create_entity(
                row["educatedAt"], schema.CollegeOrUniversity, row["educatedAtQID"])
            kgcGraph2020.add((speaker, schema.alumniOf, school))

        # Speaker's field of work -------------------------------------------
        if row["personFieldOfWork"]:
            person_field = get_or_create_entity(
                row["personFieldOfWork"], kgc.FieldOfWork, row["personFieldOfWorkQID"])
            kgcGraph2020.add((speaker, schema.knowsAbout, person_field))
            kgcGraph2020.add((speaker, kgc.hasFieldOfWork, person_field))

        # Speaker's company --------------------------------------------------
        if row["Company"]:
            coName = row["Company"]
            co = get_or_create_entity(coName, schema.Organization, row["companyQID"])
            kgcGraph2020.add((speaker, schema.worksFor, co))

        # Company facts: only attached when a company is known for the
        # current speaker (co is "" otherwise).
        if row["member of"]:
            consortium = get_or_create_entity(
                row["member of"], schema.Consortium, row["memberOfQID"])
            if co:
                kgcGraph2020.add((co, schema.memberOf, consortium))

        if row["instance of"]:
            business_type = get_or_create_entity(
                row["instance of"], kgc.BusinessType, row["instanceOfQID"])
            if co:
                kgcGraph2020.add((co, kgc.hasBusinessType, business_type))
                kgcGraph2020.add((co, schema.additionalType, business_type))

        if row["industry"]:
            industry = get_or_create_entity(
                row["industry"], kgc.BusinessIndustry, row["industryQID"])
            if co:
                kgcGraph2020.add((co, kgc.hasIndustry, industry))
                kgcGraph2020.add((co, schema.additionalType, industry))

        if row["companyFieldOfWork"]:
            company_field = get_or_create_entity(
                row["companyFieldOfWork"], kgc.FieldOfWork, row["companyFieldOfWorkQID"])
            if co:
                kgcGraph2020.add((co, schema.knowsAbout, company_field))
                kgcGraph2020.add((co, kgc.hasFieldOfWork, company_field))

        # Postal address -----------------------------------------------------
        if any(row[column] for column in ("Address", "City", "County", "State", "Country")):

            # The address belongs to the company when the row names one (or,
            # on a row without a Name, when a company is carried over);
            # otherwise it belongs to the speaker. The carried-over check
            # avoids using the empty `co` placeholder as a triple subject.
            if row["Company"] or (not row["Name"] and co):
                owner, owner_label = co, coName
            else:
                owner, owner_label = speaker, name

            # "<Address>,<City>,<County>,<State>,<Country>" with absent parts
            # skipped; only Country is written without a trailing comma.
            location = "".join(
                row[column] + ","
                for column in ("Address", "City", "County", "State")
                if row[column]
            )
            if row["Country"]:
                location += row["Country"]

            # Cache key / schema:name: owner label followed by the location.
            PAName = owner_label + "," + location

            if PAName in labelDict:
                postaladdress = labelDict[PAName]
            else:
                postaladdress = URIRef(kgcr + str(lastId).zfill(6))
                lastId += 1
                kgcGraph2020.add((postaladdress, RDF.type, schema.PostalAddress))
                kgcGraph2020.add((postaladdress, schema.name, Literal(PAName)))
                # The label is the location only. It is taken from `location`
                # rather than by splitting PAName at its first comma, which
                # broke for owner names that themselves contain a comma.
                kgcGraph2020.add((postaladdress, RDFS.label, Literal(location)))
                # The owner link is written once, when the address is minted.
                kgcGraph2020.add((owner, schema.address, postaladdress))
                labelDict[PAName] = postaladdress

        # Address components. Each one hangs off the postal address and is
        # placed (schema:geoWithin) inside the next larger component, provided
        # that component's own column is filled on the same row.

        # Street address
        if row["Address"]:
            street = get_or_create_entity(row["Address"], kgc.Address, row["AddressQID"])
            kgcGraph2020.add((postaladdress, kgc.streetAddress, street))

        # City
        if row["City"]:
            city = get_or_create_entity(row["City"], schema.City, row["CityQID"])
            kgcGraph2020.add((postaladdress, kgc.addressCity, city))
            if row["Address"]:
                kgcGraph2020.add((street, schema.geoWithin, city))

        # County
        if row["County"]:
            county = get_or_create_entity(row["County"], kgc.County, row["CountyQID"])
            kgcGraph2020.add((postaladdress, kgc.addressCounty, county))
            if row["City"]:
                kgcGraph2020.add((city, schema.geoWithin, county))

        # State
        if row["State"]:
            state = get_or_create_entity(row["State"], schema.State, row["StateQID"])
            kgcGraph2020.add((postaladdress, kgc.addressRegion, state))
            if row["County"]:
                kgcGraph2020.add((county, schema.geoWithin, state))

        # Country
        if row["Country"]:
            address_country = get_or_create_entity(
                row["Country"], schema.Country, row["CountryQID"])
            kgcGraph2020.add((postaladdress, schema.addressCountry, address_country))
            if row["State"]:
                kgcGraph2020.add((state, schema.geoWithin, address_country))
               
        
        
    

## Presentations

In [16]:
#tagsDict={}

In [17]:
# Import the presentations CSV: one talk per Title, linked to the conference,
# its presenter, tags (and the facet each tag belongs to), video and slides.
#
# NOTE(review): `talk`, `title` and `tag` are not reset for rows that leave
# Title / Tag empty, so such rows keep describing the previous talk / tag.
# This looks like support for continuation rows in the CSV export -- confirm
# against the data before changing it.
with open('KGC-2020-Presentations-Recon-1.csv', mode='r', encoding="utf-8") as csv_file:
    presentation_file = csv.DictReader(csv_file)

    for row in presentation_file:

        # Title + description of the presentation ----------------------------
        if row['Title']:
            title = row['Title']

            if title in labelDict:
                talk = labelDict[title]
            else:
                newId = str(lastId).zfill(6)
                talk = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((talk, RDF.type, kgc.Talk))
                kgcGraph2020.add((talk, schema.name, Literal(title)))
                kgcGraph2020.add((talk, RDFS.label, Literal(title)))
                labelDict[title] = talk

            kgcGraph2020.add((talk, schema.superEvent, KGC2020))

        if row["Description"]:
            kgcGraph2020.add((talk, schema.abstract, Literal(row["Description"])))

        # Presenter of the presentation --------------------------------------
        if row['Name']:
            presenterName = row['Name']

            if presenterName in labelDict:
                presenter = labelDict[presenterName]
            else:
                # Unknown presenter: report it and mint a minimal Person so
                # the talk can still be linked.
                print("Error: Presenter {} not in the list of speakers! Please add his full details to the Speakers File.".format(presenterName))
                newId = str(lastId).zfill(6)
                presenter = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((presenter, RDF.type, schema.Person))
                kgcGraph2020.add((presenter, schema.name, Literal(presenterName)))
                kgcGraph2020.add((presenter, RDFS.label, Literal(presenterName)))
                # Cache the presenter under their own name. The talk used to
                # be re-stored under its title here instead, so an unknown
                # presenter was never cached and got a fresh IRI (and a fresh
                # error message) on every appearance.
                labelDict[presenterName] = presenter

            kgcGraph2020.add((talk, schema.performer, presenter))

        # Tags of the presentation -------------------------------------------
        if row["Tag"]:
            tagName = row["Tag"]

            if tagName in labelDict:
                tag = labelDict[tagName]
            else:
                newId = str(lastId).zfill(6)
                tag = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((tag, RDF.type, kgc.Tag))
                kgcGraph2020.add((tag, schema.name, Literal(tagName)))
                kgcGraph2020.add((tag, RDFS.label, Literal(tagName)))

                if row["TagQID"]:
                    kgcGraph2020.add((tag, schema.sameAs, URIRef(wd + row["TagQID"])))
                labelDict[tagName] = tag

            kgcGraph2020.add((talk, schema.about, tag))

        # Facet the current tag belongs to (itself modelled as a kgc:Tag) ----
        if row["FacetOf"]:
            facetName = row["FacetOf"]

            if facetName in labelDict:
                facet = labelDict[facetName]
            else:
                newId = str(lastId).zfill(6)
                facet = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((facet, RDF.type, kgc.Tag))
                kgcGraph2020.add((facet, schema.name, Literal(facetName)))
                kgcGraph2020.add((facet, RDFS.label, Literal(facetName)))

                if row["FacetOfQID"]:
                    kgcGraph2020.add((facet, schema.sameAs, URIRef(wd + row["FacetOfQID"])))
                labelDict[facetName] = facet

            kgcGraph2020.add((tag, kgc.facetOf, facet))

        # Video URL of the presentation --------------------------------------
        if row["Video URL"]:
            videoURL = row["Video URL"]

            if videoURL in labelDict:
                video = labelDict[videoURL]
            else:
                newId = str(lastId).zfill(6)
                video = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((talk, schema.recordedIn, video))
                kgcGraph2020.add((video, RDF.type, schema.VideoObject))
                # Stored as a Literal only because of the data in the file;
                # otherwise it would be a URIRef.
                kgcGraph2020.add((video, schema.embedUrl, Literal(videoURL)))
                vidLabel = "Video Recording Of " + title
                kgcGraph2020.add((video, RDFS.label, Literal(vidLabel)))
                labelDict[videoURL] = video

            kgcGraph2020.add((talk, schema.workPerformed, video))

        # Slides URL of the presentation -------------------------------------
        if row["Slides URL"]:
            slidesURL = row["Slides URL"]

            if slidesURL in labelDict:
                slides = labelDict[slidesURL]
            else:
                newId = str(lastId).zfill(6)
                slides = URIRef(kgcr + newId)
                lastId += 1
                kgcGraph2020.add((slides, schema.about, talk))
                kgcGraph2020.add((slides, RDF.type, schema.PresentationDigitalDocument))
                # Stored as a Literal only because of the data in the file;
                # otherwise it would be a URIRef.
                kgcGraph2020.add((slides, schema.archivedAt, Literal(slidesURL)))
                slideLabel = "Presentation Slides Of " + title
                kgcGraph2020.add((slides, RDFS.label, Literal(slideLabel)))
                labelDict[slidesURL] = slides

            kgcGraph2020.add((talk, schema.workPerformed, slides))

## Serialize

In [18]:
# Write the complete 2020 graph (base vocabulary plus generated data) as Turtle.
kgcGraph2020.serialize(destination="kgc2020-1.ttl", format="turtle")

<Graph identifier=N2084c6c94ca348a5b51bf79f7119babe (<class 'rdflib.graph.Graph'>)>