In [44]:
from rdflib import URIRef, BNode, Literal, Graph, plugin, Namespace
from rdflib.serializer import Serializer 
from rdflib.namespace import FOAF, DC, RDF, RDFS, OWL, SKOS, NamespaceManager
import csv

import io
import pydotplus
from IPython.display import display, Image
from rdflib.tools.rdf2dot import rdf2dot

## Load Existing Graph

In [45]:
# Parse the 2020 conference knowledge graph from its Turtle file.
# Later cells query this graph and add the 2021 data to it.
kgc2020 = Graph()
kgc2020.parse('./kgc2020.ttl', format='ttl')

<Graph identifier=N1f8eb08854224c52a9ca4f788e27d74a (<class 'rdflib.graph.Graph'>)>

In [46]:
def KgcNameSpace(g):
  """Return a dict mapping every namespace prefix bound on ``g`` to its namespace as a plain string.

  ``g`` only has to provide a ``namespaces()`` method yielding
  ``(prefix, namespace)`` pairs; each namespace is converted with ``str()``.
  If a prefix is yielded more than once, the last namespace wins.
  """
  return {prefix: str(uri) for prefix, uri in g.namespaces()}
  

# Second, independent parse of the same Turtle file into `g`;
# the next cell uses `g` to list the namespace prefixes bound on it.
g = Graph() 
g.parse('./kgc2020.ttl', format='ttl')



<Graph identifier=N2498f3b2a18c4da1aa942e79ef1aa9d0 (<class 'rdflib.graph.Graph'>)>

In [None]:
# Print the namespace prefixes bound on `g`.
# Iterating the returned dict yields only its keys, so the namespace IRIs are not shown.
for i in KgcNameSpace(g):
    print(i)
# TODO: merge with the cell below and show both parts of the dictionary (prefix and namespace).
# NOTE(review): MyNameSpace is not defined anywhere in this notebook - presumably an earlier name of KgcNameSpace; confirm.
#print(MyNameSpace(g))

In [47]:
#  First add with help of above, then add the extra parts

# Namespace objects used throughout the notebook to build IRIs
# (e.g. schema.name, wd.Q106704796, kgc + "00042").
schema = Namespace("http://schema.org/")
wd = Namespace("http://www.wikidata.org/entity/")
owl = Namespace("http://www.w3.org/2002/07/owl#")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
kgc = Namespace("http://www.knowledgegraph.tech/iri/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xml = Namespace("http://www.w3.org/XML/1998/namespace")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

# Register every prefix on the 2020 graph. The SPARQL queries in later cells
# use these prefixes (schema:, owl:, skos:) without PREFIX declarations.
nmGraph = NamespaceManager(kgc2020)
for prefix, namespace in (
    ("schema", schema),
    ("wd", wd),
    ("owl", owl),
    ("skos", skos),
    ("kgc", kgc),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("xml", xml),
    ("xsd", xsd),
):
    nmGraph.bind(prefix, namespace)


In [48]:
# Lookup table from a label string to the IRI of the entity registered under that label.
# Later cells fill it from the existing graph and consult it before minting new IRIs.
labelDict = {}

In [49]:
# Collect every named entity of the listed types from the existing graph so that
# the import cells below can reuse its IRI instead of minting a new one.
# Note: `owl:class` is written in lowercase; the stand-alone query in the next cell
# uses the same spelling and returns rows for it.
res = kgc2020.query('''select distinct ?p ?name where 
{{?p a schema:Person; schema:name ?name} UNION
{?p a schema:Country; schema:name ?name} UNION
{?p a schema:EducationalOrganization; schema:name ?name} UNION
{?p a schema:Thing; schema:name ?name} UNION
{?p a schema:Organization; schema:name ?name} UNION
{?p a schema:Place; schema:name ?name} UNION
{?p a owl:class; schema:name ?name}
}''')

for row in res:
    #print(str(row[0]))
    #print(str(row[1]))
    # row[0] is ?p (the entity), row[1] is ?name. If two entities share a name,
    # the one returned later overwrites the earlier entry.
    labelDict[str(row[1])] = row[0]
    

In [55]:
# Inspection cell: print the IRI and the name of every entity typed owl:class
# (lowercase, the spelling this query matches in the graph).
res22 = kgc2020.query('''select distinct ?p ?name where 
{?p a owl:class; schema:name ?name}''')

for row in res22:
    print(str(row[0]))
    print(str(row[1]))
    

http://www.knowledgegraph.tech/iri/00036
public-benefit corporation
http://www.knowledgegraph.tech/iri/00087
land-grant university
http://www.knowledgegraph.tech/iri/00088
sun grant institution
http://www.knowledgegraph.tech/iri/00099
triplestore
http://www.knowledgegraph.tech/iri/00128
university in France
http://www.knowledgegraph.tech/iri/00149
internet hosting service
http://www.knowledgegraph.tech/iri/00153
computing platform
http://www.knowledgegraph.tech/iri/00154
application programming interface
http://www.knowledgegraph.tech/iri/00197
Gesellschaft mit beschränkter Haftung
http://www.knowledgegraph.tech/iri/00226
company
http://www.knowledgegraph.tech/iri/00233
multinational corporation
http://www.knowledgegraph.tech/iri/00237
investment bank
http://www.knowledgegraph.tech/iri/00238
asset management
http://www.knowledgegraph.tech/iri/00253
limited liability company
http://www.knowledgegraph.tech/iri/00278
consulting company
http://www.knowledgegraph.tech/iri/00306
division
htt

In [50]:
# Determine the next free numeric identifier under http://www.knowledgegraph.tech/iri/.
# The query returns the subject of every triple whose IRI contains that prefix
# (one row per triple, so subjects repeat).
res = kgc2020.query('''select ?s where {?s ?p ?o. filter(regex(str(?s),"http://www.knowledgegraph.tech/iri"))}''' )

# Read the whole segment after the last "/" rather than a fixed five characters,
# so identifiers longer than five digits are not truncated. Subjects whose last
# segment is not a plain number are skipped instead of making int() fail.
suffixes = {str(r[0]).rsplit("/", 1)[-1] for r in res}
ids = sorted(int(suffix) for suffix in suffixes if suffix.isdecimal())
if not ids:
    raise ValueError(
        "No numeric identifiers found under http://www.knowledgegraph.tech/iri/ - "
        "cannot determine the next identifier."
    )

# Despite its name, lastId holds the NEXT identifier to hand out: the import
# cells zero-pad it to five digits, use it, and then increment it.
lastId = ids[-1] + 1


In [None]:
# Question: Add the information from presentation files to graph of 2020 ?
# Nodes like creative work, etc

In [51]:
# NOTE: this binds a second name to the SAME Graph object - no copy is made.
# Every triple added through kgc2023 in later cells is therefore also present in kgc2020.
kgc2023 = kgc2020

In [52]:
# Find the conference series node (the entity typed schema:EventSeries) and keep it in KGC.
# KGC ends up holding the value from the last row returned; if the query returns
# no rows, KGC is never assigned and the next cell fails with a NameError.
# NOTE(review): the original note here mentioned a "problem with one row" without
# saying what the problem is - confirm whether anything still needs fixing.
res = kgc2020.query('''select ?es where {?es a schema:EventSeries}''')

for row in res:
    print(row.es)
    KGC=row.es

http://www.knowledgegraph.tech


In [53]:
# Describe the 2021 edition of the conference as a schema:Event node.
KGC2021 = URIRef(("http://www.knowledgegraph.tech/conference-2021"))
kgc2023.add((KGC2021, OWL.sameAs, wd.Q106704796))  # same entity as this Wikidata item
kgc2023.add((KGC2021, schema.name, Literal("Knowledge Graph Conference 2021")))
kgc2023.add((KGC2021, RDF.type, schema.Event))
kgc2023.add((KGC2021, schema.superEvent, KGC))  # KGC is the event-series node found in the previous cell
kgc2023.add((KGC2021, schema.eventAttendanceMode, schema.OnlineEventAttendanceMode))

<Graph identifier=N1f8eb08854224c52a9ca4f788e27d74a (<class 'rdflib.graph.Graph'>)>

## Speakers 

In [56]:
def _get_or_create_entity(label, rdf_type, qids=None):
    """Return the IRI registered for ``label``, minting and describing a new one if needed.

    Parameters
    ----------
    label : str
        Display name of the entity; also the key used in ``labelDict``.
    rdf_type
        Class IRI given to a newly minted entity (ignored when the label is
        already known).
    qids : str or None
        Wikidata QID(s) for a newly minted entity, separated by ';'. Blank
        segments and surrounding whitespace are ignored.

    Returns
    -------
    The existing IRI from ``labelDict`` or the newly minted one.

    Side effects for an unknown label: consumes one identifier from the
    notebook-level counter ``lastId``, adds rdf:type, schema:name and one
    owl:sameAs triple per QID to ``kgc2023``, and records the label in
    ``labelDict``.
    """
    global lastId

    if label in labelDict:
        return labelDict[label]

    entity = URIRef(kgc + str(lastId).zfill(5))
    lastId += 1
    kgc2023.add((entity, RDF.type, rdf_type))
    kgc2023.add((entity, schema.name, Literal(label)))
    for qid in (qids or "").split(';'):
        qid = qid.strip()
        if qid:  # skip blank segments so no sameAs link to the bare namespace is created
            kgc2023.add((entity, OWL.sameAs, URIRef(wd + qid)))
    labelDict[label] = entity
    return entity


def _link_if_known(subject, predicate, obj):
    """Add (subject, predicate, obj) to ``kgc2023`` unless no subject has been seen yet."""
    if subject is not None:
        kgc2023.add((subject, predicate, obj))


# (label column, QID column) for each level of the headquarters location chain.
HQ_COLUMNS = (
    ("HQ", "HQQID1"),
    ("HQ2", "HQQID2"),
    ("HQ3", "HQQID3"),
    ("HQ4", "HQQID4"),
    ("HQ5", "HQQID5"),
)

# Start without any subject so that a first row lacking Name / Company / HQ
# neither raises NameError nor picks up values left over from an earlier run.
# Within the file, a row that leaves one of these columns empty keeps using the
# entity from the most recent row that filled it.
# NOTE(review): that carry-over suits continuation rows that leave Name/Company
# blank; if every row is meant to be self-contained, reset these at the top of
# the loop instead - confirm against the CSV layout.
speaker = None
co = None
hq_chain = [None] * len(HQ_COLUMNS)

# Read as UTF-8, matching how the presentations CSV is opened further down.
with open('KGC-2021-Speakers-Recon.csv', mode='r', encoding="utf-8") as csv_file:
    speaker_file = csv.DictReader(csv_file)

    for row in speaker_file:

        # Speaker ------------------------------------------------------------------------------------------
        name = row['Name']
        if name:
            speaker = _get_or_create_entity(name, schema.Person, row["personQID"])

        # The LinkedIn profile is linked even when the speaker already existed in the graph.
        if row["LinkedIn"]:
            _link_if_known(speaker, OWL.sameAs, URIRef(row["LinkedIn"]))

        # Speaker's country of citizenship -------------------------------------------------------------------
        countryName = row["countryOfCitizenship"]
        if countryName:
            country = _get_or_create_entity(countryName, schema.Country, row["countryOfCitizenshipQID"])
            _link_if_known(speaker, schema.nationality, country)

        # Speaker's school -----------------------------------------------------------------------------------
        schoolName = row["educatedAt"]
        if schoolName:
            school = _get_or_create_entity(schoolName, schema.EducationalOrganization, row["educatedAtQID"])
            _link_if_known(speaker, schema.alumniOf, school)

        # Speaker's field of work ----------------------------------------------------------------------------
        pfowName = row["personFieldOfWork"]
        if pfowName:
            pfow = _get_or_create_entity(pfowName, schema.Thing, row["personFieldOfWorkQID"])
            _link_if_known(speaker, schema.knowsAbout, pfow)

        # Speaker's company ----------------------------------------------------------------------------------
        coName = row["Company"]
        if coName:
            co = _get_or_create_entity(coName, schema.Organization, row["companyQID"])
            _link_if_known(speaker, schema.worksFor, co)

        # Consortium the company is a member of --------------------------------------------------------------
        consortiumName = row["member of"]
        if consortiumName:
            consortium = _get_or_create_entity(consortiumName, schema.Organization, row["memberOfQID"])
            _link_if_known(co, schema.memberOf, consortium)

        # Headquarters chain: the company is located at HQ, and each further
        # level contains the previous one (HQ within HQ2 within HQ3 ...).
        for level, (name_column, qid_column) in enumerate(HQ_COLUMNS):
            placeName = row[name_column]
            if not placeName:
                continue
            place = _get_or_create_entity(placeName, schema.Place, row[qid_column])
            hq_chain[level] = place
            if level == 0:
                _link_if_known(co, schema.location, place)
            else:
                _link_if_known(hq_chain[level - 1], schema.geoWithin, place)

        # Additional type (columns "instance of" / "instanceOfQID") ------------------------------------------
        # Intentionally not imported, as in the original cell where this section
        # was commented out. If re-enabled, it would be:
        #     additionalType = _get_or_create_entity(row["instance of"], owl["class"], row["instanceOfQID"])
        #     _link_if_known(co, schema.additionalType, additionalType)
        # (`owl.class` cannot be written as an attribute because `class` is a Python keyword.)

        # Company's industry ---------------------------------------------------------------------------------
        industryName = row["industry"]
        if industryName:
            industry = _get_or_create_entity(industryName, schema.Thing, row["industryQID"])
            _link_if_known(co, schema.knowsAbout, industry)
            
        

    

HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ3 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ3 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ3 not empty!
HQ3 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!
HQ3 not empty!
HQ3 not empty!
HQ ID2 not empty!
HQ3 not empty!


## Presentations

In [57]:
# Name -> IRI lookup of the skos:Concept tags already present in the graph.
tagsDict={}

# Each result row holds the concept (?c) first and its name (?name) second.
res = kgc2020.query('''select distinct ?c ?name where {?c a skos:Concept; schema:name ?name}''')
for row in res:
    # Show the name on one line and the concept IRI on the next.
    print(row[1], row[0], sep="\n")
    # Concepts sharing a name overwrite each other; the last one returned is kept.
    tagsDict.update({str(row[1]): row[0]})

finance
http://www.knowledgegraph.tech/iri/00329
privacy
http://www.knowledgegraph.tech/iri/00330
European General Data Protection Regulation
http://www.knowledgegraph.tech/iri/00331
information privacy
http://www.knowledgegraph.tech/iri/00332
personal data
http://www.knowledgegraph.tech/iri/00333
privacy
http://www.knowledgegraph.tech/iri/00334
California Consumer Privacy Act
http://www.knowledgegraph.tech/iri/00335
information privacy
http://www.knowledgegraph.tech/iri/00336
personal data
http://www.knowledgegraph.tech/iri/00337
ontology
http://www.knowledgegraph.tech/iri/00338
recommender system
http://www.knowledgegraph.tech/iri/00339
data catalog
http://www.knowledgegraph.tech/iri/00341
metadata management
http://www.knowledgegraph.tech/iri/00342
cyber security
http://www.knowledgegraph.tech/iri/00344
anti-money laundering
http://www.knowledgegraph.tech/iri/00345
Regulatory compliance
http://www.knowledgegraph.tech/iri/00346
machine learning
http://www.knowledgegraph.tech/iri/0034

In [58]:
def _next_kgc_iri():
    """Mint the next IRI in the kgc namespace and advance the notebook-level counter ``lastId``."""
    global lastId
    iri = URIRef(kgc + str(lastId).zfill(5))
    lastId += 1
    return iri


# Start without a talk so that a first row lacking a Title neither raises
# NameError nor reuses a value left over from an earlier run. Within the file,
# a row with an empty Title keeps referring to the most recent talk.
# NOTE(review): that carry-over suits continuation rows (e.g. further presenters
# of the same talk); confirm against the CSV layout.
talk = None

with open('KGC-2021-Presentations-Recon.csv', mode='r', encoding="utf-8") as csv_file:
    presentation_file = csv.DictReader(csv_file)

    for row in presentation_file:

        # Title + description of the presentation ------------------------------------------------------------
        title = row['Title']
        if title:
            if title in labelDict:
                talk = labelDict[title]
            else:
                talk = _next_kgc_iri()
                # A talk is modelled both as an event and as a creative work.
                kgc2023.add((talk, RDF.type, schema.Event))
                kgc2023.add((talk, RDF.type, schema.CreativeWork))
                kgc2023.add((talk, schema.name, Literal(title)))
                kgc2023.add((talk, schema.superEvent, KGC2021))
                if row["Description"]:
                    kgc2023.add((talk, schema.description, Literal(row["Description"])))
                labelDict[title] = talk

        # Presenter ------------------------------------------------------------------------------------------
        presenterName = row['Name']
        if presenterName:
            if presenterName in labelDict:
                presenter = labelDict[presenterName]
                if talk is not None:
                    kgc2023.add((talk, schema.performer, presenter))
            else:
                # Presenters are only looked up, never created here.
                print("Error: Presenter {} not in the list of speakers!".format(presenterName))

        # Tags -----------------------------------------------------------------------------------------------
        # Not imported yet. Open question from the original cell: are tags modelled under SKOS?

        # Video recording ------------------------------------------------------------------------------------
        videoURL = row["Video URL"]
        if videoURL:
            if videoURL in labelDict:
                video = labelDict[videoURL]
            else:
                video = _next_kgc_iri()
                kgc2023.add((video, RDF.type, schema.VideoObject))
                kgc2023.add((video, schema.embedUrl, Literal(videoURL)))
                labelDict[videoURL] = video
            # Link the talk to the recording whether the recording node is new
            # or was already known; adding an existing triple again has no effect.
            if talk is not None:
                kgc2023.add((talk, schema.recordedIn, video))

        # Slides ---------------------------------------------------------------------------------------------
        slidesURL = row["Slides URL"]
        if slidesURL:
            if slidesURL in labelDict:
                slides = labelDict[slidesURL]
            else:
                slides = _next_kgc_iri()
                kgc2023.add((slides, RDF.type, schema.PresentationDigitalDocument))
                kgc2023.add((slides, schema.archivedAt, Literal(slidesURL)))
                labelDict[slidesURL] = slides
            # Same as for the video: always connect the slides to the current talk.
            if talk is not None:
                kgc2023.add((slides, schema.about, talk))

         


##  Serialize the Graph 

In [59]:
# Write the extended graph as Turtle to kgc23-V01.ttl (relative to the current working directory).
kgc2023.serialize(destination = "kgc23-V01.ttl", format = "turtle")

<Graph identifier=N1f8eb08854224c52a9ca4f788e27d74a (<class 'rdflib.graph.Graph'>)>

## Visualize

In [None]:
def visualize(g, svg_path='test.svg'):
    """Render an RDF graph inline as a PNG and also save the drawing as an SVG file.

    Parameters
    ----------
    g
        The rdflib graph to draw.
    svg_path : str
        File the SVG copy is written to. Defaults to 'test.svg'.

    The graph is converted to DOT with ``rdf2dot`` using its default options,
    rendered through pydotplus, and the PNG is shown with ``display``.
    """
    stream = io.StringIO()
    rdf2dot(g, stream)
    dg = pydotplus.graph_from_dot_data(stream.getvalue())
    display(Image(dg.create_png()))
    # Long literal values such as the description field make the drawing much larger.
    dg.write_svg(svg_path)

# Draw the graph. kgc2023 is another name for this same object, so triples
# added through kgc2023 are part of what is drawn.
visualize(kgc2020)

# graph = pydotplus.graphviz.graph_from_dot_data(dot_data)
# Image(graph.create_png())

In [None]:
def visualize(g):
    """Render an RDF graph inline as a PNG image.

    Running this cell replaces the ``visualize`` function defined earlier in
    the notebook; unlike that one, this version does not write an SVG file.

    The graph is converted to DOT with ``rdf2dot`` using its default options,
    rendered through pydotplus, and shown with ``display``.
    """
    stream = io.StringIO()
    rdf2dot(g, stream)
    dg = pydotplus.graphviz.graph_from_dot_data(stream.getvalue())
    display(Image(dg.create_png()))
    

# Draw the graph with the most recently defined visualize(); kgc2023 is another
# name for this same object, so triples added through kgc2023 are drawn too.
visualize(kgc2020)

# graph = pydotplus.graphviz.graph_from_dot_data(dot_data)
# Image(graph.create_png())

In [None]:
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
import networkx as nx
import matplotlib.pyplot as plt

url = './kgc2020.ttl'

# Parse the Turtle file into a fresh graph. Note that this rebinds the
# notebook-level name `g` used by earlier cells.
g = rdflib.Graph()
result = g.parse(url, format='turtle')

G = rdflib_to_networkx_multidigraph(result)

# Plot Networkx instance of RDF Graph.
# Compute the layout once and pass it to BOTH drawing calls; without `pos`,
# nx.draw would lay the nodes out independently of where the edge labels go.
pos = nx.spring_layout(G, scale=2)
# NOTE(review): presumably no edge carries an 'r' attribute after the default
# conversion, in which case no edge labels are drawn - verify.
edge_labels = nx.get_edge_attributes(G, 'r')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
nx.draw(G, pos=pos, with_labels=True)

# Needed to show the figure when not in interactive mode.
plt.show()

In [None]:
# See this Stack Overflow question on enlarging the rendered image and saving it to a file:
#     https://stackoverflow.com/questions/65405166/how-to-increase-the-size-of-the-image-and-save-the-picture

In [None]:
# Leftover check: print the current value of the identifier counter, zero-padded to five digits.
newId = str(lastId).zfill(5)
print(newId)