## Notebook 10: Adding to Ontology

In this last notebook, the prepared DataFrames about persons and companies are added to an ontology.

### Import libraries

1. **re**  - This is a library to process regular expressions. 

2. **Numpy** - Numpy is a library for the easy use of vectors, matrices or arrays in general. It simplifies various numerical operations. 

3. **Codecs** - This module provides access to the most common Python encoders and decoders, which can be used, for example, for text encoding.

4. **Pandas** - Pandas is a library to analyze and to manage data. It is used to create tables.

5. **RDFLib** - RDFLib is a common library when working with RDF in Python.

In [1]:
'''Import Statements'''
!pip install rdflib

import re
import numpy as np
import pandas as pd
import rdflib
from rdflib.namespace import RDF, RDFS, FOAF
from rdflib import Literal
#import warnings
# from google.colab import drive 
# drive.mount('/content/gdrive')



### Define Prefixes

In [19]:
# Namespace prefixes of all ontologies used in the knowledge graph (KG).
# The cells below build IRIs from them via attribute access (ns.term) or
# indexing (ns[local_name]).

# Project vocabulary: classes and properties such as Address, BuildingPart,
# address, isAddressOf, normalizedStreet and altoCoordinates are taken from it below.
transraz = rdflib.Namespace('http://transraz/')
# Namespace in which the individual entities (persons, companies, streets,
# addresses, ...) are created from the *_IRI columns of the DataFrames.
addressbook = rdflib.Namespace('http://transraz/addressbook1908#')
# DBpedia ontology: Person, Organisation, Profession, Street, Building, ... are used below.
dbo = rdflib.Namespace('http://dbpedia.org/ontology/')
# FOAF vocabulary: only foaf.familyName is used below.
# NOTE(review): rdflib's own FOAF namespace is also imported in the first cell;
# this lowercase name is the one the functions below reference.
foaf = rdflib.Namespace('http://xmlns.com/foaf/0.1/')
# vcard and schema are defined here but not referenced in the cells visible in this notebook.
vcard = rdflib.Namespace('http://www.w3.org/2006/vcard/ns#')
schema = rdflib.Namespace('http://schema.org/')

### Import DataFrames

In [20]:
# Load the prepared person and company tables (semicolon-separated CSV files).
# Missing values are replaced by '' right away, because the functions below
# detect absent data with comparisons of the form `value != ''`.
#
# Alternative sources when running on Google Colab with a mounted Drive:
#   df = pd.read_csv('/content/gdrive/MyDrive/MA Python/Outputs/2_person_df_fin.csv', lineterminator='\n')
#   company_df = pd.read_csv('/content/gdrive/MyDrive/MA Python/Outputs/2_company_df_fin.csv', lineterminator='\n')
df = pd.read_csv('./Outputs/person_corrected11.csv', sep=';').fillna('')
company_df = pd.read_csv('./Outputs/company_df_fin_corrected1.csv', sep=';').fillna('')

### Knowledge Graph

In [None]:
# g is the knowledge graph that persons and companies are added to.
# It starts as an empty rdflib Graph and is then filled with the existing
# ontology read from './Outputs/RDF.owl' (parsed with format='xml').
g = rdflib.Graph()
g.parse('./Outputs/RDF.owl', format='xml')

### Adding Persons and Companies to the Knowledge Graph

In [22]:
def _addBuildingStructure(g, row):
    '''Add the building, building part and floor of a row's address to the graph.

    Parameters
    ----------
    g : graph object offering ``add((subject, predicate, object))``
        The knowledge graph that is extended in place.
    row : dict
        One record of the person DataFrame (column name -> value, with ''
        standing for "no value"). ``row['Address_IRI']`` must be non-empty;
        the caller checks this.

    A link between two nodes is only written when both IRIs are non-empty, so
    that no triple about the bare namespace IRI ``addressbook['']`` is created.
    '''
    address = addressbook[row['Address_IRI']]

    if row['Building_IRI'] != '':
        building = addressbook[row['Building_IRI']]
        g.add((building, RDF.type, dbo.Building))
        g.add((building, RDFS.label, Literal(row['Building'])))
        if row['Street_IRI'] != '':
            street = addressbook[row['Street_IRI']]
            g.add((building, transraz.streetAddress, street))
            g.add((street, transraz.isStreetAddressOf, building))

    if row['BuildPart_IRI'] != '':
        part = addressbook[row['BuildPart_IRI']]
        g.add((part, RDF.type, transraz.BuildingPart))
        g.add((part, RDFS.label, Literal(row['BuildingPart'])))
        if row['Building_IRI'] != '':
            g.add((part, dbo.part, addressbook[row['Building_IRI']]))

    if row['BuildPartFloor_IRI'] != '':
        floor = addressbook[row['BuildPartFloor_IRI']]
        g.add((floor, RDF.type, transraz.BuildingPart))
        g.add((floor, RDFS.label, Literal(row['BuildingPartFloor'])))
        if row['BuildPart_IRI'] != '':
            g.add((floor, dbo.part, addressbook[row['BuildPart_IRI']]))
        g.add((floor, transraz.address, address))
        g.add((address, transraz.isAddressOf, floor))


def addPersonsToOntology(g, df):
    '''Add every person of the DataFrame to the ontology.

    For each row a ``dbo.Person`` is created, together with the occupation,
    street, address, building, building part and floor nodes whose ``*_IRI``
    column is filled.

    Parameters
    ----------
    g : graph object offering ``add((subject, predicate, object))``
        The knowledge graph; it is extended in place.
    df : pandas.DataFrame
        Person table in which missing values are already replaced by ''.
        A missing required column raises ``KeyError``.

    Returns
    -------
    The same graph object ``g``.

    Notes
    -----
    Rows are read by position (``to_dict('records')``), so the function does
    not depend on the DataFrame having a default 0..n-1 index.
    '''
    for row in df.to_dict('records'):
        person = addressbook[row['IRI']]

        g.add((person, RDF.type, dbo.Person))
        if row['Full Name'] != '':
            g.add((person, RDFS.label, Literal(row['Full Name'])))
        if row['Last Name'] != '':
            g.add((person, foaf.familyName, Literal(row['Last Name'])))
        if row['First Name Abbreviation'] != '':
            g.add((person, transraz.abbreviatedName, Literal(row['First Name Abbreviation'])))
        if row['Name_Coordinates'] != '':
            g.add((person, transraz.altoCoordinates, Literal(row['Name_Coordinates'])))

        # adding information about occupation
        if row['Occ_IRI'] != '':
            occupation = addressbook[row['Occ_IRI']]
            g.add((occupation, RDF.type, dbo.Profession))
            g.add((person, dbo.profession, occupation))
            if row['Occupation'] != '':
                g.add((occupation, RDFS.label, Literal(row['Occupation'])))
            if row['Occupation_Coordinates'] != '':
                g.add((occupation, transraz.altoCoordinates, Literal(row['Occupation_Coordinates'])))
            if row['Norm_Occ_IRI'] != '':
                normalized = addressbook[row['Norm_Occ_IRI']]
                g.add((normalized, RDF.type, dbo.Profession))
                g.add((normalized, RDFS.label, Literal(row['Normalized Occupation'])))
                g.add((occupation, transraz.normalizedOccupation, normalized))
            elif row['Norm_Occ_1_IRI'] != '':
                normalized_1 = addressbook[row['Norm_Occ_1_IRI']]
                g.add((normalized_1, RDF.type, dbo.Profession))
                g.add((normalized_1, RDFS.label, Literal(row['Normalized Occupation 1'])))
                g.add((occupation, transraz.normalizedOccupation, normalized_1))
                # The second normalized occupation is optional: without this
                # check an empty value would add triples about addressbook[''].
                if row['Norm_Occ_2_IRI'] != '':
                    normalized_2 = addressbook[row['Norm_Occ_2_IRI']]
                    g.add((normalized_2, RDF.type, dbo.Profession))
                    g.add((normalized_2, RDFS.label, Literal(row['Normalized Occupation 2'])))
                    g.add((occupation, transraz.normalizedOccupation, normalized_2))

        # adding information about street
        if row['Street_IRI'] != '':
            street = addressbook[row['Street_IRI']]
            g.add((street, RDF.type, dbo.Street))
            g.add((street, RDFS.label, Literal(row['Street'])))
            if row['Norm_Street_IRI'] != '':
                normalized_street = addressbook[row['Norm_Street_IRI']]
                g.add((normalized_street, RDF.type, dbo.Street))
                g.add((normalized_street, RDFS.label, Literal(row['Normalized Street'])))
                g.add((street, transraz.normalizedStreet, normalized_street))
            if row['Street_Coordinates'] != '':
                g.add((street, transraz.altoCoordinates, Literal(row['Street_Coordinates'])))

        # adding information about address
        if row['Owner_Address_IRI'] != '':
            # A separate owner address is given: that one becomes the person's
            # address, and the listed address is attached via dbo.owner.
            owner_address = addressbook[row['Owner_Address_IRI']]
            g.add((owner_address, RDF.type, transraz.Address))
            g.add((owner_address, RDFS.label, Literal(row['Full Owner Address'])))
            g.add((person, transraz.address, owner_address))
            g.add((owner_address, transraz.isAddressOf, person))
            if row['Owner_Street_IRI'] != '':
                owner_street = addressbook[row['Owner_Street_IRI']]
                g.add((owner_street, RDF.type, dbo.Street))
                g.add((owner_street, RDFS.label, Literal(row['Owner Street'])))
                if row['Norm_Owner_Street_IRI'] != '':
                    normalized_owner_street = addressbook[row['Norm_Owner_Street_IRI']]
                    g.add((normalized_owner_street, RDF.type, dbo.Street))
                    g.add((normalized_owner_street, RDFS.label, Literal(row['Normalized Owner Street'])))
                    # The normalized owner street belongs to the owner street,
                    # not to the street of the listed address (Street_IRI).
                    g.add((owner_street, transraz.normalizedStreet, normalized_owner_street))
            if row['Address_IRI'] != '':
                address = addressbook[row['Address_IRI']]
                g.add((address, RDF.type, transraz.Address))
                g.add((person, dbo.owner, address))
                g.add((address, RDFS.label, Literal(row['Full Address'])))
                _addBuildingStructure(g, row)
        elif row['Address_IRI'] != '':
            address = addressbook[row['Address_IRI']]
            g.add((address, RDF.type, transraz.Address))
            g.add((person, transraz.address, address))
            g.add((address, RDFS.label, Literal(row['Full Address'])))
            _addBuildingStructure(g, row)

    return g


In [23]:
def addCompaniesToOntology(g, company_df):
    '''Add every company of the DataFrame to the ontology.

    For each row a ``dbo.Organisation`` is created, together with the
    industry (``transraz.Branch``), address, building and street nodes whose
    ``*_IRI`` column is filled.

    Parameters
    ----------
    g : graph object offering ``add((subject, predicate, object))``
        The knowledge graph; it is extended in place.
    company_df : pandas.DataFrame
        Company table in which missing values are already replaced by ''.
        A missing required column raises ``KeyError``.

    Returns
    -------
    The same graph object ``g``.

    Notes
    -----
    Rows are read by position (``to_dict('records')``), so the function does
    not depend on the DataFrame having a default 0..n-1 index.
    '''
    for row in company_df.to_dict('records'):
        company = addressbook[row['Company_IRI']]
        g.add((company, RDF.type, dbo.Organisation))
        if row['Company Name'] != '':
            g.add((company, RDFS.label, Literal(row['Company Name'])))
        if row['Industry_IRI'] != '':
            industry = addressbook[row['Industry_IRI']]
            g.add((industry, RDF.type, transraz.Branch))
            g.add((industry, RDFS.label, Literal(row['Industry'])))
            g.add((company, transraz.branch, industry))
            g.add((industry, transraz.isBranchOf, company))

        # adding information about company address
        if row['Address_IRI'] != '':
            address = addressbook[row['Address_IRI']]
            g.add((address, RDF.type, transraz.Address))
            g.add((company, transraz.address, address))
            g.add((address, transraz.isAddressOf, company))
            g.add((address, RDFS.label, Literal(row['Full Address'])))
            if row['Building_IRI'] != '':
                building = addressbook[row['Building_IRI']]
                g.add((building, RDF.type, dbo.Building))
                g.add((building, RDFS.label, Literal(row['Building'])))
                # Only link building and street when a street IRI exists;
                # otherwise the triples would point at addressbook[''].
                if row['Street_IRI'] != '':
                    street = addressbook[row['Street_IRI']]
                    g.add((building, transraz.streetAddress, street))
                    g.add((street, transraz.isStreetAddressOf, building))

        # adding information about street
        if row['Street_IRI'] != '':
            street = addressbook[row['Street_IRI']]
            g.add((street, RDF.type, dbo.Street))
            g.add((street, RDFS.label, Literal(row['Street'])))
            if row['Norm_Street_IRI'] != '':
                normalized_street = addressbook[row['Norm_Street_IRI']]
                g.add((normalized_street, RDF.type, dbo.Street))
                g.add((normalized_street, RDFS.label, Literal(row['Normalized Street'])))
                g.add((street, transraz.normalizedStreet, normalized_street))
            if row['Street_Coordinates'] != '':
                g.add((street, transraz.altoCoordinates, Literal(row['Street_Coordinates'])))

    return g

In [24]:
# Preview the first 100 rows of the person DataFrame to check the columns
# that addPersonsToOntology reads (the *_IRI columns and their labels).
df.head(100)

Unnamed: 0,IRI,Full Name,Last Name,First Name Abbreviation,Occ_IRI,Occupation,Occ_1_IRI,Occupation 1,Occ_2_IRI,Occupation 2,Norm_Occ_IRI,Normalized Occupation,Norm_Occ_1_IRI,Normalized Occupation 1,Norm_Occ_2_IRI,Normalized Occupation 2,Add Info,House Owner,Address_IRI,Full Address,Street_IRI,Street,Norm_Street_IRI,Normalized Street,House Number,Part of House,House Floor,Building_IRI,Building,BuildPart_IRI,BuildingPart,BuildPartFloor_IRI,BuildingPartFloor,Owner_Address_IRI,Full Owner Address,Owner_Street_IRI,Owner Street,Norm_Owner_Street_IRI,Normalized Owner Street,Owner Number,Name_Coordinates,Occupation_Coordinates,Street_Coordinates
0,person_0,G. Pirner,Pirner,G.,occupation_0,Wirt,,,,,occupation_1,Wirt/in (Gastwirt/in),,,,,(zur Siegesgöttin),True,address_0,"Altcrstraße 1, Vorderhaus",street_0,Altcrstraße,,,1,Vorderhaus,,building_0,Altcrstraße 1,buildpart_0,"Altcrstraße 1, Vorderhaus",buildpartfloor_0,"Altcrstraße 1, Vorderhaus",,,,,,,,,"Page:F_23_92.jpg\F_23_92-026.jpg, HEIGHT:50.0,...",
1,person_1,G. Bogner,Bogner,G.,occupation_2,Maurerpaller,,,,,occupation_3,Maurerpolier/in,,,,,,False,address_1,"Altcrstraße 1, Vorderhaus, 0",street_0,Altcrstraße,,,1,Vorderhaus,0,building_0,Altcrstraße 1,buildpart_0,"Altcrstraße 1, Vorderhaus",buildpartfloor_1,"Altcrstraße 1, Vorderhaus, 0",,,,,,,,,,
2,person_10,P. Sebald,Sebald,P.,occupation_10,Taglöhner,,,,,occupation_11,Tagelöhner/in,,,,,,False,address_4,"Altcrstraße 1, Vorderhaus, 4",street_0,Altcrstraße,,,1,Vorderhaus,4,building_0,Altcrstraße 1,buildpart_0,"Altcrstraße 1, Vorderhaus",buildpartfloor_4,"Altcrstraße 1, Vorderhaus, 4",,,,,,,,"Page:F_23_92.jpg\F_23_92-364.jpg, HEIGHT:43.0,...","Page:F_23_92.jpg\F_23_92-026.jpg, HEIGHT:63.0,...",
3,person_100,W. Gutmann,Gutmann,W.,occupation_119,Tagl,,,,,occupation_11,Tagelöhner/in,,,,,,False,address_16,"Altcrstraße 6, Vorderhaus",street_0,Altcrstraße,,,6,Vorderhaus,,building_3,Altcrstraße 6,buildpart_3,"Altcrstraße 6, Vorderhaus",buildpartfloor_16,"Altcrstraße 6, Vorderhaus",,,,,,,,"Page:F_23_92.jpg\F_23_92-191.jpg, HEIGHT:46.0,...","Page:F_23_92.jpg\F_23_92-104.jpg, HEIGHT:55.0,...",
4,person_1000,I. Teupert,Teupert,I.,occupation_12,Schlosser,,,,,occupation_13,Schlosser/in,,,,,,False,address_186,"Altcrstraße 137, Vorderhaus, 1",street_0,Altcrstraße,,,137,Vorderhaus,1,building_40,Altcrstraße 137,buildpart_50,"Altcrstraße 137, Vorderhaus",buildpartfloor_184,"Altcrstraße 137, Vorderhaus, 1",,,,,,,,,"Page:F_23_92.jpg\F_23_92-027.jpg, HEIGHT:50.0,...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,person_10086,M. Hörner,Hörner,M.,occupation_12,Schlosser,,,,,occupation_13,Schlosser/in,,,,,,False,address_2056,"Cramergasse 3, Vorderhaus, 2",street_49,Cramergasse,,,3,Vorderhaus,2,building_490,Cramergasse 3,buildpart_630,"Cramergasse 3, Vorderhaus",buildpartfloor_2023,"Cramergasse 3, Vorderhaus, 2",,,,,,,,,"Page:F_23_92.jpg\F_23_92-027.jpg, HEIGHT:50.0,...",
96,person_10087,G. Habicht,Habicht,G.,occupation_342,Werkmeister,,,,,occupation_343,Werkmeister/in,,,,,,False,address_2056,"Cramergasse 3, Vorderhaus, 2",street_49,Cramergasse,,,3,Vorderhaus,2,building_490,Cramergasse 3,buildpart_630,"Cramergasse 3, Vorderhaus",buildpartfloor_2023,"Cramergasse 3, Vorderhaus, 2",,,,,,,,,"Page:F_23_92.jpg\F_23_92-030.jpg, HEIGHT:51.0,...",
97,person_10088,I. Leuchtl,Leuchtl,I.,occupation_5391,Telegraphenmechaniker,,,,,occupation_5392,Telegrafenmechaniker/in,,,,,,False,address_2057,"Cramergasse 3, Vorderhaus, 3",street_49,Cramergasse,,,3,Vorderhaus,3,building_490,Cramergasse 3,buildpart_630,"Cramergasse 3, Vorderhaus",buildpartfloor_2024,"Cramergasse 3, Vorderhaus, 3",,,,,,,,,,
98,person_10089,H. Pelikan,Pelikan,H.,occupation_5393,Magazsgeh,,,,,occupation_1167,Magazingehilfe/-gehilfin,,,,,,False,address_2055,"Cramergasse 3, Vorderhaus",street_49,Cramergasse,,,3,Vorderhaus,,building_490,Cramergasse 3,buildpart_630,"Cramergasse 3, Vorderhaus",buildpartfloor_2022,"Cramergasse 3, Vorderhaus",,,,,,,,,,


### Saving the Knowledge Graph

In [25]:

# Extend the graph first with all persons, then with all companies.
# Both functions add to the graph they receive and return that same graph.
g = addPersonsToOntology(g, df)
g = addCompaniesToOntology(g, company_df)

# Write the extended graph to the output file, using format='ttl'.
# As the last expression of the cell, the call's return value is shown as the cell output.
g.serialize(destination='./Outputs/AddressBook1908_KG_corrected.ttl', format='ttl')


<Graph identifier=Nef2751bc5b1f4a9d821ea45fa4ab4064 (<class 'rdflib.graph.Graph'>)>