In [1]:
import pandas as pd #for handling csv and csv contents
from rdflib import Graph, Literal, RDF, URIRef, Namespace #basic RDF handling
from rdflib.namespace import FOAF , XSD, RDF, RDFS, OWL #most common namespaces
import urllib.parse #for parsing strings to URI's
import numpy as np

In [2]:
# Load the character table from CSV (one row per character).
CSV_PATH = 'marvel_characters_id.csv'
df = pd.read_csv(CSV_PATH)

# Show the first rows as a quick sanity check of the columns.
df.head()

Unnamed: 0,ID,Name,Alignment,Gender,EyeColor,Race,HairColor,Publisher,SkinColor,Height,Weight,Hero ID
0,0,A-Bomb,good,Male,yellow,Human,No Hair,Marvel Comics,,203.0,441.0,a_bomb
1,1,Abe Sapien,good,Male,blue,Icthyo Sapien,No Hair,Dark Horse Comics,blue,191.0,65.0,abe_sapien
2,2,Abin Sur,good,Male,blue,Ungaran,No Hair,DC Comics,red,185.0,90.0,abin_sur
3,3,Abomination,bad,Male,green,Human / Radiation,No Hair,Marvel Comics,,203.0,441.0,abomination
4,4,Abraxas,bad,Male,blue,Cosmic Entity,Black,Marvel Comics,,,,abraxas


In [3]:
# Start from an empty RDF graph.
g = Graph()

# --- External vocabularies (reused for classes and predicates) ---
cbo = Namespace('http://comicmeta.org/cbo/')
dbo = Namespace('http://dbpedia.org/ontology/')
schema = Namespace('http://schema.org/')
# NOTE(review): this is the Wikidata *page* URL prefix; Wikidata's concept IRIs
# normally live under http://www.wikidata.org/entity/ -- confirm which is intended.
wd = Namespace('https://www.wikidata.org/wiki/')

# --- Project namespaces ---
ken = Namespace('https://w3id.org/um/ken4256/')                # project-defined properties
ken_marvel = Namespace('https://w3id.org/um/ken4256/marvel/')  # character resources

# One namespace per categorical attribute; attribute values are minted as IRIs in these.
ken_alignment = Namespace('https://w3id.org/um/ken4256/alignment/')
ken_gender = Namespace('https://w3id.org/um/ken4256/gender/')
ken_eyecolor = Namespace('https://w3id.org/um/ken4256/eyecolor/')
ken_race = Namespace('https://w3id.org/um/ken4256/race/')
ken_haircolor = Namespace('https://w3id.org/um/ken4256/haircolor/')
ken_publisher = Namespace('https://w3id.org/um/ken4256/publisher/')
ken_skincolor = Namespace('https://w3id.org/um/ken4256/skincolor/')

In [4]:
# NaN is the only float that does not compare equal to itself: float("nan") == float("nan") is False
import math
# math.isnan(float("nan")) is True, so it can be used to detect missing numeric values

In [5]:
# Build the graph from the CSV rows.
#
# Every character gets a type and a name. All other columns may be empty
# (pandas reads empty cells as NaN), so each of them is emitted only when a
# value is present. This includes Height: the data preview shows rows with no
# height (e.g. Abraxas), and emitting it unconditionally would put a NaN
# xsd:float literal into the graph.

# Categorical columns: (CSV column, namespace used to mint the value IRI,
# predicate linking the character to that value).
CATEGORICAL_COLUMNS = [
    ('Alignment', ken_alignment, ken.alignment),
    ('Gender', ken_gender, schema.gender),
    ('EyeColor', ken_eyecolor, dbo.eyeColor),
    ('Race', ken_race, wd.Q3254959),
    ('HairColor', ken_haircolor, dbo.hairColor),
    ('Publisher', ken_publisher, schema.publisher),
    ('SkinColor', ken_skincolor, dbo.skinColor),
]

# Numeric columns: (CSV column, predicate); values are stored as xsd:float literals.
NUMERIC_COLUMNS = [
    ('Height', schema.height),
    ('Weight', schema.weight),
]

# Re-create the graph so that re-running this cell does not accumulate stale triples.
g = Graph()
for index, row in df.iterrows():
    hero = URIRef(ken_marvel + row['Hero ID'])
    g.add((hero, RDF.type, cbo.Character))
    g.add((hero, FOAF.name, Literal(row['Name'], lang='en')))

    for column, value_ns, predicate in CATEGORICAL_COLUMNS:
        value = row[column]
        if pd.isna(value):
            continue  # empty cell: nothing to assert for this attribute
        # Percent-encode the raw text so it forms a valid IRI; keep the original text as label.
        value_uri = URIRef(value_ns + urllib.parse.quote(value))
        g.add((value_uri, RDFS.label, Literal(value, lang='en')))
        g.add((hero, predicate, value_uri))

    for column, predicate in NUMERIC_COLUMNS:
        value = row[column]
        if pd.notna(value):
            g.add((hero, predicate, Literal(value, datatype=XSD['float'])))


In [6]:
# Register a short prefix for each namespace so the serialized output uses
# compact prefixed names instead of full IRIs.
prefix_bindings = [
    ("foaf", FOAF),
    ("wd", wd),
    ("schema", schema),
    ("dbo", dbo),
    ("ken", ken),
    ("ken_marvel", ken_marvel),
    ("ken_alignment", ken_alignment),
    ("ken_gender", ken_gender),
    ("ken_eyecolor", ken_eyecolor),
    ("ken_race", ken_race),
    ("ken_haircolor", ken_haircolor),
    ("ken_publisher", ken_publisher),
    ("ken_skincolor", ken_skincolor),
    ("cbo", cbo),
]
for prefix, namespace in prefix_bindings:
    g.bind(prefix, namespace)

In [7]:
#Define my own properties (with domain, range, comment)
# ken:alignment is used in this notebook with IRI objects (resources in the
# ken_alignment namespace), never with string literals. It is therefore declared
# as an owl:ObjectProperty with a resource range; declaring it as a
# DatatypeProperty with range xsd:string would contradict the generated data.
alignment_property = ken.alignment
g.add((alignment_property, RDF.type, OWL.ObjectProperty))
g.add((alignment_property, RDFS.comment, Literal("Defines the superhero's alignment, i.e. if they are good, bad or neutral characters.", lang="en")))
g.add((alignment_property, RDFS.domain, cbo.Character))
g.add((alignment_property, RDFS.range, OWL.Thing))

In [8]:
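# Optional: uncomment to preview the Turtle output inline before saving.
# The .decode() call is only needed where serialize() returns bytes (rdflib < 6);
# newer rdflib versions return a str, so drop it there.
#print(g.serialize(format='turtle').decode('UTF-8'))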

In [9]:
# Write the finished graph to disk in Turtle syntax.
OUTPUT_PATH = 'output_marvel_characters.ttl'
g.serialize(destination=OUTPUT_PATH, format='turtle')