In [16]:
### IMPORTS ###
import rdflib
import pickle
import os
import urllib.parse

In [46]:
import Services

In [17]:
### SIMILARITY MEASURES ###

def conjuction(first: set, second: set) -> set:
    """Return the elements that occur in both ``first`` and ``second``."""
    shared = first.intersection(second)
    return shared

def disjunction(first: set, second: set) -> set:
    """Return every element that occurs in ``first``, in ``second``, or in both."""
    combined = first.union(second)
    return combined
def difference(first: set, second: set):
    """Return the elements of ``first`` that do not occur in ``second``."""
    only_in_first = first.difference(second)
    return only_in_first

def compute_sim(first:set, second:set, sim_type: str) -> tuple:
    """Compute the similarity of two instance sets.

    Args:
        first: First set of instances.
        second: Second set of instances.
        sim_type: Name of the similarity measure. Only ``"jaccard"`` is
            implemented.

    Returns:
        A 5-tuple ``(sim, conj, disj, f_diff, s_diff)`` where ``sim`` is
        ``len(conj) / len(disj)``, ``conj`` is the intersection, ``disj`` the
        union, ``f_diff`` is ``first`` minus ``second`` and ``s_diff`` is
        ``second`` minus ``first``. When both inputs are empty the union is
        empty and ``sim`` is defined as ``1.0``.

    Raises:
        ValueError: If ``sim_type`` names a measure that is not implemented.
    """
    # Reject unknown measures up front. ValueError subclasses Exception, so
    # callers that catch Exception keep working.
    if sim_type != "jaccard":
        raise ValueError('Similarity measure not implemented: {}'.format(sim_type))

    conj = conjuction(first, second)
    disj = disjunction(first, second)
    f_diff = difference(first, second)
    s_diff = difference(second, first)

    # Two empty sets have an empty union; treat them as identical instead of
    # dividing by zero.
    sim = len(conj) / len(disj) if disj else 1.0

    return sim, conj, disj, f_diff, s_diff

In [31]:
### INSTANCE RETRIEVAL ###

def lmdb_to_dbp_links(graph: rdflib.Graph, cls: str, unquote: bool):
    """Collect the owl:sameAs targets of all instances of an LMDB class.

    The query selects ``?obj`` for every subject typed ``lmdbm:<cls>`` that has
    an ``owl:sameAs`` link, keeping only objects whose string form matches the
    pattern ``dbpedia.org``.

    Args:
        graph: Graph to run the SPARQL query against.
        cls: Local class name appended to the ``lmdbm:`` prefix.
        unquote: When true, each URI is passed through ``urllib.parse.unquote``.

    Returns:
        A list of URI strings, one per result row.
    """
    sparql = """
            PREFIX lmdbm: <http://data.linkedmdb.org/resource/movie/>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            SELECT ?obj WHERE {{
                ?sub rdf:type lmdbm:{} .
                ?sub owl:sameAs ?obj .
                FILTER REGEX(STR(?obj), 'dbpedia.org') .
            }}""".format(cls)
    # Each result row carries a single binding; take it as a plain string.
    uris = [str(row[0]) for row in graph.query(sparql)]
    if unquote:
        return [urllib.parse.unquote(uri) for uri in uris]
    return uris

def dbp_subjects(graph: rdflib.Graph, cls: str, unquote: bool):
    """Collect the URIs of all subjects typed as a DBpedia ontology class.

    Args:
        graph: Graph to run the SPARQL query against.
        cls: Local class name appended to the ``dbo:`` prefix.
        unquote: When true, each URI is passed through ``urllib.parse.unquote``.

    Returns:
        A list of URI strings, one per result row.
    """
    sparql = """
                PREFIX dbo: <http://dbpedia.org/ontology/>
                SELECT ?sub WHERE {{
                    ?sub a dbo:{} .
            }}""".format(cls)
    # Each result row carries a single binding; take it as a plain string.
    uris = [str(row[0]) for row in graph.query(sparql)]
    if unquote:
        return [urllib.parse.unquote(uri) for uri in uris]
    return uris

In [32]:
### LOAD FILM GRAPHS ###
# NOTE: unpickling can execute arbitrary code; only load pickle files from a
# trusted source.
# The `with` blocks close each file handle as soon as its graph is loaded.
with open(os.path.join('Data', 'LMDB', 'FilmGraph.pkl'), 'rb') as lmdb_film_file:
    lmdb_film_graph = pickle.load(lmdb_film_file)
with open(os.path.join("Data", 'LodALot', 'DBPediaFilmGraph.pkl'), 'rb') as dbp_film_file:
    dbp_film_graph = pickle.load(dbp_film_file)

In [33]:
### GET FILM INSTANCES TO COMPARE ###
# Both URI lists are percent-decoded (unquote=True) before they are compared.
lmdb_dbp_film_same_as = lmdb_to_dbp_links(graph=lmdb_film_graph, cls='film', unquote=True)
dbp_film_subj = dbp_subjects(graph=dbp_film_graph, cls='Film', unquote=True)

In [None]:
# URIs present both among the LMDB sameAs targets and the DBpedia film subjects.
set(lmdb_dbp_film_same_as) & set(dbp_film_subj)

In [34]:
### COMPARE FILM INSTANCES ###
# Jaccard similarity between the LMDB sameAs targets and the DBpedia film subjects.
sim, conj, disj, lmdb_dbp, dbp_lmdb = compute_sim(set(lmdb_dbp_film_same_as), set(dbp_film_subj), 'jaccard')
print('SIMILARITY: ', sim)
print('CONJUNCTION: ', len(conj))
print('DISJUNCTION: ', len(disj))
# The backslash is doubled because '\ ' is an invalid escape sequence; the
# printed text is unchanged.
print(' LMDB \\ DBP: ', len(lmdb_dbp))
print(' DBP \\ LMDB: ', len(dbp_lmdb))

SIMILARITY:  0.030823180147241953
CONJUNCTION:  9282
DISJUNCTION:  301137
 LMDB \ DBP:  1243
 DBP \ LMDB:  290612


In [35]:
### LOAD ACTOR GRAPHS ###
# NOTE: unpickling can execute arbitrary code; only load pickle files from a
# trusted source.
# The `with` blocks close each file handle as soon as its graph is loaded.
with open(os.path.join('Data', 'LMDB', 'ActorGraph.pkl'), 'rb') as lmdb_actor_file:
    lmdb_actor_graph = pickle.load(lmdb_actor_file)
with open(os.path.join("Data", 'LodALot', 'DBPediaActorGraph.pkl'), 'rb') as dbp_actor_file:
    dbp_actor_graph = pickle.load(dbp_actor_file)

In [36]:
### GET ACTOR INSTANCES TO COMPARE ###
# Both URI lists are percent-decoded (unquote=True) before they are compared.
lmdb_dbp_actor_same_as = lmdb_to_dbp_links(graph=lmdb_actor_graph, cls='actor', unquote=True)
dbp_actor_subj = dbp_subjects(graph=dbp_actor_graph, cls='Actor', unquote=True)

In [37]:
### COMPARE ACTOR INSTANCES ###
# Jaccard similarity between the LMDB sameAs targets and the DBpedia actor subjects.
sim, conj_actors, disj_actors, lmdb_dbp_actors, dbp_lmdb_actors = compute_sim(set(lmdb_dbp_actor_same_as), set(dbp_actor_subj), 'jaccard')
print('SIMILARITY: ', sim)
# Report the actor sets computed on the line above, not the film variables
# (conj, disj, lmdb_dbp, dbp_lmdb) left over from the film comparison cell.
print('CONJUNCTION: ', len(conj_actors))
print('DISJUNCTION: ', len(disj_actors))
# The backslash is doubled because '\ ' is an invalid escape sequence; the
# printed text is unchanged.
print(' LMDB \\ DBP: ', len(lmdb_dbp_actors))
print(' DBP \\ LMDB: ', len(dbp_lmdb_actors))

SIMILARITY:  0.008031509140735292
CONJUNCTION:  1354
DISJUNCTION:  168586
 LMDB \ DBP:  560
 DBP \ LMDB:  166672


In [41]:
# List the distinct predicates used on subjects with the given rdf:type.
# NOTE(review): the type IRI stops at the namespace and names no class
# (e.g. `film`) - confirm this is intended.
[row for row in lmdb_film_graph.query("""
    SELECT DISTINCT ?pred {
        ?sub ?pred ?obj .
        ?sub a <http://data.linkedmdb.org/resource/movie/>
    }
""")]

[(rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')),
 (rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/director')),
 (rdflib.term.URIRef('http://dbpedia.org/property/hasPhotoCollection')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/performance')),
 (rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/filmid')),
 (rdflib.term.URIRef('http://purl.org/dc/terms/title')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/genre')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/actor')),
 (rdflib.term.URIRef('http://xmlns.com/foaf/0.1/page')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/writer')),
 (rdflib.term.URIRef('http://data.linkedmdb.org/resource/movie/editor')),
 (rdflib.term.URIRef('http://www.w3.org/2004/02/skos/core#subject')),
 (rdflib.term.

In [43]:
# Preview only the first entries; the full list holds thousands of URIs and
# would flood the notebook output.
lmdb_dbp_film_same_as[:20]

['http://dbpedia.org/resource/The_Turn_in_the_Road',
 'http://dbpedia.org/resource/Bound_(film)',
 "http://dbpedia.org/resource/All_You've_Got",
 'http://dbpedia.org/resource/Father_was_a_Fullback',
 'http://dbpedia.org/resource/The_Portrait_of_a_Lady_(film)',
 'http://dbpedia.org/resource/Au_Revoir,_UFO',
 'http://dbpedia.org/resource/The_Adventures_of_Robin_Hood_(film)',
 'http://dbpedia.org/resource/Ghost_Story_(film)',
 'http://dbpedia.org/resource/Chal_Mere_Bhai',
 'http://dbpedia.org/resource/AdiÃ³s_muchachos',
 "http://dbpedia.org/resource/Lonesome_Luke's_Lovely_Rifle",
 'http://dbpedia.org/resource/Time_(film)',
 'http://dbpedia.org/resource/Health_(film)',
 'http://dbpedia.org/resource/What_a_Way_to_Go!',
 'http://dbpedia.org/resource/I_Eat_Your_Skin',
 'http://dbpedia.org/resource/ValentÃ\xadn',
 'http://dbpedia.org/resource/Strangers_When_We_Meet_(film)',
 'http://dbpedia.org/resource/Spring_in_Park_Lane',
 'http://dbpedia.org/resource/My_Mother,_the_Mermaid',
 'http://dbped