# Veronica Mars Knowledge Graph

## RDF Triples

In [5]:
'''
    Create the RDF Triples w/o time or reification
    This Version includes ontology-based refinement at creation time of the knowledge graph
'''
import rdflib
import xlrd
import pandas
from rdflib import URIRef, Literal, BNode,Namespace

# Raw triples read from the spreadsheet, stored as parallel lists
# (entry i of each list belongs to the same triple).
data = {
    "subject": [],
    "predicate": [],
    "object": [],
}
SS_name = "VMars Triples.xlsx"
wb = xlrd.open_workbook(SS_name)

# Every sheet of the workbook is read; columns 0, 1 and 2 hold the
# subject, predicate and object of one triple per row.
for sheet_idx in range(wb.nsheets):
    sheet = wb.sheet_by_index(sheet_idx)
    for i in range(1, sheet.nrows):  # row 0 is the header line
        s = sheet.cell_value(i, 0)
        p = sheet.cell_value(i, 1)
        o = sheet.cell_value(i, 2)

        # A row with an empty cell is not a triple. The object is compared
        # against the empty string (the value xlrd reports for empty cells)
        # instead of being tested for truthiness, so that a numeric object
        # of 0 is kept: numeric objects are valid and become literals later.
        if not s or not p or o == "":
            continue

        data["subject"].append(s)
        data["predicate"].append(p)
        data["object"].append(o)

    
# Cleaned copy of `data`: same columns, same row order.
data_processed = {
    'subject': [], 'predicate': [], 'object': [],
}
# Characters rewritten in every string value, mapped to their replacement.
ugly_token = {
    ' ': '_',
    '"': '',
}

# Every key of ugly_token is a single character and no replacement contains
# another key, so one translation table applies all substitutions at once.
_ugly_table = str.maketrans(ugly_token)

for column, raw_values in data.items():  # column is subject/predicate/object
    cleaned = data_processed[column]
    for value in raw_values:
        # Only plain strings are cleaned; numbers are copied through unchanged.
        if type(value) is str:
            value = value.strip().translate(_ugly_table)
        cleaned.append(value)

# Namespace used for every subject, predicate and non-numeric object.
n = Namespace("http://UCLA_REU_2020.org/Veronica_Mars/")

g = rdflib.Graph()
pred_counts = {}  # predicate -> number of uses; not populated in this cell

# Walk the three cleaned columns in lockstep, one triple per row.
rows = zip(
    data_processed['subject'],
    data_processed['predicate'],
    data_processed['object'],
)
for subj_name, pred_name, o_data in rows:
    # Numeric objects become literals; anything else becomes a URI.
    o_node = Literal(o_data) if type(o_data) in (float, int) else n[o_data]
    g.add((n[subj_name], n[pred_name], o_node))

    
'''
    Refinement:
        Reference to symmetrics and inverses in VM Predicates Google Sheet
    Symmetric predicates and predicates with a well-defined inverse are listed
    below. Whenever only one direction of such a relation is present in the
    knowledge graph, the missing direction is added.
'''
symmetric_preds = [
    'bestfriend_of', 'sibling_of', 'friend_of', 'ex_of',
    'ex_friend_of', 'married_to', 'has_affair_with'
]
inverse_pred = {
    'girlfriend_of': 'boyfriend_of'
}
# (x^-1)^-1 = x --> register the reverse direction of each explicit entry
inverse_pred.update({inv: name for name, inv in inverse_pred.items()})

added_triples = []

# A symmetric predicate is its own inverse, so both kinds of predicate are
# handled by one loop over (predicate, mirrored predicate) pairs. Symmetric
# predicates are processed first, then the explicit inverses.
pred_pairs = [(name, name) for name in symmetric_preds]
pred_pairs.extend(inverse_pred.items())

for pred, mirrored_pred in pred_pairs:
    for subj, _, obj in g.triples((None, n[pred], None)):
        inverse = (obj, n[mirrored_pred], subj)
        if inverse in g:
            continue
        # Keep a readable record (URI prefixes stripped) of what was added.
        added_triples.append([stripURI(term) for term in inverse])
        g.add(inverse)

num_added_triples = len(added_triples)
print('triples added: {}'.format(num_added_triples))
#check g
for s, p, o in g:
   print((s, p, o))

triples added: 35
(rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Sarah's_DNA_Test"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/consists_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/paternity'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Character'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/is_character'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Hamilton_Cho'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Lilly_Kane'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/part_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/pep_squad'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Carrie_Bishop'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/accused'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Chuck_Rook'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Jerry_Sacks'), rdflib.term.URIRef('ht

In [22]:
# Save the graph: serialize `g` in Turtle format to the file "VM_KG.ttl".
g.serialize(destination="VM_KG.ttl", format='turtle')

In [8]:
'''
    Create the RDF triples with Reification and Episode Time
'''
import rdflib
import xlrd
import pandas
from rdflib import URIRef, Literal, BNode,Namespace

# Raw triples read from the spreadsheet, stored as parallel lists
# (entry i of each list belongs to the same triple).
data = {
    "subject": [],
    "predicate": [],
    "object": [],
    "time": [],           # time context for each triple
    "discovered_at": [],  # episode time for rows with an explicit time, else None
}
SS_name = "VMars Triples.xlsx"
wb = xlrd.open_workbook(SS_name)

for sheet_idx in range(wb.nsheets):
    sheet = wb.sheet_by_index(sheet_idx)
    # The sheet index scaled by 10 is used as the episode time of its rows.
    episode_time = sheet_idx * 10
    for i in range(1, sheet.nrows):  # row 0 is the header line
        s = sheet.cell_value(i, 0)
        p = sheet.cell_value(i, 1)
        o = sheet.cell_value(i, 2)
        t = sheet.cell_value(i, 3)

        # A row with an empty cell is not a triple. The object is compared
        # against the empty string (the value xlrd reports for empty cells)
        # instead of being tested for truthiness, so that a numeric object
        # of 0 is kept: numeric objects are valid and become literals later.
        if not s or not p or o == "":
            continue

        data["subject"].append(s)
        data["predicate"].append(p)
        data["object"].append(o)

        # Branch on whether column 3 held a time at all. The previous code
        # substituted the sheet index for a missing time and then compared
        # against the sheet index, so an explicit time that happened to equal
        # the sheet index was discarded and replaced by the episode time.
        if t:
            data["time"].append(t)
            data["discovered_at"].append(episode_time)
        else:
            # No time given (a falsy cell, as before): use the episode time.
            data["time"].append(episode_time)
            data["discovered_at"].append(None)

    
# Cleaned copy of `data`: same columns, same row order.
data_processed = {
    column: []
    for column in ('subject', 'predicate', 'object', "time", "discovered_at")
}
# Characters rewritten in every string value, mapped to their replacement.
ugly_token = {
    ' ': '_',
    '"': '',
}

# Every key of ugly_token is a single character and no replacement contains
# another key, so one translation table applies all substitutions at once.
_ugly_table = str.maketrans(ugly_token)

for column, raw_values in data.items():
    cleaned = data_processed[column]
    for value in raw_values:
        # Only plain strings are cleaned; numbers and None are copied as-is.
        if type(value) is str:
            value = value.strip().translate(_ugly_table)
        cleaned.append(value)

n = Namespace("http://UCLA_REU_2020.org/Veronica_Mars/")

'''
 Reification: each spreadsheet row (x, meet, y) is stored as a statement node
 named <predicate><running count>, described by its own triples:
     (meet1, hasSrc, x)
     (meet1, hasDst, y)
     (meet1, hasType, meet)
     (meet1, hasTime, 0950)
     (meet1, discovered_at, 1050)   only when a discovery time was recorded
 No hasLoc triple is emitted by this cell.
'''
g = rdflib.Graph()
pred_counts = {}  # predicate -> number of times it has been used so far

rows = zip(
    data_processed['subject'],
    data_processed['predicate'],
    data_processed['object'],
    data_processed['time'],
    data_processed['discovered_at'],
)
for s, p, o, t, d in rows:
    # Numeric objects become literals; anything else becomes a URI.
    o_node = Literal(o) if type(o) in (float, int) else n[o]

    # The running count makes the statement node unique per use of p.
    pred_counts[p] = pred_counts.get(p, 0) + 1
    statement = n[p + str(pred_counts[p])]

    g.add((statement, n['hasSrc'], n[s]))        # (meet1, hasSrc, x)
    g.add((statement, n['hasDst'], o_node))      # (meet1, hasDst, y)
    g.add((statement, n['hasType'], n[p]))       # (meet1, hasType, meet)
    g.add((statement, n['hasTime'], Literal(t))) # (meet1, hasTime, 0950)

    if d is not None:
        g.add((statement, n['discovered_at'], Literal(d))) # (meet1, discovered_at, 1050)

# Total number of triples in the reified graph.
print(len(g))

#check g (uncomment the print to dump every triple)
for s, p, o in g:
    #print((s, p, o))
    pass

# Example of how to find all instances of one predicate (here: clue_of) in the
# reified knowledge graph, then read back the source and destination of each.
# Note that WE HAVENT REFINED THE GRAPH
# Do a function which is the opposite of stripURI
print(pred_counts['clue_of'])

has_type = n['hasType']
has_src = n['hasSrc']
has_dst = n['hasDst']

clue_statements = list(g.subjects(predicate=has_type, object=n['clue_of']))
for statement in clue_statements:
    clue = stripURI(next(g.objects(subject=statement, predicate=has_src), None))
    case = stripURI(next(g.objects(subject=statement, predicate=has_dst), None))

    print("{} is clue of {}".format(clue, case))

4668
78
wristband is clue of Case11
Meg_Manning's_Purity is clue of Case8
Lilly's_Secret is clue of Case1
Karl's_email is clue of Case4
postcard is clue of Case15
fertilizer is clue of Case18
Ransom_video is clue of Case16
diary is clue of Case14
texts is clue of Case14
Sun_Tea is clue of Case10
Clayton_wifi is clue of Case18
Dog_man is clue of Case19
pinata is clue of Case5
Rolling_Stones_music is clue of Case14
big_gun is clue of Case18
Sarah's_DNA_Test is clue of Case7
sticker is clue of Case19
Bedroom is clue of Case10
Credit_Card is clue of Veronica's_stalker
house_key is clue of Case14
Jack_Daniels is clue of Case10
Candidate_order is clue of Case6
Speeding_Ticket is clue of Case1
Car is clue of Case17
Diamond_Pendant is clue of Case2
Wanda_rulez is clue of Case6
purity_test_score is clue of Case8
Exploding_tennis_balls is clue of Case18
number is clue of Case11
Holly_Mills is clue of Case9
videotapes is clue of Case11
Abel_Koonz's_confession is clue of Case1
Credit_Card_Purchase

## Utils

In [4]:
'''
    Utils
'''
def stripURI(x):
    """Return the part of *x* after its last "/" (all of *x* if it has none)."""
    return x.rsplit("/", 1)[-1]

def replaceUgly(df, ugly_tokens):
    """Placeholder that is not implemented yet.

    Both arguments are ignored and None is returned.
    """
    return None


## Visualization

In [5]:
'''
    Display all the clues of case1
'''

import matplotlib.pyplot as plt
import networkx as nx

def stripURI(x):
    """Return the text following the final "/" in *x*, or *x* itself if there is no "/"."""
    _, _, tail = x.rpartition("/")
    return tail

a = URIRef('http://UCLA_REU_2020.org/Veronica_Mars/clue_of')
b = URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case1')

%matplotlib qt
#%matplotlib inline
plt.figure(figsize=(20,20))

edgelabels={}
G = nx.DiGraph()
#plt.clf()
for subj in g.subjects(predicate=a, object=b):
    v1 = stripURI(subj)
    v2 = stripURI(b)
    G.add_edge(v1, v2)
    edgelabels[(v1,v2)] = stripURI(a)

#print(edge_labels)

pos = nx.spring_layout(G,k=0.15,iterations=20, scale=3)
nx.draw_networkx(G, pos=pos,font_size=8,node_color='pink')
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edgelabels, font_size=7)

plt.show()

print(list(g.subjects(predicate=a, object=b)))


[rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/shot_glass'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/tapes'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Crime_Photographs'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/white_sneakers'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Phone_call'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/spy_pen'), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Lilly's_Secret"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/backpack'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/shoes'), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_bloody_clothing"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Soccer_uniform'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Speeding_Ticket'), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_confession")]


The iterable function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use np.iterable instead.
  if not cb.iterable(width):
The iterable function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use np.iterable instead.
  if cb.iterable(node_size):  # many node sizes


In [6]:
'''
    Example SPARQL Query 1
    Output the financial status found in every has_financial_status triple
    (one row per triple; only the status is selected, not the character)
'''

# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
financial_status_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?bname
       WHERE {
          ?aname foaf:has_financial_status ?bname.
       }"""

qres = g.query(financial_status_query)
for status_row in qres:
    print(status_row)

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/lower_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/lower_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/upper_class'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/lower_class'),)
(rdflib.term

In [8]:
'''
    Example SPARQL Query 2
    Output the first name found in every has_first_name triple
    (only the name is selected, not the character it belongs to)
'''

# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
first_name_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?bname
       WHERE {
          ?aname foaf:has_first_name ?bname.
       }"""

qres = g.query(first_name_query)
for name_row in qres:
    print(name_row)

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Vincent'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Ben'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Van'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Clarence'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Vic'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Josh'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Wallace'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Andrea'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Luke'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Sarah'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Carrie'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Nathan'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Richie'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Mandy'),)
(rdflib.term.URIR

In [6]:
'''
    Example SPARQL Query 3
    Output the predicate and object of all triples of which
    Weevil_Navarro is the subject
'''

# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
weevil_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?predicate ?object
       WHERE {
           foaf:Weevil_Navarro ?predicate ?object.
       }"""

qres = g.query(weevil_query)
for fact_row in qres:
    print(fact_row)

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/has_trait'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/juvenile_delinquent'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/dislikes'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Logan_Echolls'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/arrested_for'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case2'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/client_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case10'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/has_nickname'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Weevil'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/suspect_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case2'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/has_profession'), rdflib.term.URIRef('http://UCLA_REU_2020.org/V

In [13]:
'''
    Example SPARQL Query 4
    Output all clues: the subject and object of every clue_of triple
'''
# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
all_clues_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?subject ?object
       WHERE {
           ?subject foaf:clue_of ?object.
       }"""

qres = g.query(all_clues_query)
for clue_row in qres:
    print(clue_row)

(rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Karl's_email"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case4'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/bet'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case16'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Tritons'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case12'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Witness_Protection_Program'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case15'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Soccer_uniform'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case1'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/white_sneakers'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/innocence_of_Abel_Koonz'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/videotapes'), rdflib.term.URIRef('http://UCLA_REU_2

In [49]:
'''
    Example SPARQL Query 5
    Output all clues of case 1: every subject that is clue_of Case1
'''
# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
case1_clues_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?subject 
       WHERE {
                 ?subject foaf:clue_of foaf:Case1.
         
       }"""

qres = g.query(case1_clues_query)
for clue_row in qres:
    print(clue_row)

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/spy_pen'),)
(rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Lilly's_Secret"),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/white_sneakers'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Speeding_Ticket'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/tapes'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Soccer_uniform'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Crime_Photographs'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/shot_glass'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/backpack'),)
(rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_bloody_clothing"),)
(rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_confession"),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Phone_call'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_

In [46]:
'''
    Example SPARQL Query 6
    Output all characters who are considered part of the 'outcasts' in the
    show: every subject that is part_of outcasts.
'''
# "foaf" is just the prefix label chosen in the query text; it is bound to
# this project's namespace.
outcasts_query = """PREFIX foaf: <http://UCLA_REU_2020.org/Veronica_Mars/>
       SELECT ?subject 
       WHERE {
                 ?subject foaf:part_of foaf:outcasts.              
                      
         }"""

qres = g.query(outcasts_query)
for member_row in qres:
    print(member_row)

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Veronica_Mars'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Wanda_Varner'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Wallace_Fennel'),)
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Wilson'),)
