# Veronica Mars Knowledge Graph

## RDF Triples

In [1]:
'''
    Create the RDF Triples w/o time or reification
'''
import rdflib
import xlrd
import pandas
from rdflib import URIRef, Literal, BNode,Namespace

data = { # similar format for how you made your pandas data fram
    "subject": [],
    "predicate": [],
    "object" : [],
}
SS_name = "VMars Triples.xlsx"
wb = xlrd.open_workbook(SS_name)

for l in range(wb.nsheets):
    sheet = wb.sheet_by_index(l)
    for i in range(1, sheet.nrows): # skip the first header line
        s = sheet.cell_value(i, 0)
        p = sheet.cell_value(i, 1)
        o = sheet.cell_value(i, 2)

        if not s or not p or not o: # if any are empty then no triple
            continue
            
        data["subject"].append(s)
        data["predicate"].append(p)
        data["object"].append(o)

    
data_processed = {
    'subject':[],'predicate':[],'object': []
}
ugly_token = {
    ' ': '_',
    '"': '',
}

# replace all ugly tokens and copy to new data structure
for x in data.keys(): #x is subj,obj,pred
    for item in data[x]:
        if type(item)!=str:
            data_processed[x].append(item)
            continue
        new_token=item       
        for k in ugly_token:
            new_token = new_token.strip()
            new_token=new_token.replace(k, ugly_token[k]) 
            
        data_processed[x].append(new_token)            

n = Namespace("http://UCLA_REU_2020.org/Veronica_Mars/")

g = rdflib.Graph()
pred_counts = {} # Dictionary mapping each predicate to the number of times they have been used

for i in range(len(data['subject'])):
    s = n[data_processed['subject'][i]]
    p = n[data_processed['predicate'][i]]
    o_data=data_processed['object'][i]
    if type(o_data)==float or type(o_data)==int:
        o_node=Literal(o_data)
    else:
        o_node=n[o_data]
    g.add((s, p, o_node))

#check g
for s, p, o in g:
   print((s, p, o))

(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/oxcarbazepine'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/has_side_effect'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/uncontrollable,_emotional_fits'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Duncan_Kane'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/bestfriend_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Logan_Echolls'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/tapes'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/belongs_to'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Aaron_Echolls'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/white_sneakers'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/clue_of'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/innocence_of_Abel_Koonz'))
(rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Bone_Hamilton

In [None]:
'''
    Create the RDF triples with Reification and Episode Time
'''
import rdflib
import xlrd
import pandas
from rdflib import URIRef, Literal, BNode,Namespace

data = { # similar format for how you made your pandas data fram
    "subject": [],
    "predicate": [],
    "object" : [],
    "time": [], # time context for each triple
    "discovered_at": []
}
SS_name = "VMars Triples.xlsx"
wb = xlrd.open_workbook(SS_name)

for l in range(wb.nsheets):
    sheet = wb.sheet_by_index(l)
    for i in range(1, sheet.nrows): # skip the first header line
        s = sheet.cell_value(i, 0)
        p = sheet.cell_value(i, 1)
        o = sheet.cell_value(i, 2)
        t = sheet.cell_value(i, 3)
        
        if not t: # If there is no time in col 3 then assign episode time
            t = l
        
        if not s or not p or not o: # if any are empty then no triple
            continue
            
        data["subject"].append(s)
        data["predicate"].append(p)
        data["object"].append(o)
        
        if t != l: # if triple time and episode time are distinct
            data['discovered_at'].append(l * 10)
            data['time'].append(t)
        else:
            data["time"].append(l * 10)
            data["discovered_at"].append(None)

    
data_processed = {
    'subject':[],'predicate':[],'object': [], "time": [], "discovered_at": []
}
ugly_token = {
    ' ': '_',
    '"': '',
}

# replace all ugly tokens and copy to new data structure
for x in data.keys(): # x is subj, obj, pred, time
    for item in data[x]:
        if type(item) != str:
            data_processed[x].append(item)
            continue
        new_token=item
        new_token = new_token.strip()
        for k in ugly_token:
            new_token=new_token.replace(k, ugly_token[k]) 
        data_processed[x].append(new_token)            

n = Namespace("http://UCLA_REU_2020.org/Veronica_Mars/")

'''
 (x, meet, y) -->
     (meet1, hasSrc, x)
     (meet1, hasDst, y)
     (meet1, hasType, meet)
     (meet1, hasTime, 0950)
     (meet1, hasLoc, Neptune High School)
'''
g = rdflib.Graph()
pred_counts = {} # Dictionary mapping each predicate to the number of times they have been used
for i in range(len(data['subject'])):
    s = data_processed['subject'][i]
    p = data_processed['predicate'][i]
    o = data_processed['object'][i]
    t = data_processed['time'][i]
    d = data_processed['discovered_at'][i]
    
    if type(o)==float or type(o)==int:
        o_node=Literal(o)
    else:
        o_node=n[o]
    
    if p in pred_counts:
        pred_counts[p] += 1
    else:
        pred_counts[p] = 1
    
    g.add(( n[p+str(pred_counts[p])], n['hasSrc'], n[s] )) # (meet1, hasSrc, x)
    g.add(( n[p+str(pred_counts[p])], n['hasDst'], o_node )) # (meet1, hasDst, y)
    g.add(( n[p+str(pred_counts[p])], n['hasType'], n[p] )) # (meet1, hasType, meet)
    g.add(( n[p+str(pred_counts[p])], n['hasTime'], Literal(t) )) # (meet1, hasTime, 0950)
    
    if d is not None:
        g.add(( n[p+str(pred_counts[p])], n['discovered_at'], Literal(d) )) # (meet1, discovered_at, 1050)

print(len(list(g.triples((None, None, None)))))

#check g
for s, p, o in g:
    #print((s, p, o))
    pass

# Example of how to find all friend instances in new knowledge graph
# Note that WE HAVENT REFINED THE GRAPH
# Do a function which is the opposite of stripURI
print(pred_counts['clue_of'])
all_friends = list(g.subjects(predicate=URIRef('http://UCLA_REU_2020.org/Veronica_Mars/hasType'), object=URIRef('http://UCLA_REU_2020.org/Veronica_Mars/clue_of')))
for f_inst in all_friends:
    f1 = stripURI(next(g.objects(subject=f_inst, predicate=URIRef('http://UCLA_REU_2020.org/Veronica_Mars/hasSrc')), None))
    f2 = stripURI(next(g.objects(subject=f_inst, predicate=URIRef('http://UCLA_REU_2020.org/Veronica_Mars/hasDst')), None))
    
    print("{} is clue of {}".format(f1, f2))

## Utils

In [2]:
'''
    Utils
'''
def stripURI(x):
    return x.split("/")[-1]

def replaceUgly(df, ugly_tokens):
    pass


## visualization

In [3]:
import matplotlib.pyplot as plt
import networkx as nx
%matplotlib qt
#%matplotlib inline
plt.figure(figsize=(20,20))

edgelabels={}
G = nx.DiGraph()
#plt.clf()
for i in range(len(data['object'])):
    v1 = data['subject'][i]
    v2 = data['object'][i]
    G.add_edge(v1,v2)
    e_lbl = data['predicate'][i]
    edgelabels[(v1, v2)] = e_lbl

#print(edge_labels)

pos = nx.spring_layout(G,k=0.15,iterations=20, scale=3)
nx.draw_networkx(G, pos=pos,font_size=8,node_color='pink')#, with_labels=False, node_size=300)
#nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edgelabels, font_size=7)

plt.show()

In [None]:
'''
    Utils
'''
def stripURI(x):
    return x.split("/")[-1]

In [4]:
'''
    Display some subgraph.
'''
def stripURI(x):
    return x.split("/")[-1]

import matplotlib.pyplot as plt
import networkx as nx
%matplotlib qt
#%matplotlib inline
plt.figure(figsize=(20,20))

edgelabels={}
G = nx.DiGraph()
#plt.clf()
VM = URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Duncan_Kane')
completed_nodes = [] # used to prevent following cycles
queue = [VM]
# get all of the nodes connected to VM, following the direction of the graph.
while queue != []:
    current_node = queue.pop(0)
    completed_nodes.append(current_node)
    neighbours = g.predicate_objects(subject=current_node)
    subj = stripURI(current_node)
    for n in neighbours:
        obj = stripURI(n[1])
        pred = stripURI(n[0])
        if n[1] not in completed_nodes: # if False:
            queue.append(n[1])
        edgelabels[(subj, obj)] = pred
        G.add_edge(subj, obj)
        print(queue, end='\n\n')

#print(edge_labels)

pos = nx.spring_layout(G,k=0.15,iterations=20, scale=3)
nx.draw_networkx(G, pos=pos,font_size=8,node_color='pink')
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edgelabels, font_size=7)

plt.show()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)





[rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Hamilton_Cho'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Caz_Truman'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Fuller'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Sabrina'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Kane_Scholarship_Nominees'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/stolen_mascot'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/stolen_mascot'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Hamilton_Cho'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Hamilton_Cho'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Caz_Truman'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Fuller'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Sabrina'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Kane_Scholarship_Nominees'), rdflib.term.URIRef('

In [5]:
'''
    Display all the clues of case1
'''

import matplotlib.pyplot as plt
import networkx as nx

def stripURI(x):
    return x.split("/")[-1]

a = URIRef('http://UCLA_REU_2020.org/Veronica_Mars/clue_of')
b = URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Case1')

%matplotlib qt
#%matplotlib inline
plt.figure(figsize=(20,20))

edgelabels={}
G = nx.DiGraph()
#plt.clf()
for subj in g.subjects(predicate=a, object=b):
    v1 = stripURI(subj)
    v2 = stripURI(b)
    G.add_edge(v1, v2)
    edgelabels[(v1,v2)] = stripURI(a)

#print(edge_labels)

pos = nx.spring_layout(G,k=0.15,iterations=20, scale=3)
nx.draw_networkx(G, pos=pos,font_size=8,node_color='pink')
nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edgelabels, font_size=7)

plt.show()

print(list(g.subjects(predicate=a, object=b)))


[rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/shoes'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/backpack'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Soccer_uniform'), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_confession"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/spy_pen'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/tapes'), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Abel_Koonz's_bloody_clothing"), rdflib.term.URIRef("http://UCLA_REU_2020.org/Veronica_Mars/Lilly's_Secret"), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/shot_glass'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Phone_call'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Crime_Photographs'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/Speeding_Ticket'), rdflib.term.URIRef('http://UCLA_REU_2020.org/Veronica_Mars/white_sneakers')]


In [6]:
def random_walk_nondir(g, walk_length):
    '''
        Performs random walk over rdflib graph. Does not pay attention to direction of the knowledge graph.
        Usually in KG it is directed as subj-pred-obj
        Returns 2-tuple of lists (nodes_traversed, edges_traversed)
        g::rdflib graph
        walk_length::positive_int
    '''
    subjs = list(g.subjects())
    currentNode = random.choice(subjs)
    nodes_traversed = [stripURI(currentNode)]
    edges_traversed = []
    for i in range(walk_length):
        # get the possible nodes
        preds_objs = list(g.predicate_objects(subject = currentNode)) # (edge, node)
        subj_preds = list(g.subject_predicates(object = currentNode)) # (node, edge)
        # combine the two lists
        # list of (node, edge) tuples
        nodes_edges = [(t[0], t[1]) for t in subj_preds] + [(t[1], t[0]) for t in preds_objs]
        if len(nodes_edges) == 0: # nowhere to go, end walk
            break
        node, edge = random.choice(nodes_edges)
        nodes_traversed.append(stripURI(node))
        edges_traversed.append(stripURI(edge))
        currentNode = node
    return (nodes_traversed, edges_traversed)


In [None]:
'''
    Perform a random walk over the knowledge graph, recording both entities and edge labels passed through
'''
import random
#Here we consider the case where the random walk includes the predicates as well
cleaned_data = []
for i in range(3000):
    rw_nodes, rw_edges = random_walk_nondir(g, 50)
    rw_entity = rw_nodes + rw_edges
    lmao = ' '.join(entity for entity in rw_entity)
    cleaned_data.append(lmao)

In [7]:
'''
    Perform a random walk over the knowledge graph, recording only the entities passed through
'''
import random
#Here we consider the case where the random walks only extract the nodes
cleaned_data = []
for i in range(1000):
    rw_nodes, rw_edges = random_walk_nondir(g, 50)
    lmao = ' '.join(word for word in rw_nodes)
    cleaned_data.append(lmao)

In [32]:
'''
    Perform topic modelling on the corpus of random walks using NMF.
    Here each 'document' is a random walk on the knowledge graph.
'''

#Tf-idf with NMF
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

count_vectorizer = TfidfVectorizer(stop_words='english')
count_data = count_vectorizer.fit_transform(cleaned_data)
idx_to_word = np.array(count_vectorizer.get_feature_names())

from sklearn.decomposition import NMF
nmf = NMF(n_components=50, solver="mu") # IMPORTANT: change n_components to change the number of topics output
H = nmf.fit_transform(count_data)
W = nmf.components_
 
# print the topics
 
for i, topic in enumerate(W):
 
    print("Topic {}: {}".format(i + 1, ",".join([str(x) for x in idx_to_word[topic.argsort()[-15:]]])))

Topic 1: alibi,troy_vandegraff,leanne_mars,09ers,clarence_weidman,upper_class,character,case1,keith_mars,jake_kane,logan_echolls,lilly_kane,celeste_kane,veronica_mars,duncan_kane
Topic 2: the_worm,eddie,vic,friday,girls_at_bar,body_shots,number,asphyxiation,katherine_wills,andrea_sims,videotapes,amy_polk,vic_sciaraffa,eddie_laroche,case11
Topic 3: stroke,money,rich,moon_calf_collective,dying,gant,mr,s_will,upper_class,_gant,mr_gant,mrs_gant,casey_gant,s_grandmother,casey
Topic 4: character,s_car,fertilizer,big_gun,student,fire_drills,website,bomb_threat,ben,killemall,clayton_wifi,exploding_tennis_balls,norris_clayton,case18,pete
Topic 5: bryce,sam,hamilton,character,bone,bloom,case13,club,benjamin_bloom,dime_bag,marcel,bryce_hamilton,yolanda_hamilton,sam_bloom,bone_hamilton
Topic 6: diary,susan_knight,previous_school,sk,rolling_stones_music,statutory_rape,texts,house_key,_silk_sheets,black,s_grade,carrie,carrie_bishop,case14,chuck_rook
Topic 7: darcy,caz,pizza_delivery_worker,truman,fu

## Embedding Analysis

In [9]:
'''
    Here we load the pretrained TransE embedding of the knowledge graph into a gensim word2vec class
    That is, the embedding being loaded is a low-dimensional graph embedding which was pretrained in OpenKE
'''
from gensim.models import KeyedVectors
vm = KeyedVectors.load_word2vec_format("VM_TransE_emb.txt", binary=False)

In [10]:
'''
    Example of finding the Word Mover's distance between two entities using the transE embedding
'''
print(vm.most_similar("veronica_mars"))
d1 = ["veronica_mars"]
d2 = ["duncan_kane"]
print(vm.wmdistance(d1,d2)) 
print("veronica_mars" in vm.vocab) # True
# Refer to https://radimrehurek.com/gensim/models/keyedvectors.html for other functions you can call on the model

[('duncan_kane', 0.6375635862350464), ('lilly_kane', 0.603159487247467), ('logan_echolls', 0.5903574228286743), ('likes', 0.528955340385437), ('wallace_fennel', 0.5254055857658386), ('don_lamb', 0.5107237100601196), ('keith_mars', 0.5057557821273804), ('framed', 0.5023417472839355), ('mother_of', 0.4498407244682312), ('mandy', 0.4483006000518799)]
7.949503421783447
True


In [11]:
def topicextract(i,j,W): #i is the topic index and j is the number of words extracted from this topic
    s = [str(x) for x in idx_to_word[W[i,:].argsort()[-j:]]]
    return s

In [12]:
'''
    Output the distance between each pair of topics.
    Change the values in each range to match the 'n_components' argument earlier. (Refer to when NMF was instanciated in this file) 
'''
for i in range(50):
    for j in range(50):
        print("Distance betweeen topic " + str(i+1) + "," + str(j+1) + " is " + str(vm.wmdistance(topicextract(i,10,W),topicextract(j,10,W))))

Distance betweeen topic 1,1 is 0.0
Distance betweeen topic 1,2 is 8.941387761757277
Distance betweeen topic 1,3 is 8.021777896285332
Distance betweeen topic 1,4 is 9.016234347417067
Distance betweeen topic 1,5 is 8.614157720344256
Distance betweeen topic 1,6 is 8.921655698895094
Distance betweeen topic 1,7 is 9.020872068357754
Distance betweeen topic 1,8 is 8.686357664103486
Distance betweeen topic 1,9 is 7.554583158241632
Distance betweeen topic 1,10 is 8.948465202083586
Distance betweeen topic 1,11 is 7.799661458155347
Distance betweeen topic 1,12 is 8.97822208425617
Distance betweeen topic 1,13 is 8.599148423140116
Distance betweeen topic 1,14 is 8.631638931626561
Distance betweeen topic 1,15 is 8.806871129060267
Distance betweeen topic 1,16 is 8.842614814482586
Distance betweeen topic 1,17 is 7.528743924406814
Distance betweeen topic 1,18 is 7.82812033004608
Distance betweeen topic 1,19 is 8.60433002262268
Distance betweeen topic 1,20 is 8.640165916334151
Distance betweeen topic 1,

Distance betweeen topic 5,48 is 4.32126275375414
Distance betweeen topic 5,49 is 5.886087013094711
Distance betweeen topic 5,50 is 6.860006856884097
Distance betweeen topic 6,1 is 8.921655698895094
Distance betweeen topic 6,2 is 6.186943372061996
Distance betweeen topic 6,3 is 6.881270084974266
Distance betweeen topic 6,4 is 5.69567215669526
Distance betweeen topic 6,5 is 6.503991643305475
Distance betweeen topic 6,6 is 0.0
Distance betweeen topic 6,7 is 6.613732371081083
Distance betweeen topic 6,8 is 6.144539056266219
Distance betweeen topic 6,9 is 6.192865917773112
Distance betweeen topic 6,10 is 5.585309846519764
Distance betweeen topic 6,11 is 6.811973769956817
Distance betweeen topic 6,12 is 5.983593264538481
Distance betweeen topic 6,13 is 5.592825900066121
Distance betweeen topic 6,14 is 5.645652388149085
Distance betweeen topic 6,15 is 5.8878036395640025
Distance betweeen topic 6,16 is 5.914956167800111
Distance betweeen topic 6,17 is 6.03723869729656
Distance betweeen topic 6

Distance betweeen topic 11,2 is 6.984511646653773
Distance betweeen topic 11,3 is 7.473788102655676
Distance betweeen topic 11,4 is 6.718837367784215
Distance betweeen topic 11,5 is 7.221628054066087
Distance betweeen topic 11,6 is 6.811973769956817
Distance betweeen topic 11,7 is 7.319075960916759
Distance betweeen topic 11,8 is 6.972588737133265
Distance betweeen topic 11,9 is 6.971384086052656
Distance betweeen topic 11,10 is 6.573456120206977
Distance betweeen topic 11,11 is 0.0
Distance betweeen topic 11,12 is 6.9745162039762985
Distance betweeen topic 11,13 is 6.59370553911782
Distance betweeen topic 11,14 is 6.672345049962044
Distance betweeen topic 11,15 is 6.841855744584276
Distance betweeen topic 11,16 is 6.610446197037372
Distance betweeen topic 11,17 is 6.547825443120719
Distance betweeen topic 11,18 is 6.533304636288452
Distance betweeen topic 11,19 is 5.3686423393565414
Distance betweeen topic 11,20 is 6.319747188831855
Distance betweeen topic 11,21 is 7.501366730988575
D

Distance betweeen topic 15,49 is 6.382118957164457
Distance betweeen topic 15,50 is 6.384766074086761
Distance betweeen topic 16,1 is 8.842614814482586
Distance betweeen topic 16,2 is 6.171462255956051
Distance betweeen topic 16,3 is 6.822339945638287
Distance betweeen topic 16,4 is 5.612055046192681
Distance betweeen topic 16,5 is 6.494937720505697
Distance betweeen topic 16,6 is 5.914956167800111
Distance betweeen topic 16,7 is 6.235306143404409
Distance betweeen topic 16,8 is 6.015437667663776
Distance betweeen topic 16,9 is 6.514772844490143
Distance betweeen topic 16,10 is 5.566006417979437
Distance betweeen topic 16,11 is 6.610446197037372
Distance betweeen topic 16,12 is 5.05794250227791
Distance betweeen topic 16,13 is 5.463401822300507
Distance betweeen topic 16,14 is 5.608448486012447
Distance betweeen topic 16,15 is 5.675934844791839
Distance betweeen topic 16,16 is 0.0
Distance betweeen topic 16,17 is 6.112846046373458
Distance betweeen topic 16,18 is 5.831640699565652
Dist

Distance betweeen topic 20,37 is 5.2226653004453505
Distance betweeen topic 20,38 is 6.233273264688492
Distance betweeen topic 20,39 is 5.102868474570512
Distance betweeen topic 20,40 is 5.915576277773793
Distance betweeen topic 20,41 is 5.492784541167977
Distance betweeen topic 20,42 is 5.8603759802413
Distance betweeen topic 20,43 is 5.0303969588923145
Distance betweeen topic 20,44 is 6.809820240213283
Distance betweeen topic 20,45 is 4.8859223012199875
Distance betweeen topic 20,46 is 6.6762138028628835
Distance betweeen topic 20,47 is 5.000469827144717
Distance betweeen topic 20,48 is 5.405437970209265
Distance betweeen topic 20,49 is 6.072806653091149
Distance betweeen topic 20,50 is 6.250791089149093
Distance betweeen topic 21,1 is 7.06452472151184
Distance betweeen topic 21,2 is 7.439603126782035
Distance betweeen topic 21,3 is 7.8710401179231475
Distance betweeen topic 21,4 is 7.037643853464889
Distance betweeen topic 21,5 is 7.499184679496192
Distance betweeen topic 21,6 is 7.

Distance betweeen topic 25,39 is 5.230822288457679
Distance betweeen topic 25,40 is 6.247272460867817
Distance betweeen topic 25,41 is 4.8229717273657915
Distance betweeen topic 25,42 is 5.981874423502852
Distance betweeen topic 25,43 is 5.184973759633904
Distance betweeen topic 25,44 is 6.846141528341899
Distance betweeen topic 25,45 is 4.982815886431455
Distance betweeen topic 25,46 is 6.8263743308501255
Distance betweeen topic 25,47 is 5.692908580937957
Distance betweeen topic 25,48 is 5.500451128567363
Distance betweeen topic 25,49 is 6.223737075955896
Distance betweeen topic 25,50 is 6.3022890456533425
Distance betweeen topic 26,1 is 8.600509657066343
Distance betweeen topic 26,2 is 6.215579042533111
Distance betweeen topic 26,3 is 6.4905625175295505
Distance betweeen topic 26,4 is 5.90574975594406
Distance betweeen topic 26,5 is 6.480548023925303
Distance betweeen topic 26,6 is 5.909318352424666
Distance betweeen topic 26,7 is 6.4583988308388705
Distance betweeen topic 26,8 is 6.

Distance betweeen topic 30,28 is 5.249263585121631
Distance betweeen topic 30,29 is 5.740893889510901
Distance betweeen topic 30,30 is 0.0
Distance betweeen topic 30,31 is 4.800545356083584
Distance betweeen topic 30,32 is 6.430045905936282
Distance betweeen topic 30,33 is 5.758659009400844
Distance betweeen topic 30,34 is 6.461892790649127
Distance betweeen topic 30,35 is 6.481858957870673
Distance betweeen topic 30,36 is 5.021861411219787
Distance betweeen topic 30,37 is 5.949957363275131
Distance betweeen topic 30,38 is 5.44846285384426
Distance betweeen topic 30,39 is 5.209925408901214
Distance betweeen topic 30,40 is 5.238116447210133
Distance betweeen topic 30,41 is 4.973744386729349
Distance betweeen topic 30,42 is 6.671032874086762
Distance betweeen topic 30,43 is 5.7877111705567525
Distance betweeen topic 30,44 is 7.354131201153913
Distance betweeen topic 30,45 is 4.957478174285888
Distance betweeen topic 30,46 is 7.022475314912559
Distance betweeen topic 30,47 is 6.2282469181

Distance betweeen topic 35,10 is 5.918997432626723
Distance betweeen topic 35,11 is 7.000823985279848
Distance betweeen topic 35,12 is 6.26608844169712
Distance betweeen topic 35,13 is 5.736951339994557
Distance betweeen topic 35,14 is 6.130637184852077
Distance betweeen topic 35,15 is 6.021086945004654
Distance betweeen topic 35,16 is 6.014964207423189
Distance betweeen topic 35,17 is 6.180886585229302
Distance betweeen topic 35,18 is 5.955540391651915
Distance betweeen topic 35,19 is 6.404833099355696
Distance betweeen topic 35,20 is 5.763948811377715
Distance betweeen topic 35,21 is 7.1822794435112
Distance betweeen topic 35,22 is 5.292258932203102
Distance betweeen topic 35,23 is 6.436187623736718
Distance betweeen topic 35,24 is 5.750402553814545
Distance betweeen topic 35,25 is 5.877546765608215
Distance betweeen topic 35,26 is 6.218393888978767
Distance betweeen topic 35,27 is 5.777780244949156
Distance betweeen topic 35,28 is 6.388024205602455
Distance betweeen topic 35,29 is 5

Distance betweeen topic 39,39 is 0.0
Distance betweeen topic 39,40 is 5.832843925270774
Distance betweeen topic 39,41 is 5.505242108863381
Distance betweeen topic 39,42 is 6.082076408073522
Distance betweeen topic 39,43 is 5.077541619439058
Distance betweeen topic 39,44 is 6.794999053222453
Distance betweeen topic 39,45 is 3.723039958860874
Distance betweeen topic 39,46 is 6.515276795972109
Distance betweeen topic 39,47 is 5.5704249264259325
Distance betweeen topic 39,48 is 5.117894891722584
Distance betweeen topic 39,49 is 5.833610513432912
Distance betweeen topic 39,50 is 6.19451095859003
Distance betweeen topic 40,1 is 6.599709934618209
Distance betweeen topic 40,2 is 6.499591973426559
Distance betweeen topic 40,3 is 6.928632079856463
Distance betweeen topic 40,4 is 6.429083810139509
Distance betweeen topic 40,5 is 6.567613618723061
Distance betweeen topic 40,6 is 6.353201770505271
Distance betweeen topic 40,7 is 6.813090621453605
Distance betweeen topic 40,8 is 6.195891351873701
Di

Distance betweeen topic 44,45 is 6.970654288727894
Distance betweeen topic 44,46 is 7.337283789702454
Distance betweeen topic 44,47 is 6.64723616775669
Distance betweeen topic 44,48 is 6.767246654612155
Distance betweeen topic 44,49 is 6.905470900814411
Distance betweeen topic 44,50 is 7.0629353594740385
Distance betweeen topic 45,1 is 8.829037931156156
Distance betweeen topic 45,2 is 4.918176817916488
Distance betweeen topic 45,3 is 6.224515261433853
Distance betweeen topic 45,4 is 5.434485875659179
Distance betweeen topic 45,5 is 6.065972755865478
Distance betweeen topic 45,6 is 5.4377707666588195
Distance betweeen topic 45,7 is 6.183672223291492
Distance betweeen topic 45,8 is 5.727864813340569
Distance betweeen topic 45,9 is 5.4021383353014
Distance betweeen topic 45,10 is 4.999469515241242
Distance betweeen topic 45,11 is 6.360553306538821
Distance betweeen topic 45,12 is 5.419439726830482
Distance betweeen topic 45,13 is 4.971095807561636
Distance betweeen topic 45,14 is 5.248254

Distance betweeen topic 50,1 is 6.909743388117027
Distance betweeen topic 50,2 is 6.688116065512847
Distance betweeen topic 50,3 is 6.976854027357323
Distance betweeen topic 50,4 is 6.499706731658745
Distance betweeen topic 50,5 is 6.860006856884097
Distance betweeen topic 50,6 is 6.557012817483513
Distance betweeen topic 50,7 is 6.889261360426139
Distance betweeen topic 50,8 is 6.559344619049265
Distance betweeen topic 50,9 is 6.821625998166658
Distance betweeen topic 50,10 is 6.325140357235718
Distance betweeen topic 50,11 is 6.732936969673158
Distance betweeen topic 50,12 is 6.630609659932136
Distance betweeen topic 50,13 is 6.295883452638075
Distance betweeen topic 50,14 is 6.450453871250153
Distance betweeen topic 50,15 is 6.384766074086761
Distance betweeen topic 50,16 is 6.374062509322697
Distance betweeen topic 50,17 is 5.240608569739723
Distance betweeen topic 50,18 is 6.164630375530243
Distance betweeen topic 50,19 is 5.082705317390442
Distance betweeen topic 50,20 is 6.25079

In [19]:
'''
    Output all entities
'''
#checking possibles subjects+obj
sub = list(g.subjects()) + list(g.objects())
refined_ent = {}
for entry in sub:
    if entry in refined_ent.keys():
        refined_ent[entry] += 1
    else:
        refined_ent[entry] = 1
entity = list(refined_ent.keys())
strip_entity  = []
for l in entity:
    strip_entity.append(stripURI(l))
print(strip_entity)
num_entity = len(strip_entity)
print(num_entity)

['oxcarbazepine', 'Duncan_Kane', 'tapes', 'white_sneakers', 'Bone_Hamilton', 'bet', 'Justin_Smith', 'Character', 'Meg_Manning', 'Carrie_Bishop', 'Mrs_Gant', 'Weevil_Navarro', 'Exploding_tennis_balls', 'Norris_Clayton', 'Eddie_LaRoche', 'Mr_Gant', 'SK', 'Lizzy_Manning', 'Case2', 'Student_aide', 'Kimmy', 'perpetrator', 'Clarence_Weidman', 'fertilizer', 'Drug_problem', 'Veronica_Mars', 'Case4', 'Jake_Kane', 'missing_dogs', 'Georgia', 'Madison_Sinclair', 'Josh', 'Logan_Echolls', 'Perpetrator', 'Tom_Cruz', 'Debra_Villareal', 'hard_drives', 'Sabrina_Fuller', 'Casey_Gant', 'Leticia_Navarro', 'Phone_call', 'Pete', 'Alicia_Fennel', 'Vic_Sciaraffa', 'Jack', 'Case11', 'purity_test_score', 'Gabe', 'Lynn_Echolls', 'Aaron_Echolls', 'diary', 'Wanda_Varner', 'Don_Lamb', 'spy_pen', 'Ultimate_Cash_Crop', 'previous_school', 'Mystery_Number', 'Keith_Mars', 'FakeLetter', 'Hamilton_Cho', 'Mrs._Donaldson', 'picture', 'Lilly_Kane', 'Chuck_Rook', 'Jimmy', 'Yolanda_Hamilton', 'Amy_Polk', 'Cliff_McCormack', 'sho

In [None]:
#checking possibles predicates
'''
    Output all possible predicates
'''
pred = list(g.predicates())
refinedpred = {}
for entry in pred:
    if entry in refinedpred.keys():
        refinedpred[entry] += 1
    else:
        refinedpred[entry] = 1
predicate = list(refinedpred.keys())
strip_predicate  = []
for l in predicate:
    strip_predicate.append(stripURI(l))
print(strip_predicate)
num_pred = len(strip_predicate)
print(num_pred)

In [22]:
'''
    Generate a low dimensional graph embedding based on the predicates attached to each entity.
    Refer to the final report for more detail on what this means
'''

import numpy as np
from sklearn.preprocessing import normalize

embedding_mat = np.zeros((230,570))

for s,p,o in g:
    indexs = entity.index(s) 
    indexp = predicate.index(p)
    indexo = entity.index(o)
    embedding_mat[indexp, indexs] = embedding_mat[indexp,indexs] + 1
    embedding_mat[indexp + 115, indexo] = embedding_mat[indexp + 115, indexo] + 1

predicate_embedding = normalize(embedding_mat, axis = 0, norm= 'l1')

In [25]:
'''
    Write the predicate emebedding to file
'''
to_write = ""
for i in range(570):
    ent_name = strip_entity[i]
    ent_name = ent_name.lower()
    new_line = ent_name
    ent_embedding = predicate_embedding[:,i]
    for value in ent_embedding:
        new_line += ' {}'.format(value)
    to_write += new_line + '\n'
to_write = to_write[:-1]
to_write = '570 230' + to_write
with open("VM pred_emb.txt", "w") as f:
    f.write(to_write)

In [27]:
from gensim.models import KeyedVectors
vm = KeyedVectors.load_word2vec_format("VM pred_emb.txt", binary=False)

In [28]:
print(vm.most_similar("veronica_mars")) # Does work
d1 = ["veronica_mars"]
d2 = ["duncan_kane"]
print(vm.wmdistance(d1,d2)) 
print("veronica_mars" in vm.vocab) # True
# Refer to https://radimrehurek.com/gensim/models/keyedvectors.html for other functions you can call on the model

[('carrie_bishop', 0.5964173674583435), ('keith_mars', 0.5823346376419067), ('wallace_fennel', 0.5100153088569641), ('troy_vandegraff', 0.45617154240608215), ('catherina_lenova', 0.438951313495636), ('logan_echolls', 0.42454880475997925), ('jack', 0.41700372099876404), ('luke_haldeman', 0.41216251254081726), ('casey_gant', 0.38793739676475525), ('hamilton_cho', 0.3872324526309967)]
0.21882325410842893
True


In [29]:
def topicextract(i,j,W): #i is the topic index and j is the number of words extracted from this topic
    s = [str(x) for x in idx_to_word[W[i,:].argsort()[-j:]]]
    return s

In [33]:
for i in range(50):
    for j in range(50):
        print("Distance betweeen topic " + str(i+1) + "," + str(j+1) + " is " + str(vm.wmdistance(topicextract(i,10,W),topicextract(j,10,W))))

Distance betweeen topic 1,1 is 0.0
Distance betweeen topic 1,2 is 0.5384152039451598
Distance betweeen topic 1,3 is 0.6002017073487476
Distance betweeen topic 1,4 is 0.5849214029846549
Distance betweeen topic 1,5 is 0.5457319336109996
Distance betweeen topic 1,6 is 0.6664832327402899
Distance betweeen topic 1,7 is 0.6464959877163529
Distance betweeen topic 1,8 is 0.6180821665342153
Distance betweeen topic 1,9 is 0.48219152159845846
Distance betweeen topic 1,10 is 0.8050884790694712
Distance betweeen topic 1,11 is 0.5853957705230982
Distance betweeen topic 1,12 is 0.6574014703819154
Distance betweeen topic 1,13 is 0.7133408956459368
Distance betweeen topic 1,14 is 0.6758714542300643
Distance betweeen topic 1,15 is 0.7088592940112471
Distance betweeen topic 1,16 is 0.6796335292731497
Distance betweeen topic 1,17 is 0.8184031865359306
Distance betweeen topic 1,18 is 0.5372213172863602
Distance betweeen topic 1,19 is 0.5388932977556348
Distance betweeen topic 1,20 is 0.7096845061140447
Dis

Distance betweeen topic 6,3 is 0.7016236506270089
Distance betweeen topic 6,4 is 0.6108778197365444
Distance betweeen topic 6,5 is 0.7475571965514696
Distance betweeen topic 6,6 is 0.0
Distance betweeen topic 6,7 is 0.7168770717415823
Distance betweeen topic 6,8 is 0.4758949915219723
Distance betweeen topic 6,9 is 0.5628845805976691
Distance betweeen topic 6,10 is 0.7578467071686231
Distance betweeen topic 6,11 is 0.7188581536471422
Distance betweeen topic 6,12 is 0.6853504591699747
Distance betweeen topic 6,13 is 0.8142864153241869
Distance betweeen topic 6,14 is 0.744678303460182
Distance betweeen topic 6,15 is 0.6955685885902125
Distance betweeen topic 6,16 is 0.6739066242470737
Distance betweeen topic 6,17 is 0.9407352288834724
Distance betweeen topic 6,18 is 0.5075885607999396
Distance betweeen topic 6,19 is 0.6607284793535172
Distance betweeen topic 6,20 is 0.5604132504275194
Distance betweeen topic 6,21 is 0.701435464286186
Distance betweeen topic 6,22 is 0.8552024570474756
Dist

Distance betweeen topic 11,16 is 0.7964854787769762
Distance betweeen topic 11,17 is 0.8780486283259392
Distance betweeen topic 11,18 is 0.6443736203510285
Distance betweeen topic 11,19 is 0.4639542478049399
Distance betweeen topic 11,20 is 0.7170968187463233
Distance betweeen topic 11,21 is 0.6165833453325481
Distance betweeen topic 11,22 is 0.8365059743953706
Distance betweeen topic 11,23 is 0.7629509793727876
Distance betweeen topic 11,24 is 0.8837657995130893
Distance betweeen topic 11,25 is 0.726551139800644
Distance betweeen topic 11,26 is 0.7960568197981835
Distance betweeen topic 11,27 is 0.7435631769490242
Distance betweeen topic 11,28 is 0.799563750423336
Distance betweeen topic 11,29 is 0.8764179124337236
Distance betweeen topic 11,30 is 0.6969792322936059
Distance betweeen topic 11,31 is 0.7884731665085793
Distance betweeen topic 11,32 is 0.7673343071809806
Distance betweeen topic 11,33 is 0.6630752963752747
Distance betweeen topic 11,34 is 0.7784826610078812
Distance betwe

Distance betweeen topic 16,2 is 0.6385210607973469
Distance betweeen topic 16,3 is 0.9144932680210804
Distance betweeen topic 16,4 is 0.560398602464017
Distance betweeen topic 16,5 is 0.7640834806374408
Distance betweeen topic 16,6 is 0.6739066242470737
Distance betweeen topic 16,7 is 0.7715047494766182
Distance betweeen topic 16,8 is 0.7674934399348017
Distance betweeen topic 16,9 is 0.755121587012389
Distance betweeen topic 16,10 is 0.5097599059172176
Distance betweeen topic 16,11 is 0.7964854787769762
Distance betweeen topic 16,12 is 0.6429592906318135
Distance betweeen topic 16,13 is 0.7786230833559936
Distance betweeen topic 16,14 is 0.7741598555310649
Distance betweeen topic 16,15 is 0.9310673091903101
Distance betweeen topic 16,16 is 0.0
Distance betweeen topic 16,17 is 0.9548427109550457
Distance betweeen topic 16,18 is 0.7002320216109396
Distance betweeen topic 16,19 is 0.694118490072443
Distance betweeen topic 16,20 is 0.7212753675174407
Distance betweeen topic 16,21 is 0.697

Distance betweeen topic 20,41 is 0.8683202729913256
Distance betweeen topic 20,42 is 0.8231339957453955
Distance betweeen topic 20,43 is 0.8578554624390333
Distance betweeen topic 20,44 is 0.5732312426106405
Distance betweeen topic 20,45 is 0.7508385910678631
Distance betweeen topic 20,46 is 0.7433816385241128
Distance betweeen topic 20,47 is 0.8767632889666611
Distance betweeen topic 20,48 is 0.6310042698446642
Distance betweeen topic 20,49 is 0.5583507455076917
Distance betweeen topic 20,50 is 0.7567199231148385
Distance betweeen topic 21,1 is 0.4710093619071483
Distance betweeen topic 21,2 is 0.6402677054419516
Distance betweeen topic 21,3 is 0.8432457935317927
Distance betweeen topic 21,4 is 0.6134523155735134
Distance betweeen topic 21,5 is 0.6748539322907208
Distance betweeen topic 21,6 is 0.701435464286186
Distance betweeen topic 21,7 is 0.6103417533200263
Distance betweeen topic 21,8 is 0.7167867072003187
Distance betweeen topic 21,9 is 0.6024684376443119
Distance betweeen topi

Distance betweeen topic 25,34 is 0.6909773808393954
Distance betweeen topic 25,35 is 0.7488352032047271
Distance betweeen topic 25,36 is 0.5208434752032025
Distance betweeen topic 25,37 is 1.032851781817586
Distance betweeen topic 25,38 is 0.46224842287266243
Distance betweeen topic 25,39 is 0.8920417764151571
Distance betweeen topic 25,40 is 0.5160242189729611
Distance betweeen topic 25,41 is 0.966911189848194
Distance betweeen topic 25,42 is 0.907099082414848
Distance betweeen topic 25,43 is 0.8586298164096028
Distance betweeen topic 25,44 is 0.5534275449486765
Distance betweeen topic 25,45 is 0.7817800723273752
Distance betweeen topic 25,46 is 0.7482633848135589
Distance betweeen topic 25,47 is 0.8237891440573571
Distance betweeen topic 25,48 is 0.7985841818519712
Distance betweeen topic 25,49 is 0.7600628360728774
Distance betweeen topic 25,50 is 0.7827815654522078
Distance betweeen topic 26,1 is 0.7811317103075027
Distance betweeen topic 26,2 is 0.7704885906164645
Distance betweee

Distance betweeen topic 30,26 is 0.821834983790314
Distance betweeen topic 30,27 is 0.8126942499416918
Distance betweeen topic 30,28 is 0.6022835645795344
Distance betweeen topic 30,29 is 0.8031299718776236
Distance betweeen topic 30,30 is 0.0
Distance betweeen topic 30,31 is 0.6542434670281408
Distance betweeen topic 30,32 is 0.8267747175987786
Distance betweeen topic 30,33 is 0.5693158428417802
Distance betweeen topic 30,34 is 0.7837063725877165
Distance betweeen topic 30,35 is 0.7021385723049163
Distance betweeen topic 30,36 is 0.637913043713577
Distance betweeen topic 30,37 is 0.9708088963660223
Distance betweeen topic 30,38 is 0.6652814036935567
Distance betweeen topic 30,39 is 0.9532164114357708
Distance betweeen topic 30,40 is 0.6465436959586517
Distance betweeen topic 30,41 is 0.7993718405073355
Distance betweeen topic 30,42 is 0.8699416644924148
Distance betweeen topic 30,43 is 0.8910197096412839
Distance betweeen topic 30,44 is 0.5759463055915416
Distance betweeen topic 30,45

Distance betweeen topic 35,9 is 0.6572810906688692
Distance betweeen topic 35,10 is 0.7218149500446318
Distance betweeen topic 35,11 is 0.6420056205033333
Distance betweeen topic 35,12 is 0.5662706272653936
Distance betweeen topic 35,13 is 0.7055534108607615
Distance betweeen topic 35,14 is 0.6631972432213784
Distance betweeen topic 35,15 is 0.8322795230796813
Distance betweeen topic 35,16 is 0.5828698700468297
Distance betweeen topic 35,17 is 0.8428972238780974
Distance betweeen topic 35,18 is 0.5232813687648772
Distance betweeen topic 35,19 is 0.5413070920383454
Distance betweeen topic 35,20 is 0.6691948315696532
Distance betweeen topic 35,21 is 0.6094907733528137
Distance betweeen topic 35,22 is 0.8631084091617583
Distance betweeen topic 35,23 is 0.8163098804626463
Distance betweeen topic 35,24 is 0.8777496690792033
Distance betweeen topic 35,25 is 0.7488352032047271
Distance betweeen topic 35,26 is 0.8049078035812376
Distance betweeen topic 35,27 is 0.7850178083926201
Distance betw

Distance betweeen topic 39,50 is 0.7930179515309516
Distance betweeen topic 40,1 is 0.6405076578939948
Distance betweeen topic 40,2 is 0.7755789970587398
Distance betweeen topic 40,3 is 0.619173955726057
Distance betweeen topic 40,4 is 0.8592145999771054
Distance betweeen topic 40,5 is 0.630087966574564
Distance betweeen topic 40,6 is 0.7648742926693828
Distance betweeen topic 40,7 is 0.5520889236711204
Distance betweeen topic 40,8 is 0.6955521491752472
Distance betweeen topic 40,9 is 0.5786602316296101
Distance betweeen topic 40,10 is 0.8867689252710442
Distance betweeen topic 40,11 is 0.729321120589386
Distance betweeen topic 40,12 is 0.8997846318821326
Distance betweeen topic 40,13 is 0.7656915276945371
Distance betweeen topic 40,14 is 0.5476210973017694
Distance betweeen topic 40,15 is 0.5964090672752683
Distance betweeen topic 40,16 is 0.9278564035243606
Distance betweeen topic 40,17 is 0.846014575941443
Distance betweeen topic 40,18 is 0.7300039159468786
Distance betweeen topic 4

Distance betweeen topic 45,17 is 0.9116036023391841
Distance betweeen topic 45,18 is 0.8334261607918618
Distance betweeen topic 45,19 is 0.7802296274893283
Distance betweeen topic 45,20 is 0.7508385910678631
Distance betweeen topic 45,21 is 0.8094300683189032
Distance betweeen topic 45,22 is 0.9500539471217154
Distance betweeen topic 45,23 is 0.8563137926936029
Distance betweeen topic 45,24 is 1.0747769878648117
Distance betweeen topic 45,25 is 0.7817800723273752
Distance betweeen topic 45,26 is 0.7863105053969024
Distance betweeen topic 45,27 is 0.7356875650719434
Distance betweeen topic 45,28 is 0.7937402176410316
Distance betweeen topic 45,29 is 0.7963585454135468
Distance betweeen topic 45,30 is 0.746860311642885
Distance betweeen topic 45,31 is 0.8822634797481892
Distance betweeen topic 45,32 is 0.7252833542853536
Distance betweeen topic 45,33 is 0.8341069631891369
Distance betweeen topic 45,34 is 0.8489810954981921
Distance betweeen topic 45,35 is 0.7227832909427642
Distance betw

Distance betweeen topic 50,14 is 0.7331302528891671
Distance betweeen topic 50,15 is 0.7139543959157861
Distance betweeen topic 50,16 is 0.8517403643194226
Distance betweeen topic 50,17 is 0.7029517339376408
Distance betweeen topic 50,18 is 0.6485610363300638
Distance betweeen topic 50,19 is 0.7409244854684783
Distance betweeen topic 50,20 is 0.7567199231148385
Distance betweeen topic 50,21 is 0.8526971652657891
Distance betweeen topic 50,22 is 0.7720313811672407
Distance betweeen topic 50,23 is 0.6943994980372065
Distance betweeen topic 50,24 is 1.0191071722236016
Distance betweeen topic 50,25 is 0.7827815654522078
Distance betweeen topic 50,26 is 0.5448416044703395
Distance betweeen topic 50,27 is 0.850723787392397
Distance betweeen topic 50,28 is 0.8155390324386133
Distance betweeen topic 50,29 is 0.7064316399297148
Distance betweeen topic 50,30 is 0.8890195402221461
Distance betweeen topic 50,31 is 0.4214998839553489
Distance betweeen topic 50,32 is 0.6765237247976059
Distance betw