# Check reasoning

In [1]:
import pickle
from tqdm import tqdm

In [2]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted preprocessing run.
with open("../data/movie/preprocessed_data_info_64", 'rb') as data_info_file:
    data_info = pickle.load(data_info_file)

# Context managers close the text files as soon as their lines are read,
# instead of leaving the handles open until garbage collection.
with open("../data/movie/entities.txt") as entities_file:
    entities = entities_file.readlines()
with open("../data/movie/moviesIdx.txt") as movies_file:
    movies = movies_file.readlines()

In [3]:
# Element 5 of the preprocessed bundle is the mapping that later cells iterate
# as user -> list of hops.
ripple_set = data_info[5]

# Lookup table: id (kept as the string token read from the file) -> readable
# label, i.e. the DBpedia resource URI without its prefix and with underscores
# turned into spaces.
# Movie lines are processed after entity lines, so a movie label overwrites an
# entity label whenever both files use the same id.
dict_entities = {}
for line in entities + movies:
    fields = line.strip().split()
    label = fields[1].replace('http://dbpedia.org/resource/', '').replace("_", " ")
    dict_entities[fields[0]] = label

In [4]:
# Ontology predicate URI -> base index. Each predicate takes two ids:
# 200000 + index for the forward relation and 200000 + index + 1 for its inverse.
# NOTE(review): "relative" is 21, which breaks the even-index pattern. Id 200021
# is first written as "Inverse spouse" and then overwritten with "relative";
# confirm against the preprocessing step whether 22 was intended.
relations = {
    "http://dbpedia.org/ontology/director": 0,
    "http://dbpedia.org/ontology/writer": 2,
    "http://dbpedia.org/ontology/starring": 4,
    "http://dbpedia.org/ontology/creator": 6,
    "http://dbpedia.org/ontology/subject": 8,
    "http://dbpedia.org/ontology/music_composer": 10,
    "http://dbpedia.org/ontology/country": 12,
    "http://dbpedia.org/ontology/story": 14,
    "http://dbpedia.org/ontology/studio": 16,
    "http://dbpedia.org/ontology/genre": 18,
    "http://dbpedia.org/ontology/spouse": 20,
    "http://dbpedia.org/ontology/relative": 21,
}

# Register a readable label for every forward and inverse relation id in the
# same lookup table that holds the entity and movie labels.
for uri, index in relations.items():
    label = uri.replace('http://dbpedia.org/ontology/', '').replace("_", " ")
    forward_id = 200000 + index
    dict_entities[str(forward_id)] = label
    dict_entities[str(forward_id + 1)] = "Inverse " + label

# Build the string version of the ripple sets

In [5]:
# Human-readable copy of the ripple sets: one entry per user (appended in
# ripple_set iteration order), each entry holding one list of
# "head => relation => tail" strings per hop.
s_ripple_set = []

for user in tqdm(ripple_set):

    s_ripple_hop = []
    # Local names are prefixed with hop_ so the unpacking does not rebind the
    # module-level `relations` dict defined in the relation-label cell.
    for hop_heads, hop_relations, hop_tails in ripple_set[user]:
        s_ripple_hop.append([
            "{} => {} => {}".format(
                dict_entities[str(head)],
                dict_entities[str(relation)],
                dict_entities[str(tail)],
            )
            for head, relation, tail in zip(hop_heads, hop_relations, hop_tails)
        ])

    s_ripple_set.append(s_ripple_hop)

100%|██████████| 137588/137588 [00:58<00:00, 2338.58it/s]


### Test the string version

In [6]:
# NOTE(review): s_ripple_set is a list filled in ripple_set iteration order, so
# this is a positional index; it equals the user id only if the users iterate
# as 0..N-1 in order — TODO confirm.
sample_user = 6006

# Display every hop's "head => relation => tail" strings for that entry.
s_ripple_set[sample_user]

[['12 Angry Men (1957 film) => director => Sidney Lumet',
  'Reservoir Dogs => writer => Roger Avary',
  'Black Narcissus => writer => Emeric Pressburger',
  'Badlands (film) => starring => Warren Oates',
  'Saturday Night Fever => music composer => David Shire',
  'House of Games => genre => Crime',
  'Anatomy of a Murder => starring => George C. Scott',
  'Bullitt => genre => Action',
  'To Kill a Mockingbird (film) => genre => Drama',
  'Shane (film) => starring => Brandon deWilde',
  'For a Few Dollars More => writer => Sergio Leone',
  'Rashomon => genre => Crime',
  'Assault on Precinct 13 (2005 film) => starring => Ja Rule',
  'Dracula (1968 film) => starring => Bernard Archard',
  'Saving Private Ryan => genre => War',
  'Saturday Night Fever => genre => Romance',
  'North by Northwest => starring => James Mason',
  'Rashomon => director => Akira Kurosawa',
  'Mary Poppins (film) => music composer => Robert B. Sherman',
  'Fargo (film) => starring => Steve Buscemi',
  'Frequenc

# Make Inference

In [7]:
def intersection(lst1, lst2):
    """Return the distinct values of ``lst1`` that also occur in ``lst2``.

    Duplicates are collapsed. The result is a list built from a set, so its
    order is whatever set iteration yields, not the order of ``lst1``.
    """
    lookup = set(lst2)  # set membership avoids rescanning lst2 for every value
    return list({value for value in lst1 if value in lookup})

In [8]:
# For every user, join hop-0 triples to hop-1 triples through a shared entity
# (a hop-0 tail that is also a hop-1 head). Each user gets a list of
# (path, path_s) pairs, where path is (h0, r0, shared entity, r1, t1) and
# path_s is the same path rendered with the labels from dict_entities.
inferenced_ripple_set = []
for user in tqdm(ripple_set):

    try:
        ripple_hop_0 = ripple_set[user][0]
        ripple_hop_1 = ripple_set[user][1]
    except (IndexError, KeyError):
        # Fewer than two hops for this user. Append an empty placeholder rather
        # than skipping, so positions in this list stay aligned with the
        # iteration order of ripple_set (and with s_ripple_set).
        inferenced_ripple_set.append([])
        continue

    hop_0_heads, hop_0_relations, hop_0_tails = ripple_hop_0[0], ripple_hop_0[1], ripple_hop_0[2]
    hop_1_heads, hop_1_relations, hop_1_tails = ripple_hop_1[0], ripple_hop_1[1], ripple_hop_1[2]

    # Index of the first triple in which each entity appears as a hop-0 tail /
    # hop-1 head. This picks the same triple as a linear scan that stops at the
    # first match, without rescanning both hops for every shared entity.
    first_index_by_tail = {}
    for i, tail in enumerate(hop_0_tails):
        first_index_by_tail.setdefault(tail, i)
    first_index_by_head = {}
    for i, head in enumerate(hop_1_heads):
        first_index_by_head.setdefault(head, i)

    intersect_path = []
    for entity in intersection(hop_0_tails, hop_1_heads):
        i0 = first_index_by_tail[entity]
        i1 = first_index_by_head[entity]

        # The shared entity is both the hop-0 tail and the hop-1 head, so it
        # appears once in the middle of the path.
        path = (hop_0_heads[i0], hop_0_relations[i0],
                hop_1_heads[i1], hop_1_relations[i1], hop_1_tails[i1])
        path_s = " => ".join([dict_entities[str(x)] for x in path])
        intersect_path.append((path, path_s))

    inferenced_ripple_set.append(intersect_path)

100%|██████████| 137588/137588 [00:16<00:00, 8527.32it/s]


### Test the inferred paths

In [9]:
# NOTE(review): inferenced_ripple_set is a list built by appending while
# iterating ripple_set, so this is a positional index; it refers to the same
# user as s_ripple_set[sample_user] only if both lists hold exactly one entry
# per user in the same order — TODO confirm.
sample_user = 6006

# Display the (id path, readable path) pairs found for that entry.
inferenced_ripple_set[sample_user]

[((6394, 200018, 15459, 200019, 11834),
  'House of Games => genre => Crime => Inverse genre => Shiner (2004 film)'),
 ((13891, 200018, 15460, 200019, 4368),
  "To Kill a Mockingbird (film) => genre => Drama => Inverse genre => Employees' Entrance"),
 ((11468, 200018, 15493, 200019, 8221),
  'Saturday Night Fever => genre => Romance => Inverse genre => Love and Other Troubles'),
 ((2257, 200018, 15471, 200019, 1669),
  'Bullitt => genre => Action => Inverse genre => Bionicle: Mask of Light'),
 ((5167, 200018, 15472, 200019, 550),
  'Frequency (film) => genre => Thriller => Inverse genre => Alive (Meshuggah video)'),
 ((11494, 200018, 15473, 200019, 12957),
  'Saving Private Ryan => genre => War => Inverse genre => Taking Chance'),
 ((2329, 200018, 15857, 200019, 9781),
  'Cabaret (upcoming film) => genre => Musical => Inverse genre => On Moonlight Bay (film)')]