# Check reasoning

In [1]:
import pickle
from tqdm import tqdm

In [2]:
# Load the preprocessed bundle and the id -> URI lookup tables.
# Each file is opened in a `with` block so the handle is closed as soon as
# the content has been read.
# NOTE(review): pickle.load can execute arbitrary code; only load a file
# that was produced by this project's own preprocessing.
with open("../data/movie/preprocessed_data_info_64", 'rb') as data_info_file:
    data_info = pickle.load(data_info_file)
with open("../data/movie/entities.txt") as entities_file:
    entities = entities_file.readlines()
with open("../data/movie/moviesIdx.txt") as movies_file:
    movies = movies_file.readlines()

In [3]:
# Element 5 of the preprocessed bundle is the ripple set used by the cells below.
ripple_set = data_info[5]

# Build one id -> readable-name table from both lookup files.
# Each line is whitespace-split: field 0 is the id (kept as a string) and
# field 1 is a DBpedia resource URI, which is reduced to a plain name.
# The movie file is processed second, so its names win on a shared id.
dict_entities = {}
for lookup_lines in (entities, movies):
    for raw_line in lookup_lines:
        fields = raw_line.strip().split()
        readable_name = fields[1].replace('http://dbpedia.org/resource/', '').replace("_", " ")
        dict_entities[fields[0]] = readable_name

In [4]:
# Ontology predicate URI -> numeric offset. A relation's id is 200000 + offset
# and its inverse's id is 200001 + offset.
# NOTE(review): "relative" uses offset 21, which is also the inverse slot of
# "spouse" (20 + 1). Because "relative" is inserted last, id 200021 ends up
# named "relative" and "Inverse spouse" is lost. Confirm against the
# preprocessing step whether 22 was intended.
relations = {
    "http://dbpedia.org/ontology/director": 0,
    "http://dbpedia.org/ontology/writer": 2,
    "http://dbpedia.org/ontology/starring": 4,
    "http://dbpedia.org/ontology/creator": 6,
    "http://dbpedia.org/ontology/subject": 8,
    "http://dbpedia.org/ontology/music_composer": 10,
    "http://dbpedia.org/ontology/country": 12,
    "http://dbpedia.org/ontology/story": 14,
    "http://dbpedia.org/ontology/studio": 16,
    "http://dbpedia.org/ontology/genre": 18,
    "http://dbpedia.org/ontology/spouse": 20,
    "http://dbpedia.org/ontology/relative": 21,
}

# Register a readable name for every relation id and its inverse in the
# same table that already holds the entity names.
for predicate_uri, offset in relations.items():
    label = predicate_uri.replace('http://dbpedia.org/ontology/', '').replace("_", " ")
    dict_entities[str(offset + 200000)] = label
    dict_entities[str(offset + 200001)] = "Inverse " + label

# Make String version

In [5]:
# Render every user's ripple set as readable "head => relation => tail" strings.
# s_ripple_set is positional: entry k holds the hops of the k-th user yielded
# by iterating ripple_set, which is not necessarily the user with id k.
s_ripple_set = []

for user in tqdm(ripple_set):

    s_ripple_hop = []
    for ripple_hop in ripple_set[user]:
        # Each hop is a (heads, relations, tails) triple of parallel sequences.
        # The locals are prefixed with hop_ so that the module-level `relations`
        # mapping defined in an earlier cell is not overwritten by this loop.
        hop_heads, hop_relations, hop_tails = ripple_hop

        s_ripple_hop.append([
            "{} => {} => {}".format(
                dict_entities[str(head)],
                dict_entities[str(relation)],
                dict_entities[str(tail)],
            )
            for head, relation, tail in zip(hop_heads, hop_relations, hop_tails)
        ])

    s_ripple_set.append(s_ripple_hop)

100%|██████████| 137588/137588 [00:54<00:00, 2514.07it/s]


# Make Inference

In [6]:
def intersection(lst1, lst2):
    """Return the distinct values of ``lst1`` that also occur in ``lst2``.

    Args:
        lst1: Iterable of hashable values to filter.
        lst2: Iterable of hashable values to test membership against.

    Returns:
        A list holding each common value once. The order follows set
        iteration and should not be relied upon.
    """
    # Membership tests against a set are constant-time on average.
    lookup = set(lst2)
    common = {item for item in lst1 if item in lookup}
    return list(common)

In [7]:
# For every user, chain a hop-0 triple to a hop-1 triple through each entity
# that is both a hop-0 tail and a hop-1 head, giving one 2-hop path per
# shared entity: (h0, r0, shared entity, r1, t1).
# inferenced_ripple_set is positional: one entry per user that was not skipped.
inferenced_ripple_set = []
for user in tqdm(ripple_set):

    try:
        ripple_hop_0 = ripple_set[user][0]
        ripple_hop_1 = ripple_set[user][1]
    except (IndexError, KeyError):
        # Fewer than two hops recorded: no 2-hop path can be built. The user
        # is skipped and gets no entry in inferenced_ripple_set.
        continue

    hop_0_tails = ripple_hop_0[2]
    hop_1_heads = ripple_hop_1[0]

    # Position of the FIRST triple carrying each entity. setdefault keeps the
    # earliest index, which matches a left-to-right scan that stops at the
    # first hit, without rescanning both hops for every shared entity.
    first_tail_index = {}
    for position, tail in enumerate(hop_0_tails):
        first_tail_index.setdefault(tail, position)

    first_head_index = {}
    for position, head in enumerate(hop_1_heads):
        first_head_index.setdefault(head, position)

    intersect_path = []
    for entity in intersection(hop_0_tails, hop_1_heads):
        i0 = first_tail_index[entity]
        i1 = first_head_index[entity]

        h0 = ripple_hop_0[0][i0]
        r0 = ripple_hop_0[1][i0]

        # h1 equals the shared entity (the hop-0 tail) by construction.
        h1 = ripple_hop_1[0][i1]
        r1 = ripple_hop_1[1][i1]
        t1 = ripple_hop_1[2][i1]

        path = (h0, r0, h1, r1, t1)
        path_s = " => ".join([dict_entities[str(x)] for x in path])
        intersect_path.append((path, path_s))

    inferenced_ripple_set.append(intersect_path)

100%|██████████| 137588/137588 [00:15<00:00, 8679.98it/s]


# Make history dict

# Build each user's liked-movie list from the raw ratings CSV.
# The file is read inside a `with` block so the handle is closed right away.
with open("../data/movie/ratings_re.csv", encoding="utf-8") as ratings_file:
    ratings = ratings_file.readlines()

# Ratings at or above this value count as a positive preference.
threshold = 4
user_preference_history = {}

# NOTE(review): every line is parsed as data, so this assumes the CSV has no
# header row — confirm.
for line in tqdm(ratings):
    # Only the first three comma-separated fields are used.
    user, movie_id, rating = line.strip().split(",")[:3]

    # Every user gets an entry, even one without any positive rating.
    if int(user) not in user_preference_history:
        user_preference_history[int(user)] = []

    if float(rating) >= threshold:
        user_preference_history[int(user)].append(dict_entities[str(movie_id)])

# Build each user's liked-movie list from the tab-separated final ratings file.
# The file is read inside a `with` block so the handle is closed right away.
with open("../data/movie/ratings_final.txt", encoding="utf-8") as ratings_file:
    ratings = ratings_file.readlines()

user_preference_history = {}

for line in tqdm(ratings):
    # Each line must hold exactly three tab-separated fields.
    user, movie_id, rating = line.strip().split("\t")

    # Every user gets an entry, even one without any positive rating.
    if int(user) not in user_preference_history:
        user_preference_history[int(user)] = []

    # Only rows whose rating equals 1 are recorded as liked movies.
    if float(rating) == 1:
        user_preference_history[int(user)].append(dict_entities[str(movie_id)])

In [8]:
import numpy as np

In [9]:
# The first three elements of the preprocessed bundle are the train, eval
# and test partitions.
train_data, eval_data, test_data = data_info[0], data_info[1], data_info[2]

# Stack the three partitions into a single array covering every interaction.
history = np.concatenate([train_data, eval_data, test_data])

In [10]:
# Map each user id to the names of the movies that user rated with 1.
# Users without any such rating still get an (empty) entry.
user_preference_history = {}

for user, movie_id, rating in tqdm(history):
    # setdefault creates the user's list on first sight and returns it.
    liked_movies = user_preference_history.setdefault(int(user), [])

    if float(rating) == 1:
        liked_movies.append(dict_entities[str(movie_id)])

100%|██████████| 13724776/13724776 [00:36<00:00, 379167.21it/s]


# Test

In [11]:
# Index used by the spot-check cells below to pick one user's results.
sample_user = 1

### Test the string version

In [12]:
# Hop-0 triples of the sample entry, rendered as strings and sorted for reading.
sorted(s_ripple_set[sample_user][0])

['Abbott and Costello Meet Frankenstein => director => Charles Barton (director)',
 'Abbott and Costello Meet Frankenstein => genre => Comedy',
 'Abbott and Costello Meet Frankenstein => genre => Horror',
 'Abbott and Costello Meet Frankenstein => starring => Béla Lugosi',
 'Abbott and Costello Meet Frankenstein => starring => Glenn Strange',
 'Abbott and Costello Meet Frankenstein => starring => Lon Chaney, Jr.',
 'Abbott and Costello Meet Frankenstein => starring => Lou Costello',
 'Abbott and Costello Meet Frankenstein => writer => John Grant (screenwriter)',
 'Alien (film) => genre => Sci-Fi',
 'Back to the Future => genre => Adventure',
 'Back to the Future => genre => Comedy',
 'Blade Runner => genre => Action',
 'Blade Runner => genre => Thriller',
 'Boogie Nights => music composer => Michael Penn',
 'Boogie Nights => starring => Burt Reynolds',
 'Boogie Nights => starring => Nicole Ari Parker',
 'Boogie Nights => starring => William H. Macy',
 'Dark City (1998 film) => genre =>

### User history

In [18]:
# NOTE(review): the history is keyed by user id, while s_ripple_set and
# inferenced_ripple_set are positional lists; the + 1 presumably converts the
# list position into the matching user id — confirm.
sorted(user_preference_history[sample_user + 1])

['2001: A Space Odyssey (film)',
 'Abbott and Costello Meet Frankenstein',
 'Alien (film)',
 'Any Given Sunday',
 'Back to the Future',
 'Blade Runner',
 'Boogie Nights',
 'Braveheart (1925 film)',
 'Dark City (1998 film)',
 'Fantastic Voyage',
 'Friday the 13th (2009 film)',
 'From Russia with Love (film)',
 'Grumpier Old Men',
 'Grumpy Old Men (film)',
 'Hellraiser',
 'Jurassic Park (film)',
 'Legends of the Fall',
 'Men in Black (film series)',
 "Mr. Holland's Opus",
 'North by Northwest',
 'Return of the Fly',
 'Rules of Engagement (film)',
 'Runaway (2010 film)',
 'Stand by Me (film)',
 'Star Trek: First Contact',
 'Terminator 2: Judgment Day',
 'U-571 (film)',
 'Voyage to the Bottom of the Sea']

### Test the inferred path

In [14]:
# 2-hop paths of the sample entry: (id tuple, rendered string) pairs.
inferenced_ripple_set[sample_user]

[((11317, 200018, 15458, 200019, 8341),
  'Runaway (2010 film) => genre => Comedy => Inverse genre => Made (2001 film)'),
 ((7815, 200018, 15460, 200019, 1516),
  'Legends of the Fall => genre => Drama => Inverse genre => Besieged (film)'),
 ((9620, 200018, 15493, 200019, 13423),
  'North by Northwest => genre => Romance => Inverse genre => The Last Five Years (film)'),
 ((9620, 200018, 15529, 200019, 15095),
  'North by Northwest => genre => Adventure => Inverse genre => Wings in the Dark'),
 ((6092, 200018, 15534, 200019, 10365),
  'Hellraiser => genre => Horror => Inverse genre => Piranha II: The Spawning'),
 ((3436, 200018, 15662, 200019, 3312),
  'Dark City (1998 film) => genre => Mystery => Inverse genre => Cry Wolf (2005 film)'),
 ((12451, 200018, 15472, 200019, 12728),
  'Star Trek: First Contact => genre => Thriller => Inverse genre => Suicide Kings'),
 ((12451, 200018, 15471, 200019, 10390),
  'Star Trek: First Contact => genre => Action => Inverse genre => Planet of the Apes

# For positions 1..9999, print which movies appear both as hop-0 heads of the
# ripple set and in the user's recorded positive history.
for i in tqdm(range(1, 10000)):

    try:
        user_ripple_0 = s_ripple_set[i][0]
    except IndexError:
        # Position out of range, or no hop recorded for this entry.
        kg_history = []
    else:
        # The head is the part before the first " => " of each rendered triple.
        kg_history = [x.split(" => ")[0] for x in user_ripple_0]

    # The history is looked up with a + 1 offset relative to the list
    # position; a missing user yields an empty history.
    user_history = user_preference_history.get(i + 1, [])

    print(intersection(kg_history, user_history))
    