In [1]:
from collections import defaultdict
import pandas as pd
from data.graph_loader import load_graph
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

# --- Ratings ---------------------------------------------------------------
print('Loading ratings')
mr = pd.read_csv('../data/mindreader/ratings.csv')

# Rows whose sentiment is 0 are the "unknown" answers; drop them.
has_opinion = mr['sentiment'] != 0
mr = mr[has_opinion]

# Distinct user ids that still have at least one rating after filtering.
users = mr['userId'].unique()

# --- Graph -----------------------------------------------------------------
# Built by the project's load_graph helper from the triples file, requested
# as a directed graph.
print('Loading graph')
g = load_graph('../data/graph/triples.csv', directed=True)

Loading ratings
Loading graph


In [4]:
def _propagate(node, preferences, preference):
    """Record ``preference`` against every direct neighbor of ``node``.

    The neighbors are looked up in the module-level graph ``g``. For each one,
    ``preference`` is appended to that neighbor's entry in ``preferences``,
    which is mutated in place; nothing is returned. Only a single hop is
    covered.

    Args:
        node: Graph node whose neighbors receive the rating.
        preferences: Mapping from node to a list of received ratings.
        preference: The rating value to hand to each neighbor.
    """
    for adjacent in g.neighbors(node):
        received = preferences[adjacent]
        received.append(preference)

    # NOTE: a recursive variant that would forward ``preference * 0.5`` from
    # each neighbor onwards exists only as a disabled idea; it is not executed.
            
def propagate(movie_preferences):
    """Spread each rated movie's sentiment onto its graph neighbors.

    Args:
        movie_preferences: Mapping from movie URI to the rating given to it.

    Returns:
        A ``defaultdict(list)`` mapping every reached node to the list of
        ratings it received, one entry per rated movie adjacent to it.
    """
    collected = defaultdict(list)

    for movie_uri in movie_preferences:
        _propagate(movie_uri, collected, movie_preferences[movie_uri])

    return collected

def reduce_preferences(preferences, max_variance=0.5):
    """Collapse each node's list of received ratings into one signed prediction.

    For every node the mean and (population) variance of its ratings are
    computed. The node is predicted as ``1`` when the mean is positive and as
    ``-1`` when it is negative. No prediction is made, and the node is left
    out of the result, when:

    * the ratings cancel out exactly (mean of 0),
    * the ratings disagree too much (variance above ``max_variance``), or
    * the node has no ratings at all.

    Note that this is a mean/variance rule, not a plain majority vote: a
    clear majority can still be rejected if the variance is too high.

    Args:
        preferences: Mapping from node URI to a list of numeric ratings.
        max_variance: Largest variance for which a prediction is still made.
            Defaults to 0.5, the cutoff that was previously hard-coded.

    Returns:
        dict mapping node URI to ``1`` or ``-1`` for the nodes that received
        a prediction.
    """
    reduced = dict()

    for uri, preference_list in preferences.items():
        # np.mean / np.var of an empty sequence are NaN (with a
        # RuntimeWarning). NaN fails every comparison below, which would
        # silently turn "no evidence" into a -1 prediction.
        if len(preference_list) == 0:
            continue

        mean = np.mean(preference_list)
        variance = np.var(preference_list)

        # Undecided (perfect tie) or too contested: make no prediction.
        if mean == 0. or variance > max_variance:
            continue

        reduced[uri] = 1 if mean > 0 else -1

    return reduced

def infer_preferences(ratings, user):
    """Infer a user's entity preferences from that user's movie ratings.

    The user's rows are split on the ``isItem`` column. Ratings on rows where
    ``isItem`` is true are propagated through the graph and reduced to one
    prediction per reached node; ratings on the remaining rows are returned
    untouched so they can serve as ground truth.

    Args:
        ratings: DataFrame with at least the columns ``userId``, ``uri``,
            ``sentiment`` and ``isItem`` (``isItem`` is used as a boolean
            mask).
        user: Value of ``userId`` selecting the user.

    Returns:
        Tuple ``(predicted, actual)``: ``predicted`` is the output of
        ``reduce_preferences(propagate(...))`` for the user's item ratings,
        ``actual`` maps the URI of each non-item row to its sentiment.
    """
    own_rows = ratings.loc[ratings['userId'] == user, ['uri', 'sentiment', 'isItem']]
    is_item = own_rows['isItem']

    rated_movies = own_rows[is_item]
    rated_entities = own_rows[~is_item]

    # Pair the columns in row order; a URI that appears twice keeps its last
    # sentiment.
    movie_preferences = dict(zip(rated_movies['uri'], rated_movies['sentiment']))
    entity_preferences = dict(zip(rated_entities['uri'], rated_entities['sentiment']))

    return reduce_preferences(propagate(movie_preferences)), entity_preferences

def predict(user):
    """Score the inferred preferences of ``user`` against their explicit ones.

    Only URIs that have both an inferred prediction and an explicit rating
    are compared. Ratings are read from the module-level frame ``mr``.

    Args:
        user: Value of ``userId`` selecting the user.

    Returns:
        Tuple ``(tp, fp, tn, fn)`` of confusion-matrix counts, with ``1`` as
        the positive class and ``-1`` as the negative class. Pairs with any
        other value are not counted.
    """
    predicted, observed = infer_preferences(mr, user)

    # Keyed by (actual, predicted).
    tally = {
        (1, 1): 0,     # true positive
        (-1, 1): 0,    # false positive
        (-1, -1): 0,   # true negative
        (1, -1): 0,    # false negative
    }

    for uri in predicted.keys() & observed.keys():
        outcome = (observed[uri], predicted[uri])
        if outcome in tally:
            tally[outcome] += 1

    return tally[(1, 1)], tally[(-1, 1)], tally[(-1, -1)], tally[(1, -1)]

# Pool every user's confusion-matrix counts into dataset-wide totals.
total_tp = total_fp = total_tn = total_fn = 0

for user in users:
    tp, fp, tn, fn = predict(user)

    total_tp, total_fp = total_tp + tp, total_fp + fp
    total_tn, total_fn = total_tn + tn, total_fn + fn

# All metrics are computed once from the pooled counts (not averaged per user).
precision = total_tp / (total_tp + total_fp)
recall = total_tp / (total_tp + total_fn)
# Share of actual negatives that were predicted negative.
true_negative = total_tn / (total_tn + total_fp)
accuracy = (total_tp + total_tn) / (total_tp + total_tn + total_fp + total_fn)
f1 = (2 * precision * recall) / (precision + recall)

# Report each metric as a percentage.
for label, score in (
    ('Precision', precision),
    ('Recall', recall),
    ('True negative rate', true_negative),
    ('F1', f1),
    ('Accuracy', accuracy),
):
    print(f'{label}: {score * 100}%')


Precision: 91.2625250501002%
Recall: 83.14264849074976%
True negative rate: 63.60601001669449%
F1: 87.0135660149035%
Accuracy: 79.63647258563867%
