### Comparison between Recommender Systems

**Data and libraries:**

In [1]:
#Libraries for time computation and data importation:
import time
import os

#Libraries to manage the Interaction Matrix:
import pandas as pd
import numpy as np

#Utility Functions:
import utils
from utils import *

#Data Structures:
import DataStructures
from DataStructures import *

In [2]:
#Data importation (the ratings file is looked up in the current working directory):
path = os.path.join(os.getcwd(), "data_s.csv")
df = pd.read_csv(filepath_or_buffer = path)

**Collaborative user-based:**

In [9]:
#Select only users with a certain number of features to make the comparison:
def select(data, target, threshold = 0.33):
    """
    FUNCTION:
    Selects the users that have rated enough of the items rated by the target user.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix (NaN marks a missing rating).
    target -> Target user. It is used both as row position (iloc) and as index label (drop),
              so the two are assumed to coincide (default RangeIndex).
    threshold -> A candidate may miss at most round((1 - threshold) * r) of the r items rated by the target.

    RETURNS:
    Tuple (candidates, valid): index of the selected users and index of the items rated by the target.
    When no user satisfies the threshold, every user except the target is returned as candidate.
    """
    target_row = data.iloc[target]
    valid = target_row[target_row.notna()].index
    r = len(valid)

    #For every other user, count how many of the target's rated items are missing:
    unmatches = data[valid].drop(target).isnull().sum(axis = 1)
    acceptable = unmatches[unmatches <= round((1 - threshold)*r, 0)].index

    if len(acceptable) == 0:
        #Fall back to all the other users, keeping the (candidates, valid) shape that callers unpack:
        acceptable = data.drop(target).index

    return (acceptable, valid)
    
#Algorithm to order the nearest neighbors:
def nearestUB(data, target):
    """
    FUNCTION:
    Ranks the users returned by select() according to the score that the cosine helper
    assigns to each of them against the target user.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix.
    target -> Target user (row position).

    RETURNS:
    Tuple (heap, valid): MaxHeap filled with (score, user) insertions and the index of the
    items rated by the target, as returned by select().
    """
    candidates, rated_items = select(data, target)

    ranking = MaxHeap()
    for candidate in candidates:
        similarity = cosine(data.iloc[target].values, data.iloc[candidate].values)
        ranking.insert(similarity, candidate)

    return (ranking, rated_items)

#Assign a predicted rating to an unseen item:
def compute_ratings(data, neighbors, valid, target, k = 10):
    """
    FUNCTION:
    Predicts the rating of every item not rated by the target as the mean of the ratings
    given to that item by the (at most) k nearest neighbors.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix (NaN marks a missing rating).
    neighbors -> Heap of the neighbors; delete() must return a pair whose second element is the
                 user and size must report how many entries are left. The heap is consumed.
    valid -> Index of the items already rated by the target.
    target -> Target user (not used by the computation, kept for interface compatibility).
    k -> Maximum number of neighbors to consider.

    RETURNS:
    List of (predicted rating, item) pairs, in column order. Items that none of the selected
    neighbors has rated are omitted.
    """
    unseen = data.columns.drop(valid)

    #Extract at most k neighbors: the heap may contain fewer than k users.
    nearest = list()
    while len(nearest) < k and neighbors.size > 0:
        nearest.append(neighbors.delete()[1])

    #Mean of the available ratings per item; an item rated by no neighbor stays NaN.
    #Counting the ratings (instead of testing their sum) keeps predictions whose sum is zero.
    means = data.iloc[nearest][unseen].mean(axis = 0, skipna = True)

    return [(rating, item) for item, rating in means.items() if not np.isnan(rating)]

#Return suggestions:
def suggestUB(target, ratings):
    """
    FUNCTION:
    Groups the predicted items into three classes of suggestion according to their rating.

    PARAMETERS:
    target -> Target user (not used by the computation).
    ratings -> List of (rating, item) pairs. The list is consumed from its end, so it is
               empty when the function returns.

    RETURNS:
    Dictionary with the keys "Strongly Recommended" (rating >= 4), "Recommended" (rating >= 3)
    and "Not Recommended" (everything else), each mapped to a list of items.
    """
    strong, fair, poor = list(), list(), list()

    while ratings:
        score, film = ratings.pop()
        if score >= 4:
            bucket = strong
        elif score >= 3:
            bucket = fair
        else:
            bucket = poor
        bucket.append(film)

    return {
        "Strongly Recommended" : strong,
        "Recommended" : fair,
        "Not Recommended" : poor
    }

**Collaborative Matrix Factorization:**

In [4]:
#Factorization of the Interaction Matrix (regularized gradient descent on the observed ratings):
def factorization(data, n_factors, steps = 200, alpha = 0.0002, beta = 0.02, seed = None):
    """
    FUNCTION:
    Approximates the Interaction Matrix with the product U x I of two randomly initialized
    matrices, updated one observed rating at a time. Only the cells greater than 0 are used
    (NaN cells are skipped). No constraint is imposed on the sign of the factors.

    PARAMETERS:
    data -> User-item Interaction Matrix (Pandas Dataframe or anything convertible to a 2-D float array).
    n_factors -> Number of latent factors.
    steps -> Maximum number of passes over the observed ratings.
    alpha -> Learning rate.
    beta -> Regularization weight.
    seed -> Optional seed for the initialization. With None the global numpy generator is used,
            so a previous call to np.random.seed() still controls the result.

    RETURNS:
    Tuple (U, I, E): user factors (n x n_factors), item factors (n_factors x m) and the
    regularized squared error measured after the last executed pass (or on the initial
    factors when no pass is executed).
    """
    data = np.asarray(data, dtype = float)
    n, m = data.shape

    rng = np.random if seed is None else np.random.RandomState(seed)
    U = rng.rand(n, n_factors)
    I = rng.rand(n_factors, m)

    #Observed cells, listed once in row-major order (comparisons with NaN are False):
    with np.errstate(invalid = "ignore"):
        observed = [(int(user), int(item)) for user, item in np.argwhere(data > 0)]

    def regularized_error():
        E = 0
        for user, item in observed:
            E = E + (data[user, item] - np.dot(U[user, :], I[:, item]))**2
            E = E + (beta/2) * np.sum(U[user, :]**2 + I[:, item]**2)
        return E

    #Without training steps the error of the initial factors is reported:
    E = regularized_error() if steps <= 0 else 0

    for iteration in range(steps):
        for user, item in observed:
            eij = data[user, item] - np.dot(U[user, :], I[:, item])
            #The item factors are updated with the already updated user factors:
            U[user, :] = U[user, :] + 2*alpha*(eij * I[:, item] - beta * U[user, :])
            I[:, item] = I[:, item] + 2*alpha*(eij * U[user, :] - beta * I[:, item])
        E = regularized_error()
        if E < 0.001:
            break

    return (U, I, E)

def suggestMF(data, factorized, target):
    """
    FUNCTION:
    Classifies the items not rated by the target using the ratings reconstructed by the factorization.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix (NaN marks a missing rating).
    factorized -> Pandas Dataframe with the reconstructed ratings, with the same columns and row order of data.
    target -> Target user (row position).

    RETURNS:
    Dictionary with the keys "Strongly Recommended" (rating >= 4), "Recommended" (rating >= 3)
    and "Not Recommended" (everything else), each mapped to a list of items.
    """
    observed_row = data.iloc[target]
    unseen = observed_row[np.isnan(observed_row)].index
    predicted_row = factorized.iloc[target]

    recommendations = {
        "Strongly Recommended" : list(),
        "Recommended" : list(),
        "Not Recommended" : list()
    }

    for item in unseen:
        rating = predicted_row[item]

        if rating >= 4:
            label = "Strongly Recommended"
        elif rating >= 3:
            label = "Recommended"
        else:
            label = "Not Recommended"
        recommendations[label].append(item)

    return recommendations

**Collaborative item-based:**

In [5]:
#Algorithm to select the film preferred by a user:
def search_favourite(data, target, unseen):
    """
    FUNCTION:
    Collects the items to which the target user gave its highest rating.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix.
    target -> Target user (row position).
    unseen -> Index of the items not rated by the target.

    RETURNS:
    Queue_c containing one (rating, item) entry for each item rated with the maximum rating,
    in column order. The queue is empty when the target has no rated item.
    """
    rated = data.iloc[target].drop(unseen)

    #Same capacity as before (one slot more than the rated items):
    favourites = Queue_c(len(rated) + 1)

    if len(rated) == 0:
        return favourites

    #The maximum is computed on the values, so the column labels are never used as positions:
    best = rated.max()

    for item, rating in rated.items():
        if rating == best:
            favourites.enqueue(rating, item)

    return favourites

def nearestIB(data, target, favourites, unseen):
    """
    FUNCTION:
    For every favourite item, ranks the unseen items by the score that the cosine helper
    assigns to the two item columns.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix.
    target -> Target user (not used by the computation).
    favourites -> Queue of the favourite items; it is emptied by the function.
    unseen -> Index of the items not rated by the target.

    RETURNS:
    Dictionary that maps each favourite item to a MaxHeap filled with (score, unseen item) insertions.
    """
    ranked = dict()

    while not favourites.is_empty():
        liked = favourites.dequeue()[1]

        candidates = MaxHeap()
        for candidate in unseen:
            similarity = cosine(data[candidate].values, data[liked].values)
            candidates.insert(similarity, candidate)

        ranked[liked] = candidates

    return ranked

def suggestIB(target, nearest, unseen, k = 3):
    """
    FUNCTION:
    Assigns a class of suggestion to every unseen item, starting from the items nearest to the favourites.

    PARAMETERS:
    target -> Target user (not used by the computation).
    nearest -> Dictionary that maps each favourite item to a heap of its neighbors; delete() must
               return a pair whose second element is the item and size must report how many
               entries are left. The heaps are consumed.
    unseen -> Items not rated by the target.
    k -> Maximum number of neighbors extracted for each favourite.

    RETURNS:
    Dictionary item -> class: "Recommended" if the item is extracted once, "Strongly Recommended"
    if it is extracted more than once, "Not Recommended" for the unseen items never extracted.
    """
    suggestions = {}

    for favourite, neighbors in nearest.items():
        extracted = 0
        while (extracted < k) and (neighbors.size > 0):
            suggested = neighbors.delete()[1]
            if suggested in suggestions:
                suggestions[suggested] = "Strongly Recommended"
            else:
                suggestions[suggested] = "Recommended"
            extracted += 1

    #Dictionary lookup replaces the removal from a list: linear time, and no error if an
    #extracted item is not listed among the unseen ones.
    for item in unseen:
        if item not in suggestions:
            suggestions[item] = "Not Recommended"

    return suggestions

### Functions for the comparison:

In [6]:
#Function to evaluate similarity between recommendations:
def encoding(data, target, predictions):
    """
    FUNCTION:
    Converts the dictionary of recommendations (class -> items) into a dictionary that
    associates a class of suggestion to each item not rated by the target.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix.
    target -> Target user.
    predictions -> Dictionary containing the suggestions, grouped by class.

    RETURNS:
    Dictionary that maps every unseen item to its class of suggestion, or to None when the
    item does not appear in any class.
    """
    #Classes are checked in this order, so the first match wins:
    classes = ("Strongly Recommended", "Recommended", "Not Recommended")

    target_row = data.iloc[target]
    unseen = target_row[np.isnan(target_row)].index

    suggestions = {}
    for item in unseen:
        suggestions[item] = next(
            (label for label in classes if item in predictions[label]),
            None
        )

    return suggestions

def compare(data, target, suggestion_1, suggestion_2):
    """
    FUNCTION:
    Measures how much two recommendations for the same target agree.

    PARAMETERS:
    data -> Pandas Dataframe with the user-item Interaction Matrix.
    target -> Target user.
    suggestion_1 -> Dictionary item -> class of suggestion produced by the first system.
    suggestion_2 -> Dictionary item -> class of suggestion produced by the second system;
                    it must contain every item of suggestion_1.

    RETURNS:
    Agreement score divided by the number of items not rated by the target: each item counts 1
    when the two classes are equal and 0.5 when one is "Strongly Recommended" and the other
    "Recommended". Returns 0 when the target has no unseen item.
    """
    target_row = data.iloc[target]
    unseen = target_row[np.isnan(target_row)].index

    if len(unseen) == 0:
        return 0

    #Pairs of different classes that still count as a partial agreement:
    partial = (
        ("Strongly Recommended", "Recommended"),
        ("Recommended", "Strongly Recommended")
    )

    score = 0
    for item, first in suggestion_1.items():
        second = suggestion_2[item]
        if first == second:
            score += 1
        elif (first, second) in partial:
            score += 0.5

    return score/len(unseen)

**User-based vs Matrix factorization:**

In [7]:
#Train the Factorization:
#The factors are initialized with np.random, so the global generator is seeded to make
#the training (and the scores computed from it) reproducible on every run.
np.random.seed(42)
U, I, E = factorization(df, n_factors = 14, steps = 2000)
factorized = np.dot(U, I)
predictions = pd.DataFrame(factorized, columns = df.columns)

In [10]:
#Compare the class-assignment for all the users (User-based vs Matrix Factorization):
score = 0

for target in df.index:
    #Classes assigned by the User-based system:
    neighbors, valid = nearestUB(df, target)
    ratings = compute_ratings(df, neighbors, valid, target)
    s1 = encoding(df, target, suggestUB(target, ratings))

    #Classes assigned by the Matrix Factorization to the same target:
    s2 = encoding(df, target, suggestMF(df, predictions, target))

    score = score + compare(df, target, s1, s2)

#Average agreement over the users:
print("Comparison score: " + str(score/len(df.index)))

Comparison score: 0.6821675035614955


**Item-based vs Matrix Factorization:**

In [11]:
#Compare the class-assignment for all the users (Item-based vs Matrix Factorization):
score = 0

for target in df.index:
    #Classes assigned by the Item-based system (already one class per unseen item):
    unseen = df.iloc[target][np.isnan(df.iloc[target])].index
    favourites = search_favourite(df, target, unseen)
    neighbors = nearestIB(df, target, favourites, unseen)
    s1 = suggestIB(target, neighbors, unseen)

    #Classes assigned by the Matrix Factorization to the same target:
    s2 = encoding(df, target, suggestMF(df, predictions, target))

    score = score + compare(df, target, s1, s2)

#Average agreement over the users:
print("Comparison score: " + str(score/len(df.index)))

Comparison score: 0.2622521222555744


**User-based vs Item-based:**

In [14]:
#Compare the class-assignment for all the users (Item-based vs User-based):
score = 0

for target in df.index:
    #Classes assigned by the Item-based system (already one class per unseen item):
    unseen = df.iloc[target][np.isnan(df.iloc[target])].index
    favourites = search_favourite(df, target, unseen)
    neighbors1 = nearestIB(df, target, favourites, unseen)
    s1 = suggestIB(target, neighbors1, unseen)

    #Classes assigned by the User-based system to the same target:
    neighbors2, valid2 = nearestUB(df, target)
    ratings = compute_ratings(df, neighbors2, valid2, target)
    s2 = encoding(df, target, suggestUB(target, ratings))

    score = score + compare(df, target, s1, s2)

#Average agreement over the users:
print("Comparison score: " + str(score/len(df.index)))

Comparison score: 0.19846954067801634
