In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from itertools import combinations
from sklearn.metrics.pairwise import cosine_similarity

In [10]:
def triangle_prime(user_item_matrix, items=False):
    """Pairwise triangle similarity between the rows of a rating matrix.

    For every pair of rows ``a`` and ``b`` the similarity is

        1 - ||a - b|| / (||a|| + ||b||)

    using Euclidean norms.

    Parameters
    ----------
    user_item_matrix : array-like, 2-D
        Numeric matrix (ndarray, nested list or all-numeric DataFrame).
        Rows are compared with each other.
    items : bool, default False
        If True the matrix is transposed first, so columns are compared
        instead of rows.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per compared vector. Only the
        strict upper triangle is filled; the diagonal and the lower triangle
        are zero.

    Raises
    ------
    ValueError
        If the input is not two-dimensional.
    """
    # np.asarray makes row iteration well defined for DataFrames and lists too.
    ratings = np.asarray(user_item_matrix, dtype=float)
    if ratings.ndim != 2:
        raise ValueError(
            "user_item_matrix must be 2-dimensional, got %d dimension(s)"
            % ratings.ndim
        )
    if items:
        ratings = ratings.T

    nr_users = ratings.shape[0]
    norms = np.sqrt(np.sum(np.square(ratings), axis=1))

    tri = np.zeros((nr_users, nr_users))
    # One broadcast per row instead of one Python iteration per pair; memory
    # stays proportional to the matrix itself rather than to the pair count.
    for row in range(nr_users - 1):
        later_rows = ratings[row + 1:]
        distances = np.sqrt(
            np.sum(np.square(later_rows - ratings[row]), axis=1))
        norm_sums = norms[row] + norms[row + 1:]

        # Two all-zero vectors give 0/0. They are identical, so the ratio is
        # left at 0 (similarity 1) instead of producing NaN. `!= 0` rather
        # than `> 0` keeps NaN inputs propagating as NaN.
        ratios = np.divide(
            distances,
            norm_sums,
            out=np.zeros_like(distances),
            where=norm_sums != 0,
        )
        tri[row, row + 1:] = 1 - ratios

    return tri

def urp(user_item_matrix, items=False):
    """Pairwise score built from per-row rating mean and spread.

    Entries equal to 0 are masked out. For every row the mean and the
    standard deviation of the remaining entries are taken, and each pair of
    rows ``(u, v)`` is scored as

        1 - 1 / (1 + exp(-|mean_u - mean_v| * |std_u - std_v|))

    Parameters
    ----------
    user_item_matrix : array-like, 2-D
        Rating matrix; rows are compared with each other.
    items : bool, default False
        If True the matrix is transposed first, so columns are compared.

    Returns
    -------
    numpy.ndarray
        Square matrix with the pair scores in the strict upper triangle and
        zeros on the diagonal and below it.

    Notes
    -----
    NOTE(review): a row without any non-zero entry yields a masked mean/std,
    and ``.data`` reads past that mask -- confirm such rows cannot occur.
    """
    source = user_item_matrix.T if items else user_item_matrix
    ratings = np.ma.array(source)
    nr_users = ratings.shape[0]

    # Hide the zero entries so they do not take part in the statistics.
    rated_only = np.ma.masked_where(ratings == 0, ratings)
    row_means = rated_only.mean(axis=1).data
    row_stds = rated_only.std(axis=1).data

    # triu_indices walks the pairs (0,1), (0,2), ..., (1,2), ... which is the
    # order in which the result matrix is filled below.
    first, second = np.triu_indices(nr_users, 1)
    mean_gaps = np.abs(row_means[second] - row_means[first])
    std_gaps = np.abs(row_stds[second] - row_stds[first])

    pair_scores = 1 - 1 / (1 + np.exp(-(mean_gaps * std_gaps)))

    tri = np.zeros((nr_users, nr_users))
    tri[first, second] = pair_scores

    return tri

def itr(user_item_matrix, items=False, full_matrix=False):
    """Combine ``triangle_prime`` and ``urp`` into a single similarity matrix.

    The two pairwise matrices are multiplied element by element.

    Parameters
    ----------
    user_item_matrix : array-like, 2-D
        Rating matrix handed unchanged to both component measures.
    items : bool, default False
        Forwarded to both measures; if True columns are compared instead of
        rows.
    full_matrix : bool, default False
        If False only the strict upper triangle is filled. If True the upper
        triangle is mirrored into the lower one; the diagonal stays zero in
        both cases.

    Returns
    -------
    numpy.ndarray
        Square similarity matrix.
    """
    similarity = (
        triangle_prime(user_item_matrix, items=items)
        * urp(user_item_matrix, items=items)
    )

    if not full_matrix:
        return similarity

    # Both factors are strictly upper triangular, so adding the transpose
    # mirrors the values without touching the diagonal.
    return similarity + similarity.T

In [15]:
# Toy ratings table: one row per user, one column per movie.
# Julie has no rating for "Lord of the Rings"; the gap is spelled out as NaN
# rather than left to pandas padding a row that is one element short.
movie_df = pd.DataFrame(
    [
        ["Julie", 4., 5., 2., np.nan],
        ["Sal", 1., 2., 5., 3.],
        ["Matthew", 4., 4., 2., 4.],
    ],
    columns=["Name", "2001: A Space Odyssey", "Fargo", "Sense and Sensibility", "Lord of the Rings"],
)
movie_df = movie_df.set_index("Name")

# Fill each missing rating with the mean of its movie column.
# (Alternative: movie_df.fillna(0), since urp() masks zero entries.)
movie_df_imputed = movie_df.fillna(movie_df.mean(axis=0))

display(movie_df)
display(movie_df_imputed)

Unnamed: 0_level_0,2001: A Space Odyssey,Fargo,Sense and Sensibility,Lord of the Rings
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Julie,4.0,5.0,2.0,
Sal,1.0,2.0,5.0,3.0
Matthew,4.0,4.0,2.0,4.0


Unnamed: 0_level_0,2001: A Space Odyssey,Fargo,Sense and Sensibility,Lord of the Rings
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Julie,4.0,5.0,2.0,3.5
Sal,1.0,2.0,5.0,3.0
Matthew,4.0,4.0,2.0,4.0


In [17]:
# Centre every movie column on its mean rating, then compare the users.
# NOTE(review): urp() masks entries equal to 0, so any rating that sits
# exactly on its column mean is masked after centring -- confirm this is
# intended.
movie_matrix = movie_df_imputed.to_numpy()
movie_matrix_norm = movie_matrix - movie_matrix.mean(axis=0)

used_matrix = movie_matrix_norm
num_users = used_matrix.shape[0]

results_itr = itr(used_matrix)
cosine = cosine_similarity(used_matrix)

# Show only the upper triangle of each similarity matrix; the cosine matrix
# additionally has its diagonal cut off (k=1).
for heading, sim_matrix, diag_offset in (
    ("Results ITR", results_itr, 0),
    ("Results cosine", cosine, 1),
):
    print(heading)
    print(np.triu(sim_matrix, k=diag_offset), '\n')

Results ITR
[[0.         0.00311508 0.32772406]
 [0.         0.         0.00442626]
 [0.         0.         0.        ]] 

Results cosine
[[ 0.         -0.96401393  0.81847186]
 [ 0.          0.         -0.94176249]
 [ 0.          0.          0.        ]] 

