In [56]:
"""
Author: Myolive_Lin
Description: Code implementation of three methods for calculating user similarity
"""


import sys
import os
import numpy as np

# Append the parent of the current working directory to the import search path.
# NOTE(review): presumably this is where the `data_processing` package imported
# in a later cell lives — confirm against the repository layout.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
sys.path.append(parent_dir)




In [57]:
import importlib
from data_processing import data_generate

# Re-execute the module so that changes made to data_generate after its first
# import are visible without restarting the kernel.
importlib.reload(data_generate)

# Imported after the reload so the name is bound to the reloaded function object.
from data_processing.data_generate import generate_random_matrix


In [58]:
# Cosine Similarity
def cosine_similarity(v1,v2):
    """
    Calculate cosine similarity between two vectors.

    Args:
        v1 (np.array): vector 1
        v2 (np.array): vector 2 (same length as v1)
    Returns:
        float: cosine similarity, i.e. dot(v1, v2) / (|v1| * |v2|).
               Returns 0.0 when either vector has zero norm, because the
               angle is undefined there and the plain formula would
               evaluate 0/0 (nan plus a RuntimeWarning).
    """
    dot = float(np.dot(v1,v2))
    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)

    # Guard the all-zero vector case instead of dividing by zero.
    if denominator == 0:
        return 0.0

    return dot/denominator

In [59]:
def Peason_similarity(vector1,vector2):
    """
    Calculate the Pearson correlation between two vectors
    (the function name keeps its original spelling for existing callers).

    Args:
        vector1 (np.array): vector 1 (any array-like is accepted)
        vector2 (np.array): vector 2 (same length as vector1)
    Returns:
        float: Pearson similarity.
               Returns 0.0 when either vector is constant (zero variance),
               because the correlation is undefined there and the plain
               formula would evaluate 0/0 (nan plus a RuntimeWarning).
    """
    # Coerce so that plain Python lists support the element-wise arithmetic below.
    vector1 = np.asarray(vector1)
    vector2 = np.asarray(vector2)

    # Center each vector on its own mean (broadcast scalar subtraction).
    centered_v1 = vector1 - np.mean(vector1)
    centered_v2 = vector2 - np.mean(vector2)

    # Euclidean norms of the centered vectors. These are standard deviations
    # without the 1/n factor; that factor cancels between numerator and denominator.
    norm_v1 = np.sqrt(np.sum(centered_v1 ** 2))
    norm_v2 = np.sqrt(np.sum(centered_v2 ** 2))

    # Sum of the element-wise products of the centered vectors.
    product = np.sum(centered_v1 * centered_v2)

    denominator = norm_v1 * norm_v2
    # Guard the constant-vector case instead of dividing by zero.
    if denominator == 0:
        return 0.0

    similarity = product / denominator

    return similarity


In [104]:
# Similarity formula that centers both vectors on the average score of each item
def similarity_mean_item(vector1,vector2,matrix):
    """
    Calculate similarity between two vectors after centering both on the
    column-wise mean of ``matrix``.

    Args:
        vector1 (np.array): vector 1 (length equal to the number of matrix columns)
        vector2 (np.array): vector 2 (length equal to the number of matrix columns)
        matrix (np.array): 2-D matrix whose column means are used as the center
    Returns:
        float: similarity.
               Returns 0.0 when either centered vector is all zeros (the
               vector equals the column means), because the plain formula
               would evaluate 0/0 (nan plus a RuntimeWarning).
    """
    # Mean of every column, taken across the rows of the matrix.
    mean_item = np.mean(matrix,axis = 0)

    # Center once and reuse for both the numerator and the norms.
    centered_v1 = np.asarray(vector1) - mean_item
    centered_v2 = np.asarray(vector2) - mean_item

    product = np.sum(centered_v1 * centered_v2)

    norm_v1 = np.sqrt(np.sum(centered_v1**2))
    norm_v2 = np.sqrt(np.sum(centered_v2**2))

    denominator = norm_v1 * norm_v2
    # Guard the degenerate case instead of dividing by zero.
    if denominator == 0:
        return 0.0

    similarity = product / denominator

    return similarity

In [130]:
def Calculate_Top_k_Recommendation(user_vecotr, matrix,k):
    """
    Rank the rows of ``matrix`` by cosine similarity to ``user_vecotr``.

    Args:
        user_vecotr (np.array): query vector compared against every row
        matrix (np.array): rows to score; accessed via len() and integer indexing
        k (int): cutoff; the result holds the first k + 1 ranked entries
    Returns:
        list: (row index, cosine similarity) tuples, most similar first
    """
    scored_rows = [
        (row_idx, cosine_similarity(user_vecotr, matrix[row_idx]))
        for row_idx in range(len(matrix))
    ]

    # Highest similarity first; ties keep their original row order.
    ranked = sorted(scored_rows, key=lambda pair: pair[1], reverse=True)

    # The slice keeps k + 1 entries, not k.
    # NOTE(review): presumably the extra slot allows for the query row matching
    # itself when it is one of the matrix rows — confirm with callers.
    return ranked[:k+1]

In [131]:
# NOTE(review): `matrix` is only assigned in the In [106] cell shown further down,
# so that cell has to be run before this one on a fresh kernel.
Calculate_Top_k_Recommendation(matrix[0],matrix,3)

[(0, 0.9999999999999998),
 (3, 0.5773502691896258),
 (1, 0.4999999999999999),
 (2, 0.2886751345948129)]

In [106]:
# Build the demo matrix; the recorded output is a 4x8 array of 0/1 values.
# NOTE(review): the argument meaning (presumably rows, cols, low, high, seed) is
# defined in data_processing.data_generate — confirm there.
matrix = generate_random_matrix(4,8,0,1,42)
matrix

array([[0, 1, 0, 0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0, 0, 1, 0],
       [1, 1, 1, 0, 1, 0, 1, 1],
       [1, 1, 1, 1, 1, 1, 0, 0]])