### Singular Value Decomposition Example - Recommendation

In [1]:
import pandas as pd
import numpy as np
import pdb
import warnings

from numpy import *
from numpy import linalg as la

warnings.filterwarnings("ignore")

In [2]:
def euclidean_similarity(vector_a, vector_b):
    """Turn the Euclidean distance between two vectors into a similarity score.

    The distance is ``np.linalg.norm(vector_a - vector_b)`` and the score is
    ``1 / (1 + distance)``: identical vectors score 1.0 and the score decays
    towards 0 as the vectors move apart.

    Args:
        vector_a: First vector (anything supporting ``-`` with ``vector_b``).
        vector_b: Second vector, same shape as ``vector_a``.

    Returns:
        The similarity score as a float in (0, 1].
    """
    separation = np.linalg.norm(vector_a - vector_b)
    return 1.0 / (1.0 + separation)

In [3]:
def pearson_similarity(vector_a, vector_b):
    """Return the Pearson correlation of two vectors rescaled to [0, 1].

    The correlation coefficient r in [-1, 1] is mapped to ``0.5 + 0.5 * r``.

    Args:
        vector_a: First vector (1-D array or column matrix of observations).
        vector_b: Second vector, same shape as ``vector_a``.

    Returns:
        1.0 when ``len(vector_a) < 3`` (too few observations to correlate),
        0.5 when the correlation is undefined because one of the vectors has
        zero variance, otherwise ``0.5 + 0.5 * r``.
    """
    # With fewer than three observations the vectors are treated as fully similar.
    if len(vector_a) < 3:
        return 1.0

    # corrcoef divides by the standard deviations; a constant vector gives 0/0.
    # Silence the floating-point warning here and handle the NaN explicitly.
    with np.errstate(divide = "ignore", invalid = "ignore"):
        correlation = np.corrcoef(vector_a, vector_b, rowvar = 0)[0][1]

    # An undefined correlation carries no evidence either way, so report the
    # neutral midpoint instead of letting NaN leak into downstream averages.
    if np.isnan(correlation):
        return 0.5

    return 0.5 + 0.5 * correlation

In [4]:
def cosine_similarity(vector_a, vector_b):
    """Return the cosine of the angle between two vectors rescaled to [0, 1].

    The inputs are flattened first, so 1-D arrays, row/column 2-D arrays and
    ``np.matrix`` vectors are all accepted as long as both hold the same
    number of elements.

    Args:
        vector_a: First vector.
        vector_b: Second vector with the same number of elements.

    Returns:
        ``0.5 + 0.5 * cos(angle)`` as a float, or 0.0 when either vector has
        zero magnitude (the cosine is undefined in that case).
    """
    # Flatten to 1-D floats so the dot product is a true scalar regardless of
    # whether the caller passed arrays or np.matrix column vectors.
    flat_a = np.asarray(vector_a, dtype = float).ravel()
    flat_b = np.asarray(vector_b, dtype = float).ravel()

    dot_product = float(np.dot(flat_a, flat_b))
    denominator = np.linalg.norm(flat_a) * np.linalg.norm(flat_b)

    # A zero-length vector has no direction; report no similarity.
    if denominator == 0:
        return 0.0

    return 0.5 + 0.5 * (dot_product / denominator)

In [5]:
# Define the two example vectors as 1-D numpy arrays
A = np.array([2, 3, 0, 1, 0, 4, -5])
B = np.array([0, 1, 2, -4, 2, 0, 3])

# Wrap the arrays as matrices (each becomes a single-row matrix).
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
A = np.asmatrix(A)
B = np.asmatrix(B)

In [6]:
# Euclidean-distance similarity of the example vectors; A and B are passed transposed
print(euclidean_similarity(A.T, B.T))

0.08333333333333333


In [7]:
# Pearson-correlation similarity of the example vectors; A and B are passed transposed
print(pearson_similarity(A.T, B.T))

0.2665380020120951


In [8]:
# Cosine similarity of the example vectors; A and B are passed transposed
print(cosine_similarity(A.T, B.T))

0.3150010839748479


In [9]:
def standard_estimate(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    For every other item the user has rated (non-zero entry in the user's
    row), the similarity to ``item`` is measured over the rows that have
    positive values in both columns. The estimate is the similarity-weighted
    average of the user's own ratings.

    Args:
        dataMat: 2-D rating matrix indexed as ``dataMat[row, column]``;
            zero entries are treated as "not rated".
        user: Row index of the user.
        simMeas: Callable ``simMeas(vec_a, vec_b)`` returning a similarity.
        item: Column index of the item to estimate.

    Returns:
        The weighted average rating, or 0 when the similarities sum to zero.
    """
    item_count = np.shape(dataMat)[1]

    weight_sum = 0.0
    weighted_rating_sum = 0.0

    for other in range(item_count):
        rating = dataMat[user, other]

        # Items the user never rated contribute nothing.
        if rating == 0:
            continue

        # Rows holding a positive rating for both the target and this item.
        rated_both = np.logical_and(dataMat[:, item] > 0, dataMat[:, other] > 0)
        shared_rows = np.nonzero(rated_both)[0]

        # With no shared raters there is nothing to compare.
        weight = 0 if len(shared_rows) == 0 else simMeas(dataMat[shared_rows, item],
                                                          dataMat[shared_rows, other])

        weight_sum += weight
        weighted_rating_sum += weight * rating

    if weight_sum == 0:
        return 0
    return weighted_rating_sum / weight_sum

In [10]:
def singular_value_decomposition_estimate(dataMat, user, simMeas, item, n_components = 4):
    """Estimate ``user``'s rating of ``item`` using SVD-reduced item vectors.

    The rating matrix is decomposed with SVD and every item (column) is
    projected into a space spanned by the leading singular vectors. The
    estimate is the similarity-weighted average of the user's existing
    ratings, with similarities measured between the reduced item vectors.

    Note that the decomposition is recomputed on every call.

    Args:
        dataMat: 2-D rating matrix (array-like or ``np.matrix``) indexed as
            ``dataMat[row, column]``; zero entries are treated as "not rated".
        user: Row index of the user.
        simMeas: Callable ``simMeas(vec_a, vec_b)`` returning a similarity;
            it receives column vectors of the reduced item representation.
        item: Column index of the item to estimate.
        n_components: Maximum number of singular values to keep (default 4).
            It is clamped to the number of numerically non-zero singular
            values so small or rank-deficient matrices do not fail.

    Returns:
        The weighted average rating, or 0 when the similarities sum to zero
        or the matrix has no usable singular values.

    Raises:
        ValueError: If ``n_components`` is smaller than 1.
    """
    if n_components < 1:
        raise ValueError("n_components must be at least 1")

    # np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
    data = np.asmatrix(dataMat)

    # Get the number of items
    n = np.shape(data)[1]

    # Initialize totals for similarity and weighted ratings
    SimTotal = 0.0
    ratSimTotal = 0.0

    # Perform SVD on the data matrix
    U, Sigma, VT = np.linalg.svd(data)

    # Keep only singular values that are numerically non-zero: inverting a
    # diagonal block that contains a zero would fail.
    if Sigma.size:
        tolerance = Sigma.max() * max(data.shape) * np.finfo(float).eps
    else:
        tolerance = 0.0
    k = min(int(n_components), int(np.count_nonzero(Sigma > tolerance)))

    if k == 0:
        return 0

    # Diagonal matrix holding the k leading singular values
    Sig_k = np.asmatrix(np.diag(Sigma[:k]))

    # Transform the original item data into the k-dimensional space
    xformedItems = data.T * U[:, :k] * Sig_k.I

    for j in range(n):

        # Get the rating of the current item by the user
        userRating = data[user, j]

        if userRating == 0 or j == item:
            continue

        # Similarity between the target item and the current item in the reduced space
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)

        # Accumulate similarity and weighted rating totals
        SimTotal += similarity
        ratSimTotal += similarity * userRating

    if SimTotal == 0:
        return 0
    return ratSimTotal / SimTotal

In [11]:
def recommend(dataMat, user, N = 3, simMeas = cosine_similarity, estMethod = standard_estimate):
    """Return the top-``N`` unrated items for ``user`` ranked by estimated score.

    Args:
        dataMat: Rating matrix; must expose ``.A`` on its rows (``np.matrix``).
            Zero entries in the user's row mark items the user has not rated.
        user: Row index of the user.
        N: Maximum number of recommendations to return.
        simMeas: Similarity function forwarded to ``estMethod``.
        estMethod: Callable ``estMethod(dataMat, user, simMeas, item)`` that
            returns the estimated score of one item.

    Returns:
        A list of up to ``N`` ``(item_index, estimated_score)`` tuples sorted
        by score, highest first, or the string ``'You rated everything'``
        when the user's row has no zero entries.
    """
    # Column indices where the user's row holds a zero
    unrated = np.nonzero(dataMat[user, :].A == 0)[1]

    if len(unrated) == 0:
        return 'You rated everything'

    # Score every candidate, then rank from best to worst
    scored = [(candidate, estMethod(dataMat, user, simMeas, candidate))
              for candidate in unrated]
    scored.sort(key = lambda pair: pair[1], reverse = True)

    return scored[:N]

In [12]:
def load_data():
    """Return the hard-coded 12 x 11 example rating matrix as an ``np.matrix``.

    Zero entries are treated as "not rated" by the recommendation functions
    in this notebook, which index the matrix as ``[user, item]``.
    """
    matrix = [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
              [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
              [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
              [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
              [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
              [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
              [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
              [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
              [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
              [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
              [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
              [2, 1, 0, 2, 0, 5, 3, 0, 1, 0, 1]]

    # np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
    return np.asmatrix(matrix)

In [13]:
# Build the example rating matrix; later cells read it through the global name `data`
data = load_data()
print(data)

[[0 0 0 0 0 4 0 0 0 0 5]
 [0 0 0 3 0 4 0 0 0 0 3]
 [0 0 0 0 4 0 0 1 0 4 0]
 [3 3 4 0 0 0 0 2 2 0 0]
 [5 4 5 0 0 0 0 5 5 0 0]
 [0 0 0 0 5 0 1 0 0 5 0]
 [4 3 4 0 0 0 0 5 5 0 1]
 [0 0 0 4 0 4 0 0 0 0 4]
 [0 0 0 2 0 2 5 0 0 1 2]
 [0 0 0 0 5 0 0 0 0 4 0]
 [1 0 0 0 0 0 0 1 2 0 0]
 [2 1 0 2 0 5 3 0 1 0 1]]


In [14]:
# np.asmatrix is used instead of the np.mat alias, which was removed in NumPy 2.0
D = np.asmatrix(data) # Convert to a matrix format
U, Sigma, VT = la.svd(D) # Perform SVD on the matrix
Sig4 = np.asmatrix(np.eye(4) * Sigma[:4]) # Diagonal matrix of the 4 leading singular values
xItems = data.T * U[:, :4] * Sig4.I # Transform the original items into a 4-dimensional space
print(xItems)

[[-0.45889187  0.03170418 -0.01809311  0.11036907]
 [-0.3622062   0.04692163 -0.01141864  0.04254964]
 [-0.45537578  0.10423397 -0.00800224 -0.05403528]
 [-0.051868   -0.39701598 -0.05950012  0.06753374]
 [-0.01726089 -0.08392364  0.71965471 -0.13098077]
 [-0.09964753 -0.67126432 -0.11207725 -0.04038616]
 [-0.04619366 -0.25745027  0.05860349  0.87744841]
 [-0.45397947  0.09523267  0.03757744 -0.09430203]
 [-0.46909953  0.0672883  -0.0131357   0.00911101]
 [-0.01955354 -0.10798751  0.67233514  0.01344801]
 [-0.09629148 -0.52832652 -0.09176174 -0.42505074]]


In [15]:
# Top-4 recommendations for the user at row index 4, scoring unrated items with
# standard_estimate and cosine similarity.
# NOTE(review): `recommendataion` is misspelled, but the next cell reads this
# exact name, so it is left unchanged here.
user = 4
recommendataion = recommend(data, user, N = 4, simMeas = cosine_similarity, estMethod = standard_estimate)
print(recommendataion)

[(4, 5.0), (9, 5.0), (10, 4.804196825932594), (3, 4.666666666666667)]


In [16]:
# Print each (item index, estimated score) pair, rounding the score to 2 decimals
print("Recommended Item for User", user)
for i, p in recommendataion:
    print("Item ", i, "with predicted rating: ", round(p, 2))

Recommended Item for User 4
Item  4 with predicted rating:  5.0
Item  9 with predicted rating:  5.0
Item  10 with predicted rating:  4.8
Item  3 with predicted rating:  4.67


In [17]:
# Same query as before, but scoring unrated items with the SVD-based estimator.
# This overwrites the global `recommendataion` produced by the earlier cell.
user = 4
recommendataion = recommend(data, user, N = 4, simMeas = cosine_similarity, estMethod = singular_value_decomposition_estimate)
print(recommendataion)

[(10, 4.808129974963377), (4, 4.807852789245039), (9, 4.8035168885389705), (5, 4.795384164777592)]


In [18]:
# Print each (item index, estimated score) pair from the SVD-based run, rounded to 2 decimals
print("Recommended Item for User", user)
for i, p in recommendataion:
    print("Item ", i, "with predicted rating: ", round(p, 2))

Recommended Item for User 4
Item  10 with predicted rating:  4.81
Item  4 with predicted rating:  4.81
Item  9 with predicted rating:  4.8
Item  5 with predicted rating:  4.8
