In [42]:
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from scipy.linalg import svd

In [62]:
# Cosine similarity between two vectors
def cosine_similarity(v,u):
    """Return the cosine of the angle between the vectors ``v`` and ``u``.

    Parameters
    ----------
    v, u : numpy.ndarray
        1-D vectors of equal length.

    Returns
    -------
    float or int
        ``(v @ u) / (||v|| * ||u||)``.  ``0`` is returned when that value is
        undefined: either vector has zero norm, or the result is NaN.
    """
    denom = np.linalg.norm(v) * np.linalg.norm(u)
    # A zero-norm vector makes the cosine undefined.  Bail out before the
    # division instead of computing 0/0, which emits a RuntimeWarning and
    # produces NaN.
    if denom == 0:
        return 0
    sim = (v @ u) / denom
    # NaN entries in the inputs propagate to NaN here; treat that the same
    # way as "no similarity".
    if np.isnan(sim):
        return 0
    return sim

# Get the factor number given the reconstruction percentage
def get_k(sigma,percentage):
    """Return how many leading singular values are needed to reach ``percentage``.

    Parameters
    ----------
    sigma : 1-D array-like
        Singular values, in the order in which they would be kept.
    percentage : float
        Fraction of the total sum of squared singular values to retain
        (for example ``0.9``).

    Returns
    -------
    int
        The smallest ``k`` such that
        ``sum(sigma[:k] ** 2) >= percentage * sum(sigma ** 2)``.
        If the threshold is never reached (``percentage`` above 1, NaN
        values, or an empty ``sigma``) the full length of ``sigma`` is
        returned, so ``sigma[:k]`` keeps everything instead of the function
        silently returning ``None``.
    """
    sigma_sqr = np.asarray(sigma) ** 2
    if sigma_sqr.size == 0:
        return 0
    # Running sum of squares; the last entry is the total.
    energy = np.cumsum(sigma_sqr)
    reached = energy >= energy[-1] * percentage
    if not reached.any():
        return int(sigma_sqr.size)
    # argmax gives the first True position (0-based); k is a count.
    return int(np.argmax(reached)) + 1

In [89]:
def svdEst(testdata,user,item,formed_items):
    """Estimate ``user``'s rating of ``item`` as a similarity-weighted average.

    Every other item ``j`` that the user has rated (a non-zero, non-NaN entry
    ``testdata[user, j]``) contributes its rating, weighted by the cosine
    similarity between columns ``item`` and ``j`` of ``formed_items``.

    Parameters
    ----------
    testdata : 2-D numpy array
        Rating matrix indexed as ``[user, item]``; ``0`` means "not rated".
    user : int
        Row index of the user.
    item : int
        Column index of the item whose rating is estimated.
    formed_items : 2-D numpy array
        Matrix whose column ``j`` is the vector used to compare item ``j``.

    Returns
    -------
    float or int
        ``sum(sim_j * rating_j) / sum(sim_j)`` rounded to 3 decimals, or ``0``
        when the similarities sum to exactly zero (which includes the case of
        no rated items).

    Notes
    -----
    Cosine similarities can be negative, so the weights may partially cancel
    and the estimate can fall outside the range of the input ratings.
    """
    user_row = np.asarray(testdata[user, :]).ravel()
    # Visit only the columns the user actually rated, instead of looping over
    # every column and skipping most of them one by one.
    rated = np.flatnonzero((user_row != 0) & ~np.isnan(user_row))
    rated = rated[rated != item]

    # The target item's vector does not change inside the loop.
    target = formed_items[:, item].T

    sim_total = 0.0
    rat_sim_total = 0.0
    for j in rated:
        # the similarity between item and item j
        similarity = cosine_similarity(target, formed_items[:, j].T)
        sim_total += similarity
        # product of similarity and the rating of user to item j, then sum
        rat_sim_total += similarity * user_row[j]

    if sim_total == 0:
        return 0
    return np.round(rat_sim_total / sim_total, decimals=3)

def recommend(testdata,user, formed_items):
    """Rank every item the user has not rated by its estimated score.

    An item counts as unrated when ``testdata[user, item] == 0``.  Each such
    item is scored with ``svdEst``.

    Returns
    -------
    list of (item, estimated_score) tuples, highest score first.  When the
    user's row has no zero entries, ``'everything is rated'`` is printed and
    ``None`` is returned.
    """
    candidates = np.nonzero(testdata[user, :] == 0)[0].tolist()
    if not candidates:
        print('everything is rated')
        return None

    ranked = [
        (idx, svdEst(testdata, user, idx, formed_items))
        for idx in candidates
    ]
    # Stable sort: ties keep their original (ascending index) order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

In [98]:
# Location of the tab-separated ratings file
PATH = './data/anime/anime_ratings.dat'

# Load the raw ratings table
df = pd.read_csv(PATH, sep='\t')

# User x anime matrix of Feedback values; user/anime pairs absent from the
# pivot are filled with 0
reviewmatrix = (
    df
    .pivot_table(values="Feedback", index="User_ID", columns="Anime_ID")
    .fillna(0)
)

# Column-wise (per Anime_ID) min-max scaling.
# NOTE(review): a column whose min equals its max would divide by zero here
# and yield NaN - confirm no such column exists in the data.
normReviewmatrix = reviewmatrix.sub(reviewmatrix.min()).div(
    reviewmatrix.max() - reviewmatrix.min()
)

# Keep only the underlying dense numpy array (this is not a sparse matrix)
normReviewmatrix = normReviewmatrix.values

In [100]:
# Fraction of the total squared singular values to retain
percentage = 0.9
# Thin SVD: only the first min(rows, cols) singular vectors are computed.
# This cell uses just the leading k columns of u and rows of vt, so the full
# square factors built by the default full_matrices=True are never needed.
u,sigma,vt=svd(normReviewmatrix, full_matrices=False)
# Need to find the best k by exploring
k=get_k(sigma,percentage)

#Construct the diagonal matrix
sigma_k=np.diag(sigma[:k])

# Rank-k product u[:, :k] @ sigma_k @ vt[:k, :], rounded to 3 decimals.
# The result has the same shape as normReviewmatrix; downstream code compares
# items by taking columns of formed_items.
formed_items=np.around(np.dot(np.dot(u[:,:k], sigma_k),vt[:k, :]),decimals=3)

In [101]:
from datetime import datetime

# Wall-clock timestamp (HH:MM:SS) just before the call ...
started_at = datetime.now()
print(started_at.strftime("%H:%M:%S"))

# Score and rank all unrated items for the user in row 0
recommendation_0 = recommend(normReviewmatrix, 0, formed_items=formed_items)

# ... and just after it, to show roughly how long the ranking took
finished_at = datetime.now()
print(finished_at.strftime("%H:%M:%S"))

19:33:50
19:34:02


In [102]:
# (item column index, estimated score) pairs returned by recommend() for the
# user in row 0, sorted by estimated score in descending order
recommendation_0

[(7091, 2.333),
 (6594, 2.246),
 (6240, 1.959),
 (6241, 1.959),
 (6245, 1.959),
 (6246, 1.959),
 (6247, 1.959),
 (6248, 1.959),
 (2094, 1.833),
 (6775, 1.45),
 (5899, 1.43),
 (6506, 1.367),
 (5493, 1.294),
 (5044, 1.272),
 (4331, 1.18),
 (7064, 1.18),
 (6738, 1.178),
 (6739, 1.178),
 (6379, 1.132),
 (6469, 1.121),
 (4646, 1.112),
 (6274, 1.099),
 (7018, 1.095),
 (3060, 1.078),
 (7065, 1.078),
 (7066, 1.078),
 (7068, 1.078),
 (7069, 1.078),
 (6126, 1.077),
 (6127, 1.077),
 (2439, 1.054),
 (6496, 1.037),
 (6498, 1.037),
 (6499, 1.037),
 (6500, 1.037),
 (6501, 1.037),
 (6502, 1.037),
 (6503, 1.037),
 (6786, 1.035),
 (6627, 1.025),
 (6403, 1.018),
 (5907, 1.006),
 (7117, 1.006),
 (7118, 1.006),
 (6381, 0.98),
 (6383, 0.98),
 (6384, 0.98),
 (6260, 0.977),
 (6466, 0.977),
 (6244, 0.971),
 (6309, 0.971),
 (6252, 0.967),
 (6981, 0.965),
 (5194, 0.964),
 (5197, 0.964),
 (5199, 0.964),
 (5207, 0.964),
 (4763, 0.961),
 (6690, 0.96),
 (6692, 0.96),
 (5150, 0.958),
 (5130, 0.954),
 (5141, 0.954),
 

In [104]:
# Min-max scaled rating matrix as a numpy array (rows come from User_ID,
# columns from Anime_ID in the pivot)
normReviewmatrix

array([[0.8, 0. , 0.5, ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [103]:
# Product u[:, :k] @ diag(sigma[:k]) @ vt[:k, :] rounded to 3 decimals
formed_items

array([[ 0.634,  0.011,  0.302, ..., -0.001,  0.001, -0.001],
       [ 0.048, -0.012,  0.03 , ..., -0.002, -0.005, -0.003],
       [-0.017,  0.001, -0.03 , ..., -0.004,  0.   ,  0.   ],
       ...,
       [ 0.01 ,  0.004, -0.021, ..., -0.   , -0.002,  0.002],
       [ 0.005, -0.002, -0.002, ..., -0.   , -0.   , -0.002],
       [-0.001,  0.001,  0.002, ..., -0.   , -0.   , -0.001]])

In [15]:
# Column positions in the first row of the rating matrix that hold 0.
# np.asarray gives positional [row, col] indexing whether reviewmatrix is the
# pivoted DataFrame or already a numpy array; indexing the DataFrame directly
# with [0, :] is not positional and fails.
unrated_items=np.nonzero(np.asarray(reviewmatrix)[0,:]==0)[0].tolist()

if len(unrated_items)==0:
    print('everything is rated')
item_scores=[]

In [18]:
# Number of column positions collected in unrated_items
len(unrated_items)

7149

In [29]:
# Display the raw (unscaled) User_ID x Anime_ID pivot of Feedback values,
# with missing user/anime pairs filled with 0
(
    df
    .pivot_table(values="Feedback", index="User_ID", columns="Anime_ID")
    .fillna(0)
)

Anime_ID,1,2,3,4,5,6,7,8,9,10,...,7376,7377,7378,7379,7380,7381,7382,7383,7385,7390
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,8.0,0.0,5.0,0.0,9.0,9.0,8.0,9.0,8.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,8.0,10.0,9.0,8.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,7.0,0.0,0.0,0.0,10.0,0.0,0.0,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,10.0,8.0,0.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4996,0.0,10.0,0.0,0.0,0.0,8.0,0.0,0.0,9.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4998,0.0,0.0,0.0,0.0,0.0,0.0,10.0,10.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [99]:
# Inspect the scaled rating matrix (a numpy array after the .values step)
normReviewmatrix

array([[0.8, 0. , 0.5, ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [97]:
# Per-column (per Anime_ID) maximum of the pivoted rating matrix.
# The min-max scaling divides by reviewmatrix.max()-reviewmatrix.min().
reviewmatrix.max()

Anime_ID
1       10.0
2       10.0
3       10.0
4       10.0
5       10.0
        ... 
7381     7.0
7382     7.0
7383     6.0
7385     6.0
7390     6.0
Length: 7157, dtype: float64

In [94]:
# NOTE(review): the output recorded for this cell shows fractional User_ID /
# Anime_ID labels, unlike the integer labels in the pivot output elsewhere in
# this notebook, so it presumably comes from an earlier version of the
# data-loading cell - re-run to refresh.
reviewmatrix

Anime_ID,0.000000,0.000135,0.000271,0.000406,0.000541,0.000677,0.000812,0.000947,0.001083,0.001218,...,0.998105,0.998241,0.998376,0.998511,0.998647,0.998782,0.998917,0.999053,0.999323,1.000000
User_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0000,0.777778,0.000000,0.444444,0.0,0.888889,0.888889,0.777778,0.888889,0.777778,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0002,0.000000,0.000000,0.000000,0.0,0.777778,1.000000,0.888889,0.777778,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0004,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.777778,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0006,0.000000,0.666667,0.000000,0.0,0.000000,1.000000,0.000000,0.000000,0.555556,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0008,0.000000,0.000000,0.000000,0.0,0.000000,1.000000,0.777778,0.000000,1.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0.9992,0.000000,1.000000,0.000000,0.0,0.000000,0.777778,0.000000,0.000000,0.888889,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.9994,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.9996,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1.000000,1.000000,0.000000,0.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.9998,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
