In [150]:
# Imports
import pandas as pd
import numpy as np
from scipy.linalg import svd, diagsvd # SVD
from copy import deepcopy # Deep copy

In [151]:
# Load the ratings and reshape them into a user x movie table:
# one row per userId, one column per movieId, each cell holding the rating.
df = (
    pd.read_csv('./archive/ratings_small.csv')
    .loc[:, ['userId', 'movieId', 'rating']]
    .pivot(index='userId', columns='movieId', values='rating')
)

# Two fillings of the missing (user, movie) cells: 0 in df_0, 2.5 in df.
df_0 = df.fillna(0)
df = df.fillna(2.5)

# Plain NumPy arrays of both tables for the matrix computations.
matrix_a_0 = df_0.to_numpy()
matrix_a = df.to_numpy()

df.head(5)

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,...,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5
2,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,4.0,...,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5
3,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,...,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5
4,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,4.0,...,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5
5,2.5,2.5,4.0,2.5,2.5,2.5,2.5,2.5,2.5,2.5,...,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5


In [152]:
# Drop every movie column that holds no rating at all and remember the
# remaining row/column labels, so that positions in the rating matrices can be
# translated back to the userId / movieId values of the original df.
#
# The mask has to be computed on df_0 (missing ratings filled with 0): df was
# filled with 2.5, so `df != 0` is True everywhere and never drops a column.
df_cleared = df_0.loc[:, (df_0 != 0).any(axis=0)]

# matrix_a / matrix_a_0 were built from the unfiltered frames. If a column
# were actually dropped here, column positions in those matrices would no
# longer line up with df_column_list, so fail loudly instead of silently
# returning the wrong movieId.
assert df_cleared.shape == matrix_a.shape, (
    "df_cleared dropped columns: matrix positions no longer match df_column_list"
)

df_index_list = df_cleared.index.tolist()
df_column_list = df_cleared.columns.tolist()

# The label lists are built once as globals (see above) and passed in, which
# saves the time and memory of rebuilding them on every lookup.
def get_ids_from_matrix(df_index_list, df_column_list, user_id, movie_id):
    """Translate a matrix position into the labels stored at that position.

    Parameters
    ----------
    df_index_list : sequence
        Row labels; the entry at position ``user_id`` is returned.
    df_column_list : sequence
        Column labels; the entry at position ``movie_id`` is returned.
    user_id : int
        Position (matrix row) to look up in ``df_index_list``.
    movie_id : int
        Position (matrix column) to look up in ``df_column_list``.

    Returns
    -------
    tuple
        ``(df_index_list[user_id], df_column_list[movie_id])``.
    """
    row_label = df_index_list[user_id]
    column_label = df_column_list[movie_id]
    return row_label, column_label

In [153]:
# Pick a random matrix row (user).
userRow = np.random.randint(matrix_a.shape[0])

# Among that row's columns, pick a random one whose entry in the zero-filled
# matrix is not 0.
rated_columns = np.where(matrix_a_0[userRow] != 0)[0]
movieColumn = np.random.choice(rated_columns)

# Translate the matrix position back to the ids used in the original df.
userId, movieId = get_ids_from_matrix(
    df_index_list, df_column_list, userRow, movieColumn
)

print(f"Matrix Row -> {userRow!s}")
print(f"Original UserId -> {userId!s}\n")
print(f"Matrix Column -> {movieColumn!s}")
print(f"Original MovieId -> {movieId!s}")

Matrix Row -> 307
Original UserId -> 308

Matrix Column -> 658
Original MovieId -> 802


In [154]:
# Work on an independent copy so matrix_a keeps the original ratings.
matrix_b = deepcopy(matrix_a)

# Overwrite the selected cell with a random rating in half-point steps.
# np.random.randint excludes its upper bound, so the bound must be 11 for the
# draw to cover 1..10, i.e. 0.5 .. 5.0 after halving. With an upper bound of
# 10 the value 5.0 could never be produced.
# NOTE(review): assumes ratings range from 0.5 to 5.0 -- confirm against the data.
matrix_b[userRow, movieColumn] = np.random.randint(1, 11) / 2

changed_cell = (userRow, movieColumn)
print(f"Changed Cell -> {changed_cell!s}")
print(f"Old Value -> {matrix_a[changed_cell]!s}")
print(f"New value -> {matrix_b[changed_cell]!s}")

Changed Cell -> (307, 658)
Old Value -> 5.0
New value -> 0.5


In [155]:
# SVD - matrix B
# full_matrices=False requests the economy-size factors: with
# K = min(matrix_b.shape), U is (M, K), S has K entries and V is (K, N).
# Note that scipy returns the right factor already transposed (Vh), which is
# why the reconstruction below multiplies by V directly.
# The default full decomposition would additionally build an N x N right
# factor whose extra rows are only ever multiplied by the zero padding of
# sigma, costing time and memory without changing the result.
U, S, V = svd(matrix_b, full_matrices=False)

# Singular values are returned in non-increasing order, so zeroing the tail
# discards the weakest components and keeps a low-rank approximation.
N_DROPPED_SINGULAR_VALUES = 650
S[-N_DROPPED_SINGULAR_VALUES:] = 0

# Square (K, K) diagonal matrix matching the economy-size factors.
sigma = diagsvd(S, S.size, S.size)

# Reconstruct matrix b
matrix_b_reconstructed = U @ sigma @ V
matrix_b_reconstructed

array([[2.5458591 , 2.4874076 , 2.57806967, ..., 2.50642681, 2.50041507,
        2.49935478],
       [2.86763439, 2.94495448, 2.52096127, ..., 2.50333853, 2.50054336,
        2.50327431],
       [2.71794255, 2.57944439, 2.5259145 , ..., 2.50788778, 2.50320378,
        2.49656283],
       ...,
       [2.68459449, 2.45969417, 2.58529054, ..., 2.50489779, 2.50015042,
        2.48874305],
       [3.12520835, 2.60026828, 2.55163203, ..., 2.50376206, 2.5015419 ,
        2.51549858],
       [3.66423905, 2.55474206, 2.405029  , ..., 2.4982674 , 2.49770601,
        2.4957694 ]])

In [156]:
# Compare the rating stored in matrix A with the value that the reconstruction
# of matrix B yields at the same (row, column) position.
original_value = matrix_a[userRow, movieColumn]
reconstructed_value = matrix_b_reconstructed[userRow, movieColumn]

print(f"Old Value -> {original_value!s}")
print(f"New value -> {reconstructed_value!s}")

Old Value -> 5.0
New value -> 2.6277236952629526
