# Singular Value Decomposition for Netflix sample Dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from scipy.linalg import svd
import time

In [2]:
def get_error(data, test_data):
    """Return the mean squared error of ``data`` against ``test_data``.

    Both inputs are converted to arrays and flattened. Only positions where
    the flattened ``test_data`` is non-zero take part in the comparison
    (presumably 0 marks a missing rating -- confirm against the dataset).
    """
    predicted = np.array(data).flatten()
    observed = np.array(test_data).flatten()

    # Indices of the test entries that are actually populated.
    rated = np.flatnonzero(observed != 0)

    return mean_squared_error(observed[rated], predicted[rated])

In [3]:
def get_k_rank_approximation(S, k, shape=None):
    """Build the rectangular Sigma matrix that keeps only the top ``k`` singular values.

    Parameters
    ----------
    S : array-like, 1-D
        Singular values, in the order they should appear on the diagonal.
    k : int
        Number of leading singular values to keep; the rest are set to 0.
        Values larger than ``len(S)`` keep every singular value.
    shape : tuple of (int, int), optional
        ``(rows, cols)`` of the matrix that was decomposed. When omitted, the
        shape of the module-level ``data`` object is used, so existing
        two-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        A ``rows x cols`` frame that is zero everywhere except for the first
        ``min(k, len(S))`` diagonal entries.

    Raises
    ------
    ValueError
        If ``k`` is negative or ``S`` has more entries than the smaller
        dimension of ``shape``.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    if shape is None:
        # Backward-compatible fallback: size Sigma after the loaded data matrix.
        shape = data.shape
    n_rows, n_cols = shape

    singular_values = np.asarray(S).ravel()
    n_values = singular_values.shape[0]
    if n_values > min(n_rows, n_cols):
        raise ValueError(
            "S has more singular values than the smaller dimension of shape"
        )

    # Allocate the full rectangular matrix directly instead of building a
    # square one and padding it with hard-coded zero columns afterwards.
    Sigma = np.zeros((n_rows, n_cols))
    kept = min(k, n_values)
    diagonal = np.arange(kept)
    Sigma[diagonal, diagonal] = singular_values[:kept]

    return pd.DataFrame(Sigma)

In [4]:
# Load the ratings data; header=None means the CSV has no header row,
# so pandas numbers the columns itself.
data = pd.read_csv(
    '/home/drogon/Desktop/ELEG815/NETFLIX_data/ratingsData.csv',
    index_col=None,
    header=None,
)

In [5]:
# Load the test ratings; header=None means the CSV has no header row,
# so pandas numbers the columns itself.
test_data = pd.read_csv(
    '/home/drogon/Desktop/ELEG815/NETFLIX_data/ratingsTest.csv',
    index_col=None,
    header=None,
)

In [6]:
# Calculate SVD of A (the loaded data matrix) and report how long it took.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
t1 = time.perf_counter()
U, S, VT = svd(data)
t2 = time.perf_counter()
print('time taken', t2-t1)

time taken 0.10169291496276855


In [7]:
# Build one truncated Sigma matrix per rank of interest (k = 4, 6, 8, 100).
S_4, S_6, S_8, S_100 = (
    get_k_rank_approximation(S, rank) for rank in (4, 6, 8, 100)
)

In [8]:
# Rebuild the data matrix from each truncated Sigma: A_k = U . (Sigma_k . VT)
A_4, A_6, A_8, A_100 = (
    U.dot(sigma_k.dot(VT)) for sigma_k in (S_4, S_6, S_8, S_100)
)

In [9]:
# Score the raw data and every reconstruction against the test ratings.
initial_error = get_error(data, test_data)
error_4, error_6, error_8, error_100 = (
    get_error(approximation, test_data)
    for approximation in (A_4, A_6, A_8, A_100)
)

print('initial MSE: ', initial_error)
print(
    'MSE(k=4): ', error_4,
    ', MSE(k=6): ', error_6,
    ', MSE(k=8): ', error_8,
    ', MSE(k=100): ', error_100,
)

initial MSE:  13.753585723815878
MSE(k=4):  7.164110292138707 , MSE(k=6):  6.921986559109156 , MSE(k=8):  7.184498388809927 , MSE(k=100):  12.971426207764738
