In [131]:
import numpy as np
from numpy.linalg import norm
import pandas as pd

from random import normalvariate
from math import sqrt

In [132]:
# Load the MovieLens ratings table into a DataFrame. The preview below shows
# one row per rating with the columns userId, movieId, rating and timestamp.
df = pd.read_csv("./ml-latest-small/ratings.csv")
# Show the first rows as a quick sanity check on the parse.
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [133]:
# Reshape the long ratings table into a dense user x movie matrix:
#   - rows are userId, columns are movieId;
#   - if a (userId, movieId) pair occurs more than once, the first rating wins;
#   - pairs with no rating are filled with 0.0.
movie_ratings = (
    df.groupby(['userId', 'movieId'])['rating']
    .first()
    .unstack(fill_value=0.0)
)
movie_ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [134]:
# Dimensions of the pivoted matrix: (number of users, number of rated movies).
movie_ratings.shape

(610, 9724)

In [135]:
# Load the movie metadata table into a DataFrame. The preview below shows the
# columns movieId, title and genres; it is used later to turn ids into titles.
movies = pd.read_csv("./ml-latest-small/movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [136]:
# Dimensions of the metadata table: (number of catalogued movies, number of columns).
# NOTE(review): the row count reported here is larger than the number of columns
# in movie_ratings, so presumably some catalogued movies have no ratings.
movies.shape

(9742, 3)

In [137]:
def random_unit_vector(n):
    ''' Return a random direction in n dimensions as a list of floats.

        Draws n independent standard-normal samples and divides each one by
        the Euclidean length of the whole draw, so the result has norm 1.
        For n == 0 the result is an empty list.
    '''
    gaussian_draws = []
    for _ in range(n):
        gaussian_draws.append(normalvariate(0, 1))

    length = sqrt(sum(g * g for g in gaussian_draws))
    return list(map(lambda g: g / length, gaussian_draws))


def svd_1d(A, epsilon=1e-10):
    ''' The one-dimensional SVD

        Estimates a dominant singular vector of the 2-D array A by power
        iteration on the smaller of the two Gram matrices: A.T @ A when A has
        more rows than columns, A @ A.T otherwise. The returned unit vector
        therefore has length min(n, m); it is a right singular vector in the
        first case and a left singular vector in the second.

        Iteration stops once two successive iterates are parallel to within
        epsilon, i.e. |<v_new, v_old>| > 1 - epsilon, and the iteration count
        is printed.

        If the Gram matrix maps the current iterate to the zero vector (for
        example when A is all zeros), no direction is preferred, so the
        current unit vector is returned unchanged instead of dividing by
        zero and looping forever.

        Raises ValueError if the iterate's norm becomes NaN or infinite
        (e.g. A contains non-finite entries), since the convergence test
        could otherwise never succeed.
    '''

    n, m = A.shape

    # Start from a random direction; kept as an ndarray so every return path
    # yields the same type.
    current_v = np.asarray(random_unit_vector(min(n, m)), dtype=float)

    # Iterate on the smaller Gram matrix.
    if n > m:
        B = np.dot(A.T, A)
    else:
        B = np.dot(A, A.T)

    iterations = 0
    while True:
        iterations += 1
        last_v = current_v
        current_v = np.dot(B, last_v)
        length = norm(current_v)

        if not np.isfinite(length):
            raise ValueError('svd_1d: non-finite value encountered during power iteration')
        if length == 0:
            # B annihilates the iterate: every direction is equally valid.
            return last_v

        current_v = current_v / length

        if abs(np.dot(current_v, last_v)) > 1 - epsilon:
            print(f'converged in {iterations} iterations!')
            return current_v


def svd(A, k=None, epsilon=1e-10):
    '''
        Compute the singular value decomposition of a matrix A
        using the power method. A is the input matrix, and k
        is the number of singular values you wish to compute.
        If k is None, this computes the full-rank decomposition.

        A is converted to a 2-D float array of shape (n, m). epsilon is
        passed through to svd_1d as its convergence tolerance.

        Returns a tuple (U, S, Vt) where U has shape (n, k), S has shape
        (k,) and Vt has shape (k, m). With k == 0 the three arrays are
        empty but keep those shapes.

        Raises ValueError if k is negative.

        Note: each singular value is the norm of A (or A.T) applied to the
        vector found by svd_1d; if that norm is exactly zero the companion
        vector is obtained by dividing by zero and is therefore undefined.
    '''
    A = np.array(A, dtype=float)
    n, m = A.shape
    if k is None:
        k = min(n, m)
    if k < 0:
        raise ValueError(f'k must be non-negative, got {k}')
    if k == 0:
        # Nothing to compute; unpacking an empty zip below would fail.
        return np.empty((n, 0)), np.empty(0), np.empty((0, m))

    svd_so_far = []
    # Running deflated matrix: A minus the rank-one terms found so far.
    # Updating it in place performs the same subtractions, in the same order,
    # as rebuilding it from a fresh copy of A on every pass.
    residual = A.copy()

    for i in range(k):
        print(i, k)  # progress trace: component index, total requested

        if n > m:
            v = svd_1d(residual, epsilon=epsilon)  # next singular vector
            u_unnormalized = np.dot(A, v)
            sigma = norm(u_unnormalized)  # next singular value
            u = u_unnormalized / sigma
        else:
            u = svd_1d(residual, epsilon=epsilon)  # next singular vector
            v_unnormalized = np.dot(A.T, u)
            sigma = norm(v_unnormalized)  # next singular value
            v = v_unnormalized / sigma

        svd_so_far.append((sigma, u, v))

        # Deflate only if another component will be extracted.
        if i + 1 < k:
            residual -= sigma * np.outer(u, v)

    singular_values, us, vs = [np.array(x) for x in zip(*svd_so_far)]
    return us.T, singular_values, vs

In [138]:
# Factor the user x movie matrix, keeping the 50 leading singular triplets.
# svd returns (U, S, Vt): one column of U and one row of Vt per singular
# value in S. svd and svd_1d print their own progress lines while running.
U, S, Vt = svd(movie_ratings, k=50)
# Dump each factor followed by its shape.
print(f'U:\n {U}\n {U.shape}\n')
print(f'S:\n {S}\n {S.shape}\n')
# The label says 'Vh' but the value printed is the Vt variable.
print(f'Vh:\n {Vt}\n {Vt.shape}\n')

0 50
converged in 10 iterations!
1 50
converged in 25 iterations!
2 50
converged in 44 iterations!
3 50
converged in 55 iterations!
4 50
converged in 111 iterations!
5 50
converged in 53 iterations!
6 50
converged in 54 iterations!
7 50
converged in 457 iterations!
8 50
converged in 83 iterations!
9 50
converged in 108 iterations!
10 50
converged in 265 iterations!
11 50
converged in 308 iterations!
12 50
converged in 116 iterations!
13 50
converged in 204 iterations!
14 50
converged in 651 iterations!
15 50
converged in 166 iterations!
16 50
converged in 102 iterations!
17 50
converged in 394 iterations!
18 50
converged in 335 iterations!
19 50
converged in 664 iterations!
20 50
converged in 164 iterations!
21 50
converged in 247 iterations!
22 50
converged in 166 iterations!
23 50
converged in 333 iterations!
24 50
converged in 139 iterations!
25 50
converged in 446 iterations!
26 50
converged in 389 iterations!
27 50
converged in 351 iterations!
28 50
converged in 263 iterations!
29

In [139]:
# Rebuild the low-rank approximation U * diag(S) * Vt and label it with the
# same user index and movie columns as the original ratings matrix, so that
# entries can be read as predicted ratings.
sigma_diag_matrix = np.diag(S)
all_user_predicted_ratings = U.dot(sigma_diag_matrix).dot(Vt)
preds_df = pd.DataFrame(
    all_user_predicted_ratings,
    index=movie_ratings.index,
    columns=movie_ratings.columns,
)
preds_df.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2.181945,0.393686,0.83822,-0.082366,-0.546217,2.521687,-0.887167,-0.025204,0.196997,1.606683,...,-0.024982,-0.021413,-0.02855,-0.02855,-0.024982,-0.02855,-0.024982,-0.024982,-0.024982,-0.058993
2,0.209805,0.004825,0.030755,0.017253,0.183763,-0.06065,0.083316,0.023799,0.048103,-0.151965,...,0.018895,0.016196,0.021595,0.021595,0.018895,0.021595,0.018895,0.018895,0.018895,0.031966
3,0.013424,0.034724,0.05052,0.000199,-0.005554,0.114667,-0.007452,0.000741,0.004751,-0.061314,...,-0.001612,-0.001381,-0.001842,-0.001842,-0.001612,-0.001842,-0.001612,-0.001612,-0.001612,-0.000532
4,2.011838,-0.394911,-0.290461,0.093868,0.123117,0.259717,0.472492,0.035917,0.011216,-0.021738,...,0.001958,0.001679,0.002238,0.002238,0.001958,0.002238,0.001958,0.001958,0.001958,-0.021447
5,1.336753,0.772952,0.064573,0.113879,0.275024,0.584473,0.251061,0.131538,-0.086304,1.035321,...,-0.004407,-0.003777,-0.005036,-0.005036,-0.004407,-0.005036,-0.004407,-0.004407,-0.004407,-0.006101


In [140]:
def get_high_recommended_movies(user_id, max_recommend=100, min_rating=2,
                                ratings=None, predictions=None, catalog=None):
    ''' Return titles of the movies with the highest predicted rating for a user.

        A movie is a candidate when its predicted rating for user_id is
        strictly greater than min_rating. Candidates are ordered by predicted
        rating, highest first. Movies the user has already rated strictly
        above min_rating are left out; movies rated at or below min_rating
        are NOT excluded and can still be recommended.

        Parameters
        ----------
        user_id : row label of the user in ratings / predictions.
        max_recommend : maximum number of titles to return; values <= 0
            give an empty list.
        min_rating : threshold applied to both actual and predicted ratings.
        ratings : user x movie DataFrame of actual ratings; defaults to the
            notebook-level movie_ratings.
        predictions : user x movie DataFrame of predicted ratings; defaults
            to the notebook-level preds_df.
        catalog : DataFrame with 'movieId' and 'title' columns; defaults to
            the notebook-level movies.

        Returns
        -------
        list of str : titles with runs of whitespace collapsed to a single
            space. An id that is missing from catalog is returned as
            str(movie_id). If an id appears several times in catalog, the
            first title is used.

        Raises
        ------
        KeyError : if user_id is not a row label of ratings or predictions.
    '''
    # Fall back to the frames built earlier in the notebook.
    if ratings is None:
        ratings = movie_ratings
    if predictions is None:
        predictions = preds_df
    if catalog is None:
        catalog = movies

    user_ratings = ratings.loc[user_id]
    # A set gives O(1) membership tests in the filter below.
    already_liked = set(user_ratings[user_ratings > min_rating].index)

    user_predictions = predictions.loc[user_id]
    ranked_ids = (
        user_predictions[user_predictions > min_rating]
        .sort_values(ascending=False)
        .index
    )

    recommend_id = [movie_id for movie_id in ranked_ids
                    if movie_id not in already_liked]
    recommend_id = recommend_id[:max(max_recommend, 0)]

    # Look titles up directly. Parsing str(Series) would truncate titles
    # longer than pandas' display width and break on ids that are missing.
    title_by_id = catalog.drop_duplicates('movieId').set_index('movieId')['title']

    recommend_name = []
    for movie_id in recommend_id:
        title = title_by_id.get(movie_id, movie_id)
        recommend_name.append(' '.join(str(title).split()))

    return recommend_name

In [143]:
# User whose recommendations are shown below.
user_id = 156
# Alternative: read the user id interactively instead of hard-coding it.
# user_id = int(input())
recommend_movies = get_high_recommended_movies(user_id)

# Echo the chosen user, then list one recommended title per line,
# in the order returned by get_high_recommended_movies.
print(f'user_id = {user_id}\n')
print('recommendation Movies: ')
for movie in recommend_movies:
    print(movie)

user_id = 156

recommendation Movies: 
Vertigo (1958)
Wallace & Gromit: The Wrong Trousers (1993)
Life Is Beautiful (La Vita è bella) (1997)
Hunt for Red October, The (1990)
Three Kings (1999)
On the Waterfront (1954)
Broadcast News (1987)
Mrs. Doubtfire (1993)
Traffic (2000)
Midnight Cowboy (1969)
Dog Day Afternoon (1975)
Out of Sight (1998)
All About My Mother (Todo sobre mi madre) (1999)
Risky Business (1983)
Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
Princess Bride, The (1987)
400 Blows, The (Les quatre cents coups) (1959)
Sixth Sense, The (1999)
L.A. Story (1991)
Room with a View, A (1986)
Jerk, The (1979)
Double Indemnity (1944)
Jerry Maguire (1996)
Do the Right Thing (1989)
Boot, Das (Boat, The) (1981)
