[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Komatsu-T/Samples/blob/main/machine_learning/singular%20value%20decomposition_2.ipynb)

# Sample of movie recommendation using a simple SVD algorithm

In [None]:
# --------------------------------------------------
# Libraries
# --------------------------------------------------
import numpy as np
import pandas as pd
import codecs

In [None]:
# --------------------------------------------------
# Download sample dataset
# --------------------------------------------------

# Download
!wget https://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -qq 'ml-100k.zip'

# DataFrame
rating_data = pd.read_csv('ml-100k/u.data', delimiter = '\t', header = None).iloc[:, :3]
rating_data.rename(columns={0: 'user_id', 1: 'item_id', 2: 'rating'}, inplace = True) # "0" means unevaluated

# Movie info
with codecs.open('ml-100k/u.item', 'r', 'utf-8', errors='ignore') as f:
    item_df = pd.read_table(f, delimiter='|', header=None)[[0, 1]]
    item_df.rename(columns = {0: 'item_id', 1: 'item_title'}, inplace = True)

--2024-05-13 15:21:43--  https://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4924029 (4.7M) [application/zip]
Saving to: ‘ml-100k.zip’


2024-05-13 15:21:44 (16.9 MB/s) - ‘ml-100k.zip’ saved [4924029/4924029]



In [None]:
# --------------------------------------------------
# Data matrix
# --------------------------------------------------

# Rating value matrix: one row per user_id, one column per item_id.
# (user, item) pairs without a rating are filled with 0, i.e. "unevaluated".
R_data = (
    rating_data
    .pivot(index = 'user_id', columns = 'item_id', values = 'rating')
    .fillna(0)
)

# Plain NumPy view of the same table, used by the SVD step
R = R_data.to_numpy()

In [None]:
# --------------------------------------------------
# Singular value decomposition
# --------------------------------------------------

# Matrix reduction by SVD
def matrix_reduction(X, degree):
    """
    Factorise X with a truncated SVD and return row / column embeddings.

    X: 2-D array-like to decompose
    degree: number of leading singular components to keep (non-negative integer);
            values larger than min(X.shape) simply keep every component

    Returns (row_embeddings, col_embeddings) with shapes (X.shape[0], k) and
    (X.shape[1], k), where k = min(degree, min(X.shape)). Both sides are scaled
    by the square root of the kept singular values, so
    row_embeddings @ col_embeddings.T is the rank-k approximation of X.

    Raises ValueError when degree is negative (a negative value would otherwise
    silently slice from the end and drop the wrong components).
    """
    if degree < 0:
        raise ValueError(f"degree must be non-negative, got {degree}")
    # SVD: the third return value is already transposed (its rows are the right singular vectors)
    U, S, Vt = np.linalg.svd(X, full_matrices = False)
    # Reduction: scale each kept column by sqrt(singular value) through broadcasting
    # instead of materialising a dense diagonal matrix and multiplying by it
    scale = np.sqrt(S[:degree])
    row_embeddings = U[:, :degree] * scale
    col_embeddings = Vt[:degree].T * scale
    return row_embeddings, col_embeddings

# Factorise the rating matrix, keeping the first 30 singular components
User_embeddings, Movie_embeddings = matrix_reduction(R, degree = 30)

# Scoring: inferenced rating for every (user, movie) pair
Inferenced_score = np.matmul(User_embeddings, Movie_embeddings.T)

In [None]:
# --------------------------------------------------
# Recommendation
# --------------------------------------------------

# Print recommendations
def recomendation(user_index, inferenced_matrix, rating_matrix, recomendation_num = 10, item_df = item_df):
    """
    Print the titles of the best-scoring movies that a user has not rated yet.

    user_index: row index of the user in both matrices (0-)
    inferenced_matrix: inferenced rating matrix (rows: users, columns: movies)
    rating_matrix: original rating matrix with the same layout; 0 means unevaluated
    recomendation_num: maximum number of movies to print
    item_df: DataFrame with 'item_id' and 'item_title' columns used to look up titles
             (the default is the notebook-level item_df as it existed when this
             function was defined)

    Column i of the matrices is looked up as item_id i+1, so the columns are
    assumed to be ordered by consecutive item ids starting at 1.
    The header line prints user_index itself (the row index) after "UserID:".
    Nothing is returned; the result is only printed.
    """
    # Get rating of the specified user
    original_rating = np.asarray(rating_matrix[user_index])
    inferenced_rating = np.asarray(inferenced_matrix[user_index])

    # Boolean mask of unevaluated movies; a vectorised lookup replaces the
    # per-candidate scan of a Python list (which was quadratic in the number of movies)
    is_unevaluated = original_rating == 0

    # Movie indices ordered from highest to lowest inferenced score
    ranking_index = np.argsort(inferenced_rating)[::-1]

    # Keep the ranking order, drop already-rated movies, cut to the requested count
    top_index = ranking_index[is_unevaluated[ranking_index]][:recomendation_num]
    recomendation_movie_index = [int(i) + 1 for i in top_index]

    # Print
    print(f"UserID: {user_index}")
    print("------------------------------------------------------------")
    for rank, idx in enumerate(recomendation_movie_index, start = 1):
        # First title whose item_id matches; raises IndexError if the id is absent from item_df
        title = item_df.loc[item_df['item_id'] == idx, 'item_title'].values[0]
        print(f"{rank}: {title}")
    print("------------------------------------------------------------")

# Show the default top-10 list for the user stored in row 266 of R
recomendation(user_index = 266, inferenced_matrix = Inferenced_score, rating_matrix = R)

UserID: 266
------------------------------------------------------------
1: Terminator 2: Judgment Day (1991)
2: Citizen Kane (1941)
3: Seven (Se7en) (1995)
4: Fugitive, The (1993)
5: Get Shorty (1995)
6: Princess Bride, The (1987)
7: Toy Story (1995)
8: Young Guns (1988)
9: Escape from New York (1981)
10: Monty Python's Life of Brian (1979)
------------------------------------------------------------
