In [1]:
'''Hi! Pluto'''

'Hi! Pluto'

In [2]:
import numpy as np
import pandas as pd
import math

In [3]:
movie_list = []

In [4]:
# Compute the eigenvalues and eigenvectors
def svd(A, k = -1):
    At_A = A.T@A
    eigenvalues, eigenvectors = np.linalg.eig(At_A)
    singularvalues = abs(eigenvalues) ** 0.5

    # Indices of sorted eigenvalues in descending order
    idx = np.argsort(singularvalues)[::-1]  

    sorted_singularvalues = singularvalues[idx]
    sorted_eigenvectors = eigenvectors[:, idx]

    kx = len(A)
    ky = len(A[0])

    if k > min(kx, ky):
        k = min(kx, ky)

    if k != -1:
        kx = k
        ky = k

    Sigma = np.diag(sorted_singularvalues)[:kx,:ky]

    V = sorted_eigenvectors.T
    if k != -1 :
        V = V[:k,:]

    U = []
    for i in range(min(kx, ky)):
        vi = np.array([V[i]]).T
        ui = ((((A @ vi) / sorted_singularvalues[i]).T)[0]) if sorted_singularvalues[i] != 0 else (np.zeros(kx))

        U.append(ui)

    U = np.array(U).T

    return U, Sigma, V

In [5]:
# Function to load data for recommender system

def load_data():
    ratings = pd.read_csv('Data/ratings.csv')
    movies = pd.read_csv('Data/movies.csv')

    # Create user-item matrix
    user_item_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

    # Populate movie_list with the given information
    global movie_list
    movie_list = [
        {'movie_id': row['movieId'], 'movie_name': row['title'], 'genre': row['genres']}
        for _, row in movies.iterrows()
    ]
 
    return np.array(user_item_matrix)


In [6]:
# Normalize the data (Standard scaling)
def normalize(X, max_rating):
    r_min = np.min(X)
    r_max = np.max(X)

    X = ((X - r_min) / (r_max - r_min)) * max_rating
    
    return X

In [7]:
def estimate_r(X, K = 5):
    u_k, sigma_k, v_t_k = svd(X, K)

    X = u_k @ sigma_k @ v_t_k

    X = normalize(X, 5)

    return X

In [8]:
def recommended_movies(R, R_hat, user_row, max_movies = 10):
    # Extract the user's predicted ratings and their original ratings
    user_predictions = R_hat[user_row]
    user_original_ratings = R[user_row]
    
    # Create a list of tuples (index, predicted rating)
    predictions_with_indices = [(index, rating) for index, rating in enumerate(user_predictions)]
    
    # Sort the predictions in descending order of predicted rating
    predictions_with_indices.sort(key=lambda x: x[1], reverse=True)
    
    # Select top 10 movies that were not rated by the user (original rating is 0)
    recommendations = []
    for movie_index, predicted_rating in predictions_with_indices:
        if user_original_ratings[movie_index] == 0 and len(recommendations) < max_movies:
            recommendations.append(movie_list[movie_index])
    
    return recommendations


In [9]:
def watched_movies_indices(R, user_row):
    # Find the indices where the rating is not zero
    user_ratings = R[user_row]  # Get the user's ratings row
    watched = []
    
    for movie_index, rating in enumerate(user_ratings):
        if rating != 0:  # Check if the movie is watched (rating is non-zero)
            watched.append(movie_list[movie_index])
    
    return watched


In [10]:
def human_readable_format(X):
    # Determine the width for the movie name column dynamically
    max_movie_name_length = max(len(movie['movie_name']) for movie in X)
    column_width = max_movie_name_length + 5  # Add some padding
    
    # Display recommendations
    print(f"{'Movie Name'.ljust(column_width)} Genre")
    print("-" * (column_width + 50))  # Separator line for clarity
    for movie in X:
        genres = ", ".join(movie['genre'].split('|'))  # Split and join genres
        print(f"{movie['movie_name'].ljust(column_width)} {genres}")


In [11]:
np.set_printoptions(suppress = True)

# Load dataset
R = load_data()

# R = R[:100, :250]

R_hat = estimate_r(R)


In [None]:
# input the user and display the recommended movies
user_row = 0  # Row number of the user (0-indexed)
watched = watched_movies_indices(R, user_row)
recommendations = recommended_movies(R, R_hat, user_row)

(610, 9724)


In [13]:
print("Already Watched Movies:")
human_readable_format(watched)

Already Watched Movies:
Movie Name                                                                                                          Genre
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
Toy Story (1995)                                                                                                    Adventure, Animation, Children, Comedy, Fantasy
Grumpier Old Men (1995)                                                                                             Comedy, Romance
Heat (1995)                                                                                                         Action, Crime, Thriller
Seven (a.k.a. Se7en) (1995)                                                                                         Mystery, Thriller
Usual Suspects, The (1995)                                                                                          Cr

In [14]:
print("Recommended Movies:")
human_readable_format(recommendations)

Recommended Movies:
Movie Name                                     Genre
------------------------------------------------------------------------------------------------
Terminator 2: Judgment Day (1991)              Action, Sci-Fi
Brazil (1985)                                  Fantasy, Sci-Fi
Godfather, The (1972)                          Crime, Drama
Die Hard (1988)                                Action, Crime, Thriller
Blade Runner (1982)                            Action, Sci-Fi, Thriller
Twelve Monkeys (a.k.a. 12 Monkeys) (1995)      Mystery, Sci-Fi, Thriller
Iron Giant, The (1999)                         Adventure, Animation, Children, Drama, Sci-Fi
True Lies (1994)                               Action, Adventure, Comedy, Romance, Thriller
Blues Brothers, The (1980)                     Action, Comedy, Musical
2001: A Space Odyssey (1968)                   Adventure, Drama, Sci-Fi
