## **📥 Environment Setup and Imports**


In [1]:
import numpy as np
import pandas as pd
# from itertools import combinations
# from sklearn.metrics.pairwise import cosine_similarity
# import matplotlib.pyplot as plt

## **🛠️ Functions**

In [None]:
def movieID_to_movieName(movieId, movies):
    """
    Retrieve movie title(s) from movie ID(s).

    A single ID returns a single title. An ordered collection of IDs returns
    the titles in the same order as the IDs were given (repeated IDs yield
    repeated titles), so the result can be paired with the input. IDs that are
    not present in ``movies`` are skipped.

    Parameters
    ----------
    movieId : int, list, tuple, set, pd.Series, pd.Index, or np.ndarray
        The movie ID or collection of movie IDs to look up.
    movies : pd.DataFrame
        A DataFrame indexed by movie IDs, with a column named 'title'
        containing the corresponding movie names.

    Returns
    -------
    str or np.ndarray
        The movie title if a single ID is provided, or an array of titles
        if multiple IDs are provided. For a set (which has no order) the
        titles follow the row order of ``movies``.

    Raises
    ------
    ValueError
        If a single ID is given and it does not match exactly one row.
    """
    titles = movies['title']

    if isinstance(movieId, (set, frozenset)):
        # Sets carry no order, so fall back to the catalogue's own row order.
        return np.array(titles[titles.index.isin(movieId)].tolist())

    if isinstance(movieId, (list, tuple, pd.Series, pd.Index, np.ndarray)):
        # Look the IDs up in the order given so title k belongs to ID k;
        # a boolean `isin` filter would silently reorder and de-duplicate.
        known_ids = [movie_id for movie_id in movieId if movie_id in titles.index]
        return np.array(titles.loc[known_ids].tolist())

    return titles[titles.index == movieId].item()


In [None]:
def movies_watched_by_user(userId, utility_matrix):
    """
    Return the movies a user has rated, together with the ratings.

    The user's row is taken from the utility matrix and every entry equal
    to 0 (the marker for "not rated") is discarded.

    Parameters
    ----------
    userId : int or str
        Row label of the user in ``utility_matrix``.
    utility_matrix : pd.DataFrame
        User-item matrix: one row per user, one column per movie ID,
        with 0 in cells the user has not rated.

    Returns
    -------
    np.ndarray
        Movie IDs (column labels) of the rated movies.
    np.ndarray
        The ratings given to those movies, in the same order.
    """
    user_row = utility_matrix.loc[userId]
    rated = user_row.loc[user_row.ne(0)]
    return np.array(rated.index), rated.values

In [None]:
def pearson_similarity_matrix(utility_matrix, theta=0.01):
    """
    Compute the Pearson similarity matrix for all users.

    For every pair of users the similarity is the cosine of their
    mean-centred rating vectors, restricted to the movies both have rated.
    Each user's ratings are centred on ``mean + theta`` rather than on the
    plain mean, so a rating equal to the mean still contributes a small
    non-zero value. One consequence: two users who share exactly one movie
    get a similarity of 1 (both centred values are ``-theta``).

    Pairs with no movie in common, or whose centred vectors have zero norm,
    get a similarity of 0.

    Parameters
    ----------
    utility_matrix : pd.DataFrame
        A user-item matrix where each row represents a user and each column
        represents a movie. The values are ratings (non-zero) or 0 if unrated.
        NaN cells are also treated as unrated.
    theta : float, default 0.01
        Small offset added to each user's mean before centring.

    Returns
    -------
    pd.DataFrame
        A symmetric DataFrame where entry (i, j) represents the Pearson similarity
        between user i and user j.
    """
    users = utility_matrix.index
    n_users = len(users)

    # Work on plain arrays: converting once is far cheaper than slicing a
    # pandas row for every (i, j) pair inside the double loop.
    ratings = utility_matrix.to_numpy(dtype=float)
    rated = (ratings != 0) & ~np.isnan(ratings)

    similarities = np.zeros((n_users, n_users))

    for i in range(n_users):
        # Row i does not change while j varies, so fetch it once.
        u, u_rated = ratings[i], rated[i]

        for j in range(i, n_users):
            # Movies rated by both users
            common = u_rated & rated[j]
            if not common.any():
                continue

            u_common = u[common]
            v_common = ratings[j][common]

            u_centered = u_common - (theta + u_common.mean())
            v_centered = v_common - (theta + v_common.mean())

            denominator = np.linalg.norm(u_centered) * np.linalg.norm(v_centered)
            if denominator == 0:
                continue

            sim = np.dot(u_centered, v_centered) / denominator
            # The measure is symmetric, so fill both triangles at once.
            similarities[i, j] = sim
            similarities[j, i] = sim

    return pd.DataFrame(similarities, index=users, columns=users)


In [None]:
def common_movies_mean(utility_matrix):
    """
    Compute, for each pair of users, each user's mean rating over their common movies.

    Entry (i, j) is the average rating user i gave to the movies that both
    user i and user j rated. The matrix is generally NOT symmetric: entry
    (j, i) is user j's mean over the same set of movies. The diagonal entry
    (i, i) is user i's mean over all the movies they rated. Pairs with no
    movie in common get 0.

    Parameters
    ----------
    utility_matrix : pd.DataFrame
        A user-item ratings matrix where each row corresponds to a user and each
        column to a movie. Unrated items are represented by 0; NaN cells are
        also treated as unrated.

    Returns
    -------
    pd.DataFrame
        A square DataFrame of shape (n_users, n_users) where entry (i, j) is the
        mean rating of user i over the movies both user i and user j rated.
    """
    users = utility_matrix.index

    ratings = utility_matrix.to_numpy(dtype=float)
    ratings = np.where(np.isnan(ratings), 0.0, ratings)
    rated = (ratings != 0).astype(float)

    # ratings[i] . rated[j] adds up user i's ratings over the movies user j
    # rated; movies user i skipped contribute 0, so this is exactly the sum
    # over the movies they have in common.
    rating_sums = ratings @ rated.T
    # rated[i] . rated[j] counts the movies both users rated.
    common_counts = rated @ rated.T

    means = np.divide(
        rating_sums,
        common_counts,
        out=np.zeros_like(rating_sums),
        where=common_counts > 0,  # leave 0 where there is nothing in common
    )
    return pd.DataFrame(means, index=users, columns=users)

In [None]:
def top_k_similar_users(userId, k, similarity_matrix):
    """
    Return the k users most similar to ``userId``, best match first.

    Parameters
    ----------
    userId : int or str
        Label of the target user in ``similarity_matrix``.

    k : int
        How many neighbours to return.

    similarity_matrix : pd.DataFrame
        Square user-by-user matrix; entry (i, j) is the similarity between
        user i and user j.

    Returns
    -------
    top_k_users : np.ndarray
        IDs of the k most similar users, in descending order of similarity.
        The target user is never included.

    top_k_similarities : np.ndarray
        Similarity values matching ``top_k_users`` position by position.
    """
    # Rank everyone (the target included) and only then remove the target.
    ranked = similarity_matrix.loc[userId].sort_values(axis=0, ascending=False)
    ranked = ranked.drop(index=userId)

    best = ranked.iloc[:k]
    return np.array(best.index), np.array(best.values)

In [None]:
def potential_movies_matrix(userId, top_similar_users, utility_matrix):
    """
    Build the candidate-movie ratings table for a user.

    The table holds the ratings of the given similar users, limited to
    movies the target user has not rated and that at least one of those
    similar users has rated.

    Parameters
    ----------
    userId : int or str
        The ID of the target user.

    top_similar_users : list or np.ndarray
        IDs of the users whose ratings should be considered.

    utility_matrix : pd.DataFrame
        User-item matrix (rows are users, columns are movies) with 0 for
        unrated cells.

    Returns
    -------
    potential_movies : pd.DataFrame
        One row per similar user (in the order given) and one column per
        candidate movie; cells hold that user's rating, or 0 if unrated.
    """
    already_rated, _ = movies_watched_by_user(userId, utility_matrix)

    # Neighbours' rows, minus everything the target user has already rated.
    neighbour_ratings = utility_matrix.loc[top_similar_users].drop(columns=already_rated)

    # Keep only the movies that at least one neighbour actually rated.
    rated_by_someone = neighbour_ratings.columns[neighbour_ratings.ne(0).any()]
    return neighbour_ratings[rated_by_someone]

In [None]:
def predict_all_ratings_per_user(userId, utility_matrix, similarity_matrix, corresponding_means_matrix):
    """
    Predict ratings for all unrated movies for a given user based on the ratings of other users.

    This function implements a user-based collaborative filtering prediction.
    Every other user acts as a neighbour. For each candidate movie, the
    neighbours' ratings are centred on their own mean (taken over the movies
    they share with the target user), combined using the similarities as
    signed weights, normalised by the sum of absolute weights, and shifted by
    the target user's mean rating. Movies whose total absolute weight is 0
    fall back to the target user's mean.

    The raw predictions are then min-max rescaled per user so that the lowest
    one maps to 0.5 and the highest to 5.0. If all raw predictions are equal
    (so there is no range to rescale), they are clipped to [0.5, 5.0] instead
    of being divided by zero.

    Parameters
    ----------
    userId : int or str
        The ID of the user for whom predictions are to be made.

    utility_matrix : pd.DataFrame
        A user-item matrix with rows as users and columns as movie IDs.
        Values represent user ratings, and unrated entries are assumed to be 0.

    similarity_matrix : pd.DataFrame
        A square matrix where element (i, j) represents the similarity between user i and user j.

    corresponding_means_matrix : pd.DataFrame
        A matrix where element (i, j) is the mean rating of user i for the set of items that both users i and j have rated.

    Returns
    -------
    predictions : pd.Series
        Predicted ratings indexed by movie ID, covering the movies the target
        user has not rated and at least one other user has. Values lie in
        [0.5, 5.0] and are rounded to 4 decimal places.
    """
    rating_floor, rating_ceiling = 0.5, 5.0

    # Use every other user as a neighbour; the similarity weights decide how
    # much (and in which direction) each one counts.
    n_neighbours = len(similarity_matrix) - 1
    top_similar_users, top_similarities = top_k_similar_users(userId, n_neighbours, similarity_matrix)

    # Diagonal entry: the user's mean over the movies shared with themself.
    user_mean = corresponding_means_matrix.loc[userId, userId]

    ratings_by_similar_users = potential_movies_matrix(userId, top_similar_users, utility_matrix)
    mask = ratings_by_similar_users != 0

    # Each neighbour's mean over the movies shared with the target user.
    common_movies_mean_rating = corresponding_means_matrix.loc[top_similar_users, userId]

    # Centre the neighbours' ratings; unrated cells must contribute nothing.
    adj_ratings_by_similar_users = (
        ratings_by_similar_users
        .sub(common_movies_mean_rating, axis="rows")
        .where(mask, 0.0)
    )

    # Align weights with rows by user label rather than by position.
    similarity_by_user = pd.Series(top_similarities, index=top_similar_users)
    effective_weights = mask.astype(float).mul(similarity_by_user, axis=0)

    numerator = (adj_ratings_by_similar_users * effective_weights).sum(axis=0)
    # A zero total weight means no usable neighbour for that movie.
    denominator = effective_weights.abs().sum(axis=0).replace(0, np.nan)

    predictions = (numerator / denominator + user_mean).fillna(user_mean)

    lowest = predictions.min()
    spread = predictions.max() - lowest
    if spread > 0:
        predictions = (predictions - lowest) / spread * (rating_ceiling - rating_floor) + rating_floor
    else:
        # All predictions identical (or none at all): rescaling would divide
        # by zero, so just keep the values inside the rating scale.
        predictions = predictions.clip(lower=rating_floor, upper=rating_ceiling)

    return predictions.round(4)

In [None]:
def predict_all_ratings(utility_matrix, similarity_matrix, corresponding_means_matrix):
    """
    Run the per-user prediction for every user and collect the results.

    A frame with the same row and column labels as the utility matrix is
    created filled with NaN, and each user's row is then filled in from
    `predict_all_ratings_per_user`. Cells for which that function returns
    no prediction keep their NaN.

    Parameters
    ----------
    utility_matrix : pd.DataFrame
        User-item matrix; rows are users, columns are movie IDs, and
        unrated cells hold 0.

    similarity_matrix : pd.DataFrame
        Square matrix whose (i, j) entry is the similarity between
        user i and user j.

    corresponding_means_matrix : pd.DataFrame
        Matrix whose (i, j) entry is user i's mean rating over the movies
        both user i and user j rated.

    Returns
    -------
    predic_matrix : pd.DataFrame
        Predicted ratings, shaped and labelled like ``utility_matrix``.
        Predictions are in [0.5, 5.0], rounded to 4 decimal places.
    """
    predicted = pd.DataFrame(
        np.nan,
        index=utility_matrix.index,
        columns=utility_matrix.columns,
    )

    for user_id in utility_matrix.index:
        # Assignment aligns on movie ID, so missing movies stay NaN.
        predicted.loc[user_id] = predict_all_ratings_per_user(
            user_id, utility_matrix, similarity_matrix, corresponding_means_matrix
        )

    return predicted

## **🔍 Load and Explore Data**

In [10]:
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [11]:
movies = pd.read_csv("movies.csv").set_index('movieId')
movies.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


## **📊 User-Based Collaborative Filtering**

The cells below build the three matrices that the prediction functions take as input: the utility matrix, the user-user similarity matrix, and the corresponding means matrix.

### 1-Utility Matrix

In [12]:
# Reshape the long ratings table into a users x movies matrix.
# (user, movie) pairs with no rating show up as NaN at this stage.
utility_matrix = ratings.pivot(index='userId', columns='movieId', values='rating')

In [None]:

# Every movie in the catalogue gets a column, even if nobody rated it.
all_movie_ids = movies.index
# Movie IDs that appear in the catalogue but not in any rating
missing_movie_ids = set(all_movie_ids) - set(utility_matrix.columns)
# One reindex adds the missing columns (filled with NaN) and sorts all
# columns by movie ID, instead of inserting the columns one at a time.
utility_matrix = utility_matrix.reindex(
    sorted(set(utility_matrix.columns) | missing_movie_ids), axis=1
)


In [14]:
utility_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,


In [15]:
# The functions above detect "not rated" with `!= 0`, so encode missing ratings as 0.
utility_matrix.fillna(0,inplace=True)
utility_matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 2-Similarity Matrix

In [16]:
similarity_matrix=pearson_similarity_matrix(utility_matrix)
similarity_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,1.0,0.079941,0.208048,0.268862,-0.291465,-0.11863,0.469758,0.918551,-0.037892,...,0.092076,0.000144,-0.061392,-0.40726,-0.164715,0.066536,0.174656,0.268163,-0.174994,-0.031853
2,1.0,1.0,0.0,1.0,1.0,0.019996,-0.990055,1.0,0.0,0.037868,...,-0.386673,1.0,-0.998202,0.0,0.0,0.583507,1.0,-0.123961,1.0,0.623383
3,0.079941,0.0,1.0,1.0,1.0,0.021208,0.0,1.0,0.0,0.0,...,0.019996,0.01,0.433224,0.0,1.0,-0.791225,-0.333247,-0.394956,0.0,0.569571
4,0.208048,1.0,1.0,1.0,-0.33641,0.148564,0.54289,0.117941,1.0,0.485815,...,-0.221893,0.396676,0.090155,-0.080194,0.400174,0.144709,0.116581,-0.170411,-0.277158,-0.043673
5,0.268862,1.0,1.0,-0.33641,1.0,0.043261,0.158233,0.028458,0.0,-0.77754,...,0.000326,0.153384,0.234814,0.067943,-0.363962,0.244427,0.231171,-0.020421,0.384258,0.040735


### 3-Corresponding Means Matrix       

In [17]:
corresponding_means_matrix=common_movies_mean(utility_matrix)
corresponding_means_matrix.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.366379,5.0,4.857143,4.422222,4.230769,4.30303,4.423077,4.2,4.4,4.5,...,4.818182,4.222222,4.427273,3.916667,4.371429,4.404762,4.266667,4.301471,4.111111,4.478261
2,4.0,3.948276,0.0,4.5,3.0,3.5,3.833333,3.0,0.0,4.0,...,3.909091,3.0,4.25,0.0,0.0,3.9,3.0,3.833333,3.0,3.861111
3,2.357143,0.0,2.435897,0.5,0.5,0.5,0.0,0.5,0.0,0.0,...,0.5,0.5,1.5,0.0,3.5,1.4375,1.625,1.5,0.0,2.928571
4,3.622222,1.0,5.0,3.555556,3.75,3.185185,3.454545,2.666667,3.0,3.222222,...,3.133333,3.26087,3.582734,3.111111,3.875,3.53125,3.724138,3.243243,3.333333,3.603774
5,3.923077,3.0,5.0,4.083333,3.636364,3.555556,3.666667,3.526316,0.0,4.0,...,4.0,3.419355,3.64,3.421053,3.928571,3.826087,3.466667,3.6,3.7,3.571429


## **🧪 Testing Functions For One User**

In [18]:
test_user=52

In [19]:
movies_watched, movies_watched_rating=movies_watched_by_user(test_user, utility_matrix)
movies_watched, movies_watched_rating

(array([   150,    260,    293,    318,    356,    364,    588,    733,
          1196,   1198,   1210,   1704,   1784,   1923,   2324,   2571,
          2959,   3753,   3996,   4901,   5010,   5064,   5418,   5669,
          6874,   7153,   7361,   7438,   7445,   8464,   8665,   8874,
          8961,  30749,  31685,  31696,  33646,  34405,  34437,  35836,
         36529,  37733,  39183,  39444,  40583,  44191,  44665,  45447,
         46976,  47099,  47200,  47610,  47997,  48304,  48516,  48738,
         48774,  49272,  49530,  49651,  50794,  50872,  51077,  51255,
         51662,  52245,  52281,  52328,  52973,  54286,  54503,  54736,
         54995,  54997,  54999,  55118,  55276,  55363,  55765,  56801,
         57368,  57528,  57669,  58559,  58998,  59369,  59784,  59900,
         60069,  60684,  61132,  62374,  63082,  63113,  64620,  65514,
         68358,  68954,  69122,  69481,  70286,  71535,  72998,  73017,
         74458,  76093,  79091,  79132,  81847,  86892,  89745, 

In [20]:
movieID_to_movieName(movies_watched,movies)

array(['Apollo 13 (1995)', 'Star Wars: Episode IV - A New Hope (1977)',
       'Léon: The Professional (a.k.a. The Professional) (Léon) (1994)',
       'Shawshank Redemption, The (1994)', 'Forrest Gump (1994)',
       'Lion King, The (1994)', 'Aladdin (1992)', 'Rock, The (1996)',
       'Star Wars: Episode V - The Empire Strikes Back (1980)',
       'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)',
       'Star Wars: Episode VI - Return of the Jedi (1983)',
       'Good Will Hunting (1997)', 'As Good as It Gets (1997)',
       "There's Something About Mary (1998)",
       'Life Is Beautiful (La Vita è bella) (1997)', 'Matrix, The (1999)',
       'Fight Club (1999)', 'Patriot, The (2000)',
       'Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)',
       'Spy Game (2001)', 'Black Hawk Down (2001)',
       'The Count of Monte Cristo (2002)', 'Bourne Identity, The (2002)',
       'Bowling for Columbine (2002)', 'Kill Bill: Vol. 1 (2003)',
       'Lor

In [21]:
predictions_by_func=predict_all_ratings_per_user(test_user, utility_matrix, similarity_matrix, corresponding_means_matrix)

In [22]:
predictions_by_func

movieId
1         2.8999
2         2.8652
3         2.9253
4         3.0193
5         2.6508
           ...  
193581    2.7175
193583    2.4470
193585    2.4470
193587    2.4470
193609    2.9301
Length: 9594, dtype: float64

## **🔮 User-Item Predictions**


In [23]:
user_movie_predictions_df=predict_all_ratings(utility_matrix, similarity_matrix, corresponding_means_matrix)

In [24]:
user_movie_predictions_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,2.5913,,2.4772,2.5710,,2.6539,2.5771,2.2804,2.6276,...,2.7740,2.4921,3.0559,3.0559,2.7740,3.0559,2.7740,2.7740,2.7740,3.0277
2,2.6169,2.6303,2.6407,1.9087,2.2891,2.6709,2.3010,2.2019,2.3925,2.5779,...,2.1119,1.8433,2.3806,2.3806,2.1119,2.3806,2.1119,2.1119,2.1119,3.0970
3,2.6682,2.6968,2.5023,2.8474,2.6860,2.8299,2.5227,2.3996,2.6444,2.6852,...,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500
4,2.6927,2.6519,2.4369,2.1782,2.5013,2.5845,2.4085,2.2479,2.4116,2.5016,...,3.0147,3.2794,2.7500,2.7500,3.0147,2.7500,3.0147,3.0147,3.0147,2.4191
5,,2.6211,2.4780,2.0989,2.4920,2.7570,2.5952,2.6771,2.0770,2.5862,...,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.8914
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,2.6827,2.5935,2.5788,2.4202,2.9606,,2.2390,2.3800,2.7075,...,3.0373,3.3347,2.7400,2.7400,3.0373,2.7400,3.0373,3.0373,3.0373,3.0770
607,,2.5439,2.7719,2.7577,2.5051,2.6500,2.6387,2.0949,2.4730,2.4937,...,2.6176,2.3529,2.8824,2.8824,2.6176,2.8824,2.6176,2.6176,2.6176,2.4522
608,,,,2.2463,2.4551,3.1230,2.6616,2.4923,2.4567,,...,2.8231,2.5316,3.1145,3.1145,2.8231,3.1145,2.8231,2.8231,2.8231,3.1943
609,,2.7669,2.7458,2.3065,2.5795,3.1034,2.6160,2.4761,2.3798,,...,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.7455


In [25]:
from pathlib import Path

# to_csv does not create folders, so make sure the target one exists first.
Path('Output').mkdir(parents=True, exist_ok=True)
user_movie_predictions_df.to_csv('Output/Users_Movies_Predictions_Raw.csv', index=True)

In [26]:
# Cells without a prediction are NaN in the raw matrix (in the output above, movies the user already rated); store them as 0.
processed_user_movie_predictions_df=user_movie_predictions_df.fillna(0)
# Swap the movie-ID column labels for movie titles (one title lookup per column).
processed_user_movie_predictions_df.columns = processed_user_movie_predictions_df.columns.map(lambda x: movieID_to_movieName(x, movies))

In [27]:
processed_user_movie_predictions_df

movieId,Toy Story (1995),Jumanji (1995),Grumpier Old Men (1995),Waiting to Exhale (1995),Father of the Bride Part II (1995),Heat (1995),Sabrina (1995),Tom and Huck (1995),Sudden Death (1995),GoldenEye (1995),...,Gintama: The Movie (2010),anohana: The Flower We Saw That Day - The Movie (2013),Silver Spoon (2014),Love Live! The School Idol Movie (2015),Jon Stewart Has Left the Building (2015),Black Butler: Book of the Atlantic (2017),No Game No Life: Zero (2017),Flint (2017),Bungo Stray Dogs: Dead Apple (2018),Andrew Dice Clay: Dice Rules (1991)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0000,2.5913,0.0000,2.4772,2.5710,0.0000,2.6539,2.5771,2.2804,2.6276,...,2.7740,2.4921,3.0559,3.0559,2.7740,3.0559,2.7740,2.7740,2.7740,3.0277
2,2.6169,2.6303,2.6407,1.9087,2.2891,2.6709,2.3010,2.2019,2.3925,2.5779,...,2.1119,1.8433,2.3806,2.3806,2.1119,2.3806,2.1119,2.1119,2.1119,3.0970
3,2.6682,2.6968,2.5023,2.8474,2.6860,2.8299,2.5227,2.3996,2.6444,2.6852,...,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500,2.7500
4,2.6927,2.6519,2.4369,2.1782,2.5013,2.5845,2.4085,2.2479,2.4116,2.5016,...,3.0147,3.2794,2.7500,2.7500,3.0147,2.7500,3.0147,3.0147,3.0147,2.4191
5,0.0000,2.6211,2.4780,2.0989,2.4920,2.7570,2.5952,2.6771,2.0770,2.5862,...,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.6857,2.8914
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0000,2.6827,2.5935,2.5788,2.4202,2.9606,0.0000,2.2390,2.3800,2.7075,...,3.0373,3.3347,2.7400,2.7400,3.0373,2.7400,3.0373,3.0373,3.0373,3.0770
607,0.0000,2.5439,2.7719,2.7577,2.5051,2.6500,2.6387,2.0949,2.4730,2.4937,...,2.6176,2.3529,2.8824,2.8824,2.6176,2.8824,2.6176,2.6176,2.6176,2.4522
608,0.0000,0.0000,0.0000,2.2463,2.4551,3.1230,2.6616,2.4923,2.4567,0.0000,...,2.8231,2.5316,3.1145,3.1145,2.8231,3.1145,2.8231,2.8231,2.8231,3.1943
609,0.0000,2.7669,2.7458,2.3065,2.5795,3.1034,2.6160,2.4761,2.3798,0.0000,...,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.9251,2.7455


In [28]:
from pathlib import Path

# to_csv does not create folders, so make sure the target one exists first.
Path('Output').mkdir(parents=True, exist_ok=True)
processed_user_movie_predictions_df.to_csv('Output/Users_Movies_Predictions_Processed.csv', index=True)