In [5]:
 import pandas as pd

# Load the user ratings dataset
column_names = ["user_id", "item_id", "rating", "timestamp"]
ratings = pd.read_csv("ml-100k/ml-100k/u.data", sep="\t", names=column_names)

ratings.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [7]:
# Pivot the ratings dataframe to create a user-item matrix
user_item_matrix = ratings.pivot_table(index="user_id", columns="item_id", values="rating")
user_item_matrix

item_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [8]:
from sklearn.metrics.pairwise import cosine_similarity

# Fill the missing ratings with zeros and compute the cosine similarity
user_item_matrix_filled = user_item_matrix.fillna(0)
item_similarity_matrix = cosine_similarity(user_item_matrix_filled.T)
item_similarity_matrix

array([[1.        , 0.40238218, 0.33024479, ..., 0.        , 0.04718307,
        0.04718307],
       [0.40238218, 1.        , 0.27306918, ..., 0.        , 0.07829936,
        0.07829936],
       [0.33024479, 0.27306918, 1.        , ..., 0.        , 0.        ,
        0.09687505],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.04718307, 0.07829936, 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.04718307, 0.07829936, 0.09687505, ..., 0.        , 0.        ,
        1.        ]])

In [12]:
import numpy as np

def recommend_movies(user_id, n_recommendations=10):
    # Get the user's movie ratings and exclude movies they have already rated
    user_ratings = user_item_matrix.loc[user_id]
    rated_movies_ids = user_ratings[user_ratings.notnull()].index

    # Compute the predicted ratings for all movies by multiplying the item similarity matrix by the user's rating
    predicted_ratings = np.dot(item_similarity_matrix, user_ratings)

    # Rank the movies by their predicted ratings and exclude movies the user has already rated
    movie_rankings = np.argsort(predicted_ratings)[::-1]
    recommended_movie_ids = [movie_id for movie_id in movie_rankings if movie_id not in rated_movies_ids]

    return recommended_movie_ids[:n_recommendations]

# Recommend 10 movies for a specific user
recommended_movies = recommend_movies(user_id=1)
print("Recommended movies for user 1:", recommended_movies)

Recommended movies for user 1: [1681, 577, 553, 554, 555, 556, 557, 558, 559, 560]
