In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Loads movie ratings data from a CSV file into a Pandas DataFrame.
data = pd.read_csv('ratings_small.csv')
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [3]:
# Creates a user-movie matrix
matrix = data.pivot_table(
    index = 'userId',
    columns = 'movieId',
    values = 'rating'
).fillna(0)
matrix.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
cos = cosine_similarity(matrix)
df = pd.DataFrame(cos,
                 index = matrix.index,
                 columns = matrix.index,
                 )
df.head()

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.0,0.0,0.074482,0.016818,0.0,0.083884,0.0,0.012843,0.0,...,0.0,0.0,0.014474,0.043719,0.0,0.0,0.0,0.062917,0.0,0.017466
2,0.0,1.0,0.124295,0.118821,0.103646,0.0,0.212985,0.11319,0.113333,0.043213,...,0.477306,0.063202,0.077745,0.164162,0.466281,0.425462,0.084646,0.02414,0.170595,0.113175
3,0.0,0.124295,1.0,0.08164,0.151531,0.060691,0.154714,0.249781,0.134475,0.114672,...,0.161205,0.064198,0.176134,0.158357,0.177098,0.124562,0.124911,0.080984,0.136606,0.170193
4,0.074482,0.118821,0.08164,1.0,0.130649,0.079648,0.319745,0.191013,0.030417,0.137186,...,0.114319,0.047228,0.136579,0.25403,0.121905,0.088735,0.068483,0.104309,0.054512,0.211609
5,0.016818,0.103646,0.151531,0.130649,1.0,0.063796,0.095888,0.165712,0.086616,0.03237,...,0.191029,0.021142,0.146173,0.224245,0.139721,0.058252,0.042926,0.038358,0.062642,0.225086


In [5]:
def recommend_movies(user_id, num_recommendations):
    """
    Recommends movies for the given user.

    Arguments:
    user_id (int): ID of the user to run recommendations for.
    num_recommendations (int): Number of movies to recommend.

    Returns:
    pandas.Series: A sorted list of featured movies with their predicted ratings.    """
    
    # Exclude the user himself from the list of similar ones
    similar_users = df[user_id].sort_values(ascending=False).iloc[1:]

    # Find movies that the user has already rated
    watched_movies = matrix.loc[user_id][matrix.loc[user_id] > 0].index.tolist()

    # Create a dictionary for predictions
    predicted_ratings = {}

    # We sort through similar users
    for similar_user in similar_users.index:
        # Find movies that a similar user has watched but ours hasn't
        not_watched_by_user = matrix.loc[similar_user][matrix.loc[similar_user] > 0].index
        movies_to_recommend = list(set(not_watched_by_user) - set(watched_movies))
        
        # For each film we calculate the predicted rating
        for movie_id in movies_to_recommend:
            # Prediction = similar user's rating * similarity
            rating_from_similar = matrix.loc[similar_user, movie_id]
            similarity_score = similar_users[similar_user]
            
            # Add to dictionary if movie hasn't been taken into account yet
            if movie_id not in predicted_ratings:
                predicted_ratings[movie_id] = 0
            
            # Accumulate weighted rating and total similarity
            predicted_ratings[movie_id] += rating_from_similar * similarity_score

    # Create Series from predicted ratings and sort them
    recommendations_series = pd.Series(predicted_ratings)
    
    # Returning top-N recommended movies
    return recommendations_series.sort_values(ascending=False).head(num_recommendations)

# Example of using the function
user_id_to_recommend = 5
recommendations = recommend_movies(user_id_to_recommend, 10)

print(f"\nРекомендации для пользователя с ID={user_id_to_recommend}:")
print(recommendations)


Рекомендации для пользователя с ID=5:
318     179.302393
296     176.613237
593     163.741026
260     145.780974
480     144.003256
2571    143.445798
527     139.904201
1       132.198796
1270    128.864335
2858    127.269730
dtype: float64
