In [None]:
pip install scikit-surprise


Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.4/154.4 kB 6.1 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... done
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357272 sha256=10ea0905738915f70ad30d429609d942c7731a1663914ae739378848f7ee6d57
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4

In [None]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy

In [None]:
# Build the toy ratings table.
# Each triple is one observation: (userId, movieId, rating). The triples are
# transposed into the column-oriented dict that pandas consumes.
data = {
    column: list(values)
    for column, values in zip(
        ('userId', 'movieId', 'rating'),
        zip(
            (1, 101, 5), (1, 102, 3), (1, 103, 4),
            (2, 101, 3), (2, 104, 5),
            (3, 102, 2), (3, 103, 4), (3, 104, 5),
            (4, 101, 4), (4, 104, 4),
            (5, 103, 5),
        ),
    )
}
df = pd.DataFrame(data)


In [None]:
# Define a reader to specify the rating scale.
# The scale is passed as a (1, 5) tuple and is reused when the DataFrame is
# loaded into a Surprise dataset.
reader = Reader(rating_scale=(1, 5))


In [None]:
# Load the data into Surprise's dataset format.
# The three columns are selected explicitly, in user / item / rating order,
# and handed over together with the reader that carries the rating scale.
surprise_data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)


In [None]:
# Split the data into train and test sets.
# test_size=0.25 holds out a quarter of the ratings for evaluation; a fixed
# random_state is passed so the same split can be obtained on every run.
trainset, testset = train_test_split(surprise_data, test_size=0.25, random_state=42)


In [None]:
# Use K-Nearest Neighbors for collaborative filtering.
# No arguments are passed, so the model runs with the library's defaults.
algo = KNNBasic()


In [None]:
# Train the algorithm on the training set.
# fit() is the last expression in the cell, so its return value (the fitted
# KNNBasic object) is echoed as the cell output.
algo.fit(trainset)


Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fec546ba1a0>

In [None]:
# Test the algorithm on the test set.
# The resulting predictions for the held-out ratings are kept so they can be
# scored in the evaluation step.
predictions = algo.test(testset)


In [None]:
# Evaluate the algorithm
# accuracy.rmse() prints the score itself by default AND returns it, so
# wrapping it in print() reported the metric twice. Silence the library's own
# print, keep the value for later use, and report it exactly once.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse:.4f}")


RMSE: 0.8165
RMSE: 0.816496580927726


In [None]:
# Function to get top-n recommendations for a specific user
def get_top_n_recommendations(algo, user_id, df, n=5):
    """Return the ``n`` highest-predicted movies the user has not rated yet.

    Args:
        algo: Fitted model exposing ``predict(user_id, movie_id)`` whose
            result carries the estimated rating in an ``est`` attribute.
        user_id: User id as it appears in ``df['userId']``.
        df: Ratings frame with at least ``userId`` and ``movieId`` columns.
        n: Maximum number of recommendations to return. ``None`` returns
            every unrated movie.

    Returns:
        A list of ``(movie_id, estimated_rating)`` tuples ordered by estimated
        rating, highest first. Movies with equal estimates keep the order in
        which they first appear in ``df``. The list is shorter than ``n`` when
        the user has fewer unrated movies, and empty when ``n`` is zero or
        negative.
    """
    # A negative n would otherwise slice as "everything except the last |n|"
    # and silently return a misleading result.
    if n is not None and n <= 0:
        return []

    # Set membership keeps the candidate filter linear in the number of movies.
    rated_movies = set(df.loc[df['userId'] == user_id, 'movieId'])

    # Only movies the user has not rated are candidates for recommendation.
    candidates = [
        movie_id for movie_id in df['movieId'].unique()
        if movie_id not in rated_movies
    ]

    # Score every candidate with the model's estimated rating.
    scored = [
        (movie_id, algo.predict(user_id, movie_id).est)
        for movie_id in candidates
    ]

    # list.sort is stable, so ties preserve first-appearance order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return scored[:n]


In [None]:
# Build the recommendation list for a single user (user 1), asking the
# fitted model for at most five movies this user has not rated yet.
user_id = 1
top_n_recommendations = get_top_n_recommendations(
    algo=algo,
    user_id=user_id,
    df=df,
    n=5,
)


In [None]:
# Display the top 5 movie recommendations for the user.
# Both lines use f-strings so the cell has a single formatting style; the
# printed text is unchanged. Fewer than five rows are printed when the user
# has fewer unrated movies.
print(f"\nTop 5 movie recommendations for user {user_id}:")
for movie_id, est_rating in top_n_recommendations:
    print(f"Movie ID: {movie_id}, Predicted Rating: {est_rating:.2f}")


Top 5 movie recommendations for user 1:
Movie ID: 104, Predicted Rating: 4.00
