In [1]:
pip install pandas scikit-learn surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505192 sha256=81a659868afb4f62c5b589ccc55c3017c138257e30c4cbb681a987ac31ddc678
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Install

In [2]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Load the dataset
column_names = ['UserID', 'MovieID', 'Rating', 'Timestamp']
df = pd.read_csv('u.data', sep='\t', names=column_names)

# Drop the timestamp column (not needed for recommendations)
df = df.drop('Timestamp', axis=1)

# Display the first few rows
print(df.head())

   UserID  MovieID  Rating
0     196      242       3
1     186      302       3
2      22      377       1
3     244       51       2
4     166      346       1


In [3]:
# Define the rating scale (1 to 5)
reader = Reader(rating_scale=(1, 5))

# Load the dataset into Surprise's format
data = Dataset.load_from_df(df[['UserID', 'MovieID', 'Rating']], reader)

In [5]:
# Split the dataset into 75% training and 25% testing
trainset, testset = train_test_split(data, test_size=0.25)

In [6]:
# Initialize the SVD algorithm
model = SVD()

# Train the model on the training set
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x784f53bffe10>

In [7]:
# Make predictions on the test set
predictions = model.test(testset)

# Calculate RMSE (Root Mean Squared Error)
rmse = accuracy.rmse(predictions)
print(f'RMSE: {rmse}')

RMSE: 0.9374
RMSE: 0.9374149241142342


In [10]:
# Function to get top N recommendations for a user
def get_top_n_recommendations(user_id, n=5):
    # Get a list of all movie IDs
    all_movie_ids = df['MovieID'].unique()

    # Get the list of movies the user has already rated
    rated_movies = df[df['UserID'] == user_id]['MovieID']

    # Predict ratings for movies the user hasn't rated
    predictions = []
    for movie_id in all_movie_ids:
        if movie_id not in rated_movies:
            predicted_rating = model.predict(user_id, movie_id).est
            predictions.append((movie_id, predicted_rating))

    # Sort the predictions by rating (highest first)
    predictions.sort(key=lambda x: x[1], reverse=True)

    # Return the top N recommendations
    return predictions[:n]

# Example: Get top 5 recommendations for User ID 196
user_id = 196
top_recommendations = get_top_n_recommendations(user_id, n=5)
print(f"Top 5 recommendations for User {user_id}: {top_recommendations}")

Top 5 recommendations for User 196: [(12, 4.595388243400827), (408, 4.5628697649287), (50, 4.561691041697883), (483, 4.543015390676873), (172, 4.509118241699341)]
