In [None]:
!pip install pandas numpy scikit-surprise

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl size=2505221 sha256=ee061953da11813f18b72002f810eb8bafc5eff2a7d57b8632e1f9c9a35cbbaf
  Stored in directory: /root/.cache/pip/wheels/2a/8f/6e/7e2899163e2d85d8266daab4aa1cdabec7a6c56f83c015b5af
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


In [None]:
!pip install numpy==1.24.4 --force-reinstall

Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.4 which is incompatible.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.4 which is incompatible.
jax 0.5.2 requires nump

In [None]:
import pandas as pd
import numpy as np
import random
from surprise import Dataset, Reader, SVD

# Fix the seed of both the stdlib and the NumPy global generators so that
# repeated runs of the notebook are reproducible.
RANDOM_SEED = 12
for seed_fn in (random.seed, np.random.seed):
    seed_fn(RANDOM_SEED)

# 1. Load the Full Ratings Dataset
# Only the (user_id, movie_id, rating_val) triple is kept; every other column
# of the export is discarded right after reading.
print("Loading ratings_export.csv...")
ratings_export = pd.read_csv("/content/ratings_export.csv")[
    ["user_id", "movie_id", "rating_val"]
]

# 2. Load the User's Ratings
# Same three columns, same order, so both frames can be stacked later.
print("Loading user_ratings.csv...")
user_ratings = pd.read_csv("/content/user_ratings.csv")[
    ["user_id", "movie_id", "rating_val"]
]


# 3. Combine the full training dataset with the target user's ratings.
# De-duplicate on the (user_id, movie_id) pair rather than on whole rows: a
# whole-row comparison only removes exact repeats and would leave two different
# ratings for the same pair in the training data. The user's rows are appended
# last, so keep="last" lets them win over any older entry from the export.
# Chaining (instead of inplace=True) keeps the cell idempotent on re-run.
combined_data = pd.concat(
    [ratings_export, user_ratings], ignore_index=True
).drop_duplicates(subset=["user_id", "movie_id"], keep="last")

# 4. Build the SVD Model Using Surprise
# The reader tells Surprise the bounds of the rating scale; the data is stated
# to use ratings between 1 and 10.
rating_bounds = (1, 10)
reader = Reader(rating_scale=rating_bounds)

# Surprise expects exactly three columns in the order user, item, rating.
training_frame = combined_data[["user_id", "movie_id", "rating_val"]]
data = Dataset.load_from_df(training_frame, reader)

# No hold-out split: every available rating is used for training.
trainset = data.build_full_trainset()

# Train the SVD algorithm with the notebook-wide seed.
algo = SVD(random_state=RANDOM_SEED)
algo.fit(trainset)

# 5. Generate Recommendations for the Target User
# Fail early with a clear message; otherwise .iloc[0] below raises an opaque
# "indexer is out-of-bounds" IndexError when the user file has no rows.
if user_ratings.empty:
    raise ValueError("user_ratings is empty; cannot determine the target user.")

# Every distinct movie in the combined data is a recommendation candidate.
all_movies = combined_data["movie_id"].unique().tolist()
# The target user is taken from the first row; this assumes all rows in
# user_ratings belong to the same user.
target_user_id = user_ratings["user_id"].iloc[0]
# Movies already rated by this user (kept as a list, as before).
rated_movies = user_ratings["movie_id"].unique().tolist()
# A set gives constant-time membership tests; checking each candidate against
# the list made the filter O(len(all_movies) * len(rated_movies)).
rated_movie_set = set(rated_movies)
movies_to_predict = [m for m in all_movies if m not in rated_movie_set]

# Predict a rating for each unrated movie; only the estimate (`est`) is kept.
predictions = [
    (movie_id, algo.predict(target_user_id, movie_id).est)
    for movie_id in movies_to_predict
]

# Highest predicted rating first. The sort is stable, so ties keep candidate order.
predictions.sort(key=lambda x: x[1], reverse=True)

# Number of recommendations to keep.
top_n = 5
top_recommendations = predictions[:top_n]

# 6. Output Recommendations
# Console summary: one line per recommended movie, rating shown with two decimals.
header_line = f"\nTop recommendations for user '{target_user_id}':"
print(header_line)
for movie_id, est_rating in top_recommendations:
    summary_line = f"Movie: {movie_id}, Predicted Rating: {est_rating:.2f}"
    print(summary_line)

# Persist the top-N table without the index column.
recommendations_df = pd.DataFrame(
    data=top_recommendations,
    columns=["movie_id", "predicted_rating"],
)
recommendations_df.to_csv("recommendations_for_user.csv", index=False)
print("\nRecommendations saved to recommendations_for_user.csv")

# Read the file back and echo it, so the printed table reflects what is on disk.
rec_df = pd.read_csv("recommendations_for_user.csv")
print(rec_df.to_string(index=False))

Loading ratings_export.csv...
Loading user_ratings.csv...

Top recommendations for user 'test_user':
Movie: the-lord-of-the-rings-collection, Predicted Rating: 9.83
Movie: the-human-condition, Predicted Rating: 9.73
Movie: cowboy-bebop, Predicted Rating: 9.72
Movie: the-vietnam-war, Predicted Rating: 9.67
Movie: come-and-see, Predicted Rating: 9.61

Recommendations saved to recommendations_for_user.csv
                        movie_id  predicted_rating
the-lord-of-the-rings-collection          9.826667
             the-human-condition          9.726286
                    cowboy-bebop          9.718794
                 the-vietnam-war          9.671640
                    come-and-see          9.606568


Save the trained model

In [None]:
from surprise.dump import dump

# `algo` is already trained in step 4, so it is serialized as-is. Calling
# algo.fit(trainset) again here would only repeat the full training run: with
# the fixed random_state the refit reproduces the same model.

# Save the trained model to "svd_model.pkl" in the current working directory.
# No predictions are stored alongside it (predictions=None).
# NOTE(review): the reload cell reads "/content/svd_model.pkl"; the two paths
# only match when the working directory is /content - confirm.
dump("svd_model.pkl", predictions=None, algo=algo, verbose=True)


The dump has been saved as file svd_model.pkl


Regenerate recommendations using the saved model file

In [None]:
import pandas as pd
from surprise.dump import load

# -------------------------------
# 1. Load the Pre-Trained Model
# -------------------------------
# Restore the SVD model saved earlier. The dump holds a (predictions, algo)
# pair; only the algorithm is needed here.
# NOTE(review): surprise.dump is a pickle wrapper - only load files you trust.
model_dump = load("/content/svd_model.pkl")
_, algo = model_dump

# -------------------------------
# 2. Load the New User's Ratings
# -------------------------------
# Read the user's ratings and keep only user_id, movie_id and rating_val.
user_ratings = pd.read_csv("/content/user_ratings.csv")[
    ["user_id", "movie_id", "rating_val"]
]
# The target user is taken from the first row (assumes a single-user file).
target_user_id = user_ratings["user_id"].iloc[0]
# Distinct movies this user has already rated.
rated_movies = user_ratings["movie_id"].unique().tolist()

# -------------------------------
# 3. Load the Movie Data
# -------------------------------
# Load the movie metadata file; it is expected to list every movie mentioned
# in ratings_export.csv. The slower but more tolerant python parser is used.
movie_data = pd.read_csv("/content/movie_data.csv", engine='python')
# Distinct movie ids. Rows with a missing movie_id are dropped so that NaN can
# never become a prediction candidate (and thus a "recommendation").
all_movies = movie_data["movie_id"].dropna().unique().tolist()

# Exclude movies the user already rated. A set gives constant-time lookups;
# testing each catalogue entry against the list was quadratic.
rated_movie_set = set(rated_movies)
movies_to_predict = [m for m in all_movies if m not in rated_movie_set]

# -------------------------------
# 4. Generate Recommendations for the New User
# -------------------------------
# Score every candidate movie for the target user, keeping only the estimate.
# NOTE(review): if the loaded model was trained without this user's ratings,
# the estimates cannot reflect them - confirm the model includes this user.
scored = [
    (movie_id, algo.predict(target_user_id, movie_id).est)
    for movie_id in movies_to_predict
]

# Rank by estimated rating, best first (stable: ties keep candidate order).
predictions = sorted(scored, key=lambda pair: pair[1], reverse=True)

# Keep the top N recommendations (here: 5).
top_n = 5
top_recommendations = predictions[:top_n]

# -------------------------------
# 5. Print Recommendations to Console
# -------------------------------
header_line = f"Top recommendations for user '{target_user_id}':"
print(header_line)
for movie_id, est_rating in top_recommendations:
    summary_line = f"Movie: {movie_id}, Predicted Rating: {est_rating:.2f}"
    print(summary_line)

Top recommendations for user 'test_user':
Movie: the-lord-of-the-rings-collection, Predicted Rating: 9.83
Movie: the-human-condition, Predicted Rating: 9.73
Movie: cowboy-bebop, Predicted Rating: 9.72
Movie: the-vietnam-war, Predicted Rating: 9.67
Movie: come-and-see, Predicted Rating: 9.61
