In [50]:
import pandas as pd
from surprise import Dataset, Reader, SVD, accuracy, dump
from surprise.model_selection import train_test_split
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from IPython.display import HTML


In [51]:
# Load movie and ratings data
movies_df = pd.read_csv("~/Desktop/MovieLens-resources/movies.csv")
ratings_df = pd.read_csv("~/Desktop/MovieLens-resources/ratings.csv")

# Drop timestamp column and limit the number of rows to 100,000 for faster processing
# ratings_df = ratings_df.drop('timestamp', axis=1)
ratings_df = ratings_df.iloc[:1000000]

# Merge movie and ratings data
merged_df = pd.merge(ratings_df, movies_df, on='movieId')

In [52]:
merged_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,1225734739,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,1,5.0,835815971,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.0,974518024,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,10,1,3.0,1430666394,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,12,1,5.0,862500738,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [53]:
# Define a rating scale for Surprise Reader
reader = Reader(rating_scale=(0.5, 5.0))

In [54]:
# Load data into a Surprise Dataset
data = Dataset.load_from_df(merged_df[['userId', 'movieId', 'rating']], reader)

In [55]:
# Split the data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

In [56]:
# Initialize an SVD model and train it on the training set
algo = SVD()
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x11e7863d0>

In [57]:
# Make predictions on the test set and calculate RMSE
predictions = algo.test(testset)
accuracy.rmse(predictions)

RMSE: 0.8372


0.8372202713177629

# testing

In [58]:
# 1 
user_id = 10
movie_id = 15
predicted_rating = algo.predict(user_id, movie_id)
print(predicted_rating.est)

2.7291695226362016


In [59]:
# 2
user_id = 22
movie_id = 1
predicted_rating = algo.predict(user_id, movie_id)
print(predicted_rating.est)

3.6358458536916456


In [60]:
# 3
user_id = 25
movie_id = 20
predicted_rating = algo.predict(user_id, movie_id)
print(predicted_rating.est)

2.911015309462182


# movie rec

In [61]:
# Create widgets for user input and display
user_id_input = widgets.IntText(value=1, description='User ID:', min=1)
recommend_button = widgets.Button(description='Recommend')
output_box = widgets.Output()

In [62]:
from IPython.display import HTML

def get_recommendations(button):
    with output_box:
        # Clear previous output
        output_box.clear_output(wait=True)
        
        # Get user input
        user_id_to_recommend = user_id_input.value
        
        # Filter out movies the user has already rated
        user_movies = merged_df[merged_df['userId'] == user_id_to_recommend]
        
        # Sort the user's rated movies by timestamp in descending order
        user_top_movies = user_movies.sort_values(by='timestamp', ascending=False).head(5)
        
        # Display the top 5 highest-rated movies watched by the user
        display(HTML("<b>Top 5 Most Recently Watched Movies by User:</b>"))
        for index, row in user_top_movies.iterrows():
            print(f"{row['title']}, Rating: {row['rating']}")
        
        # Filter out movies the user has already rated
        movies_to_recommend = movies_df[~movies_df['movieId'].isin(user_movies['movieId'])]
        
        # Create a list of (movieId, estimated_rating) tuples for the user
        movie_recommendations = [(movie_id, algo.predict(user_id_to_recommend, movie_id).est) for movie_id in movies_to_recommend['movieId']]
        
        # Sort the list by estimated rating in descending order
        movie_recommendations.sort(key=lambda x: x[1], reverse=True)
        
        # Display the top 5 recommended movies with numbered indices
        display(HTML("<b>Top 5 Recommended Movies:</b>"))
        for index, (movie_id, estimated_rating) in enumerate(movie_recommendations[:5], start=1):
            movie_title = movies_df[movies_df['movieId'] == movie_id]['title'].values[0]
            print(f"{index}. {movie_title}, Estimated Rating: {estimated_rating}")

# Attach the function to the button's click event
recommend_button.on_click(get_recommendations)

# Display widgets
display(user_id_input, recommend_button, output_box)


IntText(value=1, description='User ID:')

Button(description='Recommend', style=ButtonStyle())

Output()