In [None]:
pip install scikit-surprise



In [None]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

# Step 1: Load the MovieLens ratings into a Pandas DataFrame
# NOTE: the path below is absolute; change it if 'ratings.csv' lives elsewhere.
ratings_df = pd.read_csv('/content/ratings.csv')

# Step 2: Preprocess the dataset
# Drop exact duplicate rows first, then any row with a missing value in any column.
ratings_df = ratings_df.drop_duplicates().dropna()

# Step 3: Convert the data into a Surprise dataset and split into train/test sets
# Only the (user, item, rating) triple is handed to Surprise, in that column order.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(ratings_df[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the ratings for evaluation; the seed keeps the split repeatable.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Step 4: Build and train the collaborative filtering model (SVD)
# The model is seeded as well: with only the split seeded, the factor
# initialisation would still vary and the RMSE below would differ between runs.
model = SVD(  # You can adjust these hyperparameters
    n_factors=100,
    n_epochs=20,
    random_state=42,
    verbose=True,
)

# Fit the model on the training set
model.fit(trainset)

# Step 5: Evaluate the model's performance on the testing set
predictions = model.test(testset)
# verbose=False: accuracy.rmse otherwise prints its own "RMSE: ..." line,
# which would duplicate the message printed just below.
rmse = accuracy.rmse(predictions, verbose=False)
print(f'RMSE on test set: {rmse}')

# Step 6: Use the model to make recommendations for a user
user_id = 1  # Replace with the user ID for whom you want to make recommendations
user_movies = ratings_df[ratings_df['userId'] == user_id]['movieId']

# Candidate movies: every movieId present in the data that this user has not rated.
# De-duplicating before the membership test checks each movie once instead of
# once per rating row; order of first appearance is preserved.
all_movies = ratings_df['movieId'].drop_duplicates()
unrated_movies = all_movies[~all_movies.isin(user_movies)].to_numpy()

# Predict a rating for each candidate; `.est` is the estimated rating.
user_ratings = [
    (movie_id, model.predict(user_id, movie_id).est)
    for movie_id in unrated_movies
]

# Sort the recommendations by predicted rating, highest first
# (the sort is stable, so tied predictions keep their candidate order).
user_ratings.sort(key=lambda x: x[1], reverse=True)

# Step 7: Display top N movie recommendations for the user
top_n = 10  # Adjust the number of recommendations as needed
top_movies = user_ratings[:top_n]
print(f"Top {top_n} recommended movies for User {user_id}:")
for movie_id, predicted_rating in top_movies:
    # The columns used from ratings_df (userId, movieId, rating) hold no title,
    # so the movie is identified by its ID. Looking up the 'rating' column here
    # would print some user's rating under a "title" label.
    # NOTE(review): titles presumably live in a separate MovieLens movies file
    # keyed by movieId — confirm and join against it to display real titles.
    print(f"Movie ID: {movie_id}, Predicted Rating: {predicted_rating:.2f}")

# Step 8: Test the model by inputting new user ratings
# You can simulate new user ratings by adding rows to the 'ratings_df' DataFrame
# Then, repeat the recommendation process for the new user.


Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
RMSE: 0.8824
RMSE on test set: 0.8824096468158413
Top 10 recommended movies for User 1:
Movie Title: 3.0, Predicted Rating: 5.00
Movie Title: 4.0, Predicted Rating: 5.00
Movie Title: 5.0, Predicted Rating: 5.00
Movie Title: 4.0, Predicted Rating: 5.00
Movie Title: 5.0, Predicted Rating: 5.00
Movie Title: 4.5, Predicted Rating: 5.00
Movie Title: 4.0, Predicted Rating: 5.00
Movie Title: 5.0, Predicted Rating: 5.00
Movie Title: 5.0, Predicted Rating: 5.00
Movie Title: 4.0, Predicted Rating: 5.00
