### Import necessary libraries

In [1]:
import pandas as pd

In [None]:
!pip install scikit-surprise
from surprise import Dataset, Reader, KNNWithMeans, accuracy
from surprise.model_selection import train_test_split

### Split into training and test sets

In [4]:
reader = Reader(rating_scale=(1,5))
merged_data = pd.read_csv('Final_data.csv')
data = Dataset.load_from_df(merged_data[['UserID','MovieID','Rating']], reader)

# Split into train and test sets
trainset, testset = train_test_split(data, test_size=0.25)

### 3.1.3 Implement user-based collaborative filtering

Here we utilize KNNWithMeans algorithm from the surprise library. It is a collaborative filtering algorithm that enhances the classic K-Nearest Neighbors (KNN) algorithm. In KNN-based algorithms, recommendations are generated by looking at the similarity between users or items, but KNNWithMeans adds a mean-centering step to account for user or item biases. Instead of directly using the raw ratings, it subtracts the mean rating of a user or an item from individual ratings to adjust for biases.

In [9]:
sim_options_user_based = {
    "name": "cosine",
    "user_based": True,  # Compute similarities between users
}

# Initialize the KNNWithMeans algorithm with the similarity options
algo_user_based = KNNWithMeans(sim_options=sim_options_user_based)

# Train the algorithm on the trainset
algo_user_based.fit(trainset)

# Test the algorithm on the test set
predictions_user_based = algo_user_based.test(testset)

# Calculate accuracy (e.g., MAE, RMSE)
print(accuracy.mae(predictions_user_based))
print(accuracy.rmse(predictions_user_based))


Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  0.6902
0.6901809503025929
RMSE: 0.9031
0.9031266979540442


### 3.1.4 Predict rating for a specific user and movie

In [6]:
merged_data.head()

Unnamed: 0,UserID,MovieID,Rating,Title
0,1,1,4.0,Toy Story (1995)
1,1,3,4.0,Grumpier Old Men (1995)
2,1,6,4.0,Heat (1995)
3,1,47,5.0,Seven (a.k.a. Se7en) (1995)
4,1,50,5.0,"Usual Suspects, The (1995)"


In [7]:
user_id = 1
movie_id = 1193
prediction = algo_user_based.predict(user_id, movie_id)
print(prediction)


user: 1          item: 1193       r_ui = None   est = 4.93   {'actual_k': 40, 'was_impossible': False}


## 3.3 Item-based Collaborative Filtering

In [8]:
# Item-based filtering: we determine the similarities based on the movies
# instead of looking at ratings from similar users
# we look at ratings from similar movies
sim_options_movie_based = {
    "name": "cosine",
    "user_based": False,  # Compute similarities between movies
}

# Initialize the KNNWithMeans algorithm with the similarity options
algo_movie_based = KNNWithMeans(sim_options=sim_options_movie_based)

# Train the algorithm on the trainset
algo_movie_based.fit(trainset)

# Test the algorithm on the test set
predictions_movie_based = algo_movie_based.test(testset)

# Calculate accuracy (e.g., MAE, RMSE)
accuracy.mae(predictions_movie_based)
accuracy.rmse(predictions_movie_based)

Computing the cosine similarity matrix...
Done computing similarity matrix.
MAE:  0.6940
RMSE: 0.9087


0.9086512495546234