## Collaborative Filtering Recommendation System

In [24]:
# Collaborative filtering is based on the idea that people who behave similarly in the data have similar preferences.
# It predicts which items a user will like based on the item preferences of other, similar users. This information comes from explicit feedback (e.g. ratings) or from implicit feedback (e.g. listening, purchasing and watching).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Filtering warnings
warnings.simplefilter(action="ignore",category=FutureWarning)

In [25]:
# Read the ratings table (userId, movieId, rating, timestamp) directly from
# the tutorial's S3-hosted CSV, then preview the first five rows.
ratings = pd.read_csv(
    filepath_or_buffer="https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv"
)
ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [26]:
# Read the movie metadata table (movieId, title, genres) from the same
# S3-hosted tutorial location, then preview the first five rows.
movies = pd.read_csv(
    filepath_or_buffer="https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv"
)
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [58]:
# Total number of entries in the `rating` column, i.e. one per row of `ratings`
n_ratings = ratings["rating"].size
print(f"The number of ratings are: {n_ratings}")

The number of ratings are: 100836


In [63]:
# Distinct rating values that occur in the data.
# NOTE: despite the `n_` prefix, `n_ratings_unique` is the array of values
# themselves, not a count.
n_ratings_unique = ratings["rating"].unique()
# Display the values in ascending order as a plain Python list
sorted(n_ratings_unique.tolist())

[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]

In [47]:
# Count the distinct movie IDs and user IDs that appear in `ratings`.
# dropna=False makes a missing ID (if any) count as one distinct value,
# which is exactly what len(Series.unique()) does.
n_movies = ratings["movieId"].nunique(dropna=False)
n_users = ratings["userId"].nunique(dropna=False)

print(f"The number of unique movies is :{n_movies}")
print(f"The number of unique users is: {n_users}")

The number of unique movies is :9724
The number of unique users is: 610


In [60]:
# Rating density: how many ratings an average user gives, and how many
# ratings an average movie receives (these are counts, not rating values).
avg_ratings = n_ratings / n_users
avg_ratings_movie = n_ratings / n_movies

print(f"The average number of ratings per user is: {round(avg_ratings,2)}")
print(f"The average number of ratings per movie is: {round(avg_ratings_movie,2)}")

The average number of ratings per user is: 165.3
The average number of ratings per movie is: 10.37


In [73]:
# Number of ratings submitted by each user.
# NOTE: the resulting `movieId` column holds the per-user COUNT of rated
# movies (non-null movieId entries), not a movie identifier.
user_freq = (
    ratings
    .groupby("userId")[["movieId"]]
    .count()
    .reset_index()
)
user_freq.head()

Unnamed: 0,userId,movieId
0,1,232
1,2,29
2,3,39
3,4,216
4,5,44


In [80]:
# Mean rating of every movie, as a one-column frame indexed by movieId
movie_ratings = ratings.groupby("movieId")["rating"].mean().to_frame()

# movieId with the smallest mean rating; when several movies tie,
# idxmin returns the first such label.
lowest_rate = movie_ratings["rating"].idxmin()

# Look up that movie's title and genres in the metadata table
l_movies = movies.loc[movies["movieId"].eq(lowest_rate)]
l_movies

Unnamed: 0,movieId,title,genres
2689,3604,Gypsy (1962),Musical


In [81]:
# Highest rated movie:
# movieId with the largest mean rating; when several movies tie,
# idxmax returns the first such label.
highest_rate = movie_ratings["rating"].idxmax()

# Look up that movie's title and genres in the metadata table
h_movies = movies.loc[movies["movieId"].eq(highest_rate)]
h_movies

Unnamed: 0,movieId,title,genres
48,53,Lamerica (1994),Adventure|Drama


In [97]:
# Assessing the users who rated the movies.
# Per-movie means, indexed by movieId. NOTE: the `userId` column of this frame
# is the arithmetic mean of user IDs, which does not identify any rater (it
# only equals a real ID when a movie has exactly one rating). The frame is kept
# unchanged so that any other cell referencing it keeps working.
movie_ratings_user = ratings.groupby('movieId')[['rating', 'userId']].mean()

# movieId with the lowest mean rating (first label on ties)
movie_lowest = movie_ratings_user['rating'].idxmin()

# IDs of the users who actually rated that movie, taken from the raw ratings
# rather than from the averaged `userId` column: one row per individual rating,
# indexed by movieId with the same column layout as before.
lowest_user = (
    ratings
    .loc[ratings['movieId'] == movie_lowest, ['movieId', 'rating', 'userId']]
    .set_index('movieId')
)
lowest_user


Unnamed: 0_level_0,rating,userId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
3604,0.5,89.0


In [98]:
# Users that rated the highest rated movie.
# movieId with the highest mean rating (first label on ties)
movie_highest = movie_ratings_user['rating'].idxmax()

# IDs of the users who actually rated that movie, taken from the raw ratings.
# Reading `userId` from `movie_ratings_user` would give the mean of the user
# IDs, which is only a real user when the movie has exactly one rating.
# One row per individual rating, indexed by movieId.
highest_user = (
    ratings
    .loc[ratings['movieId'] == movie_highest, ['movieId', 'rating', 'userId']]
    .set_index('movieId')
)
highest_user

Unnamed: 0_level_0,rating,userId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
53,5.0,344.0
