<a href="https://colab.research.google.com/github/Fayis-PV/100_ML_Projects/blob/main/2.Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline


In [None]:
# Ratings table (userId, movieId, rating, timestamp), fetched from the tutorial bucket.
RATINGS_URL = 'https://s3-us-west-2.amazonaws.com/recommender-tutorial/ratings.csv'
ratings = pd.read_csv(RATINGS_URL)
# Summary statistics of the numeric columns, rendered as the cell output.
ratings.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,326.127564,19435.295718,3.501557,1205946000.0
std,182.618491,35530.987199,1.042529,216261000.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1199.0,3.0,1019124000.0
50%,325.0,2991.0,3.5,1186087000.0
75%,477.0,8122.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537799000.0


In [None]:
# Preview the first rows to check the column layout of the ratings table.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Movie metadata table (movieId, title, genres), fetched from the tutorial bucket.
MOVIES_URL = 'https://s3-us-west-2.amazonaws.com/recommender-tutorial/movies.csv'
movies = pd.read_csv(MOVIES_URL)
# Only movieId is numeric, so describe() summarises that single column.
movies.describe()

Unnamed: 0,movieId
count,9742.0
mean,42200.353623
std,52160.494854
min,1.0
25%,3248.25
50%,7300.0
75%,76232.0
max,193609.0


In [None]:
# Preview the first rows to check the column layout of the movies table.
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Basic size of the dataset: one row per rating, plus the number of distinct
# movie and user ids that appear in the ratings table.
ratings_count = ratings.shape[0]
# dropna=False keeps the count identical to len(Series.unique()).
movies_count = ratings['movieId'].nunique(dropna=False)
users_count = ratings['userId'].nunique(dropna=False)

print('Number of ratings: ', ratings_count)
print('Number of unique movieId: ', movies_count)
print('Number of unique users: ', users_count)


Number of ratings:  100836
Number of unique movieId:  9724
Number of unique users:  610


In [None]:
# Number of ratings submitted by each user (non-null movieId entries per userId),
# returned as a two-column frame: userId, rating_count.
rating_freq = (
    ratings.groupby('userId')['movieId']
    .count()
    .rename('rating_count')
    .reset_index()
)
rating_freq.head()

Unnamed: 0,userId,rating_count
0,1,232
1,2,29
2,3,39
3,4,216
4,5,44


In [None]:
# Mean rating per movie, indexed by movieId.
movie_rating = ratings.groupby('movieId')['rating'].mean()

# NOTE: "Less rated" / "Most rated" below mean the lowest / highest *mean*
# rating, not the number of ratings a movie received. idxmin/idxmax return the
# first movieId that attains the extreme, so a movie with very few ratings can
# win; the count/mean table built at the end of this cell helps to judge that.
less_rated_movie_id = movie_rating.idxmin()
less_rated_movie = movies[movies['movieId'] == less_rated_movie_id]
print('Less rated:', less_rated_movie.title)

most_rated_movie_id = movie_rating.idxmax()
most_rated_movie = movies[movies['movieId'] == most_rated_movie_id]
print('Most rated:', most_rated_movie.title)

# Number of ratings and mean rating per movie.
movie_status = ratings.groupby('movieId')[['rating']].agg(['count','mean'])
# droplevel() returns a new index instead of modifying in place; assign it back
# so the columns really become the flat 'count' / 'mean' pair.
movie_status.columns = movie_status.columns.droplevel()
movie_status.head()

Less rated: 2689    Gypsy (1962)
Name: title, dtype: object
Most rated: 48    Lamerica (1994)
Name: title, dtype: object


Unnamed: 0_level_0,rating,rating
Unnamed: 0_level_1,count,mean
movieId,Unnamed: 1_level_2,Unnamed: 2_level_2
1,215,3.92093
2,110,3.431818
3,52,3.259615
4,7,2.357143
5,49,3.071429


In [None]:
from scipy.sparse import csr_matrix

def create_matrix(ratings):
  """Build a sparse movie-by-user rating matrix and its id/index lookup tables.

  Args:
    ratings: DataFrame with 'userId', 'movieId' and 'rating' columns.

  Returns:
    Tuple (X, user_ind_map, movie_ind_map, user_id_map, movie_id_map) where
    X is a csr_matrix of shape (n_movies, n_users) holding the ratings,
    the *_ind_map dicts translate an id into its row/column index, and
    the *_id_map dicts translate an index back into the original id.
  """
  # Sorted distinct ids; the position of an id in these arrays is its index in X.
  user_ids = np.unique(ratings["userId"])
  movie_ids = np.unique(ratings["movieId"])
  n_users, n_movies = len(user_ids), len(movie_ids)

  # id -> index
  user_ind_map = {uid: pos for pos, uid in enumerate(user_ids)}
  movie_ind_map = {mid: pos for pos, mid in enumerate(movie_ids)}

  # index -> id
  user_id_map = dict(enumerate(user_ids))
  movie_id_map = dict(enumerate(movie_ids))

  # Coordinates of every rating: movies are rows, users are columns.
  rows = [movie_ind_map[mid] for mid in ratings['movieId']]
  cols = [user_ind_map[uid] for uid in ratings['userId']]

  X = csr_matrix((ratings['rating'], (rows, cols)), shape=(n_movies, n_users))

  return X, user_ind_map, movie_ind_map, user_id_map, movie_id_map

# Build the movie-by-user matrix and the lookup tables once; the similarity
# and recommendation cells below read these module-level names.
X, user_ind_map, movie_ind_map, user_id_map, movie_id_map = create_matrix(ratings)



Find Similar Movies

In [None]:
def find_similiar_movies(movie_id, k, metrics='cosine', show_distance=False):
  """Return the k movies whose rating vectors are closest to `movie_id`.

  Reads the module-level matrix `X` (one row per movie) together with the
  lookup tables `movie_ind_map` (movieId -> row index) and `movie_id_map`
  (row index -> movieId).

  Args:
    movie_id: movieId of the query movie; must be a key of `movie_ind_map`.
    k: number of similar movies to return.
    metrics: distance metric handed to `NearestNeighbors`.
    show_distance: when True, return (movieId, distance) tuples instead of
      bare movieIds.

  Returns:
    A list with at most k entries ordered from nearest to farthest: movieIds,
    or (movieId, distance) tuples when `show_distance` is True.

  Raises:
    KeyError: if `movie_id` is not present in `movie_ind_map`.
  """
  movie_ind = movie_ind_map[movie_id]
  # Rows of X are addressed by the mapped row index, never by the raw movieId:
  # indexing with the movieId picks another movie's vector (or runs past the
  # last row for large ids).
  movie_feature = X[movie_ind]

  # One extra neighbour is requested because the query movie is part of the
  # fitted data; never ask for more neighbours than there are rows.
  n_neighbors = min(k + 1, X.shape[0])
  neigh = NearestNeighbors(n_neighbors=n_neighbors, algorithm='brute', metric=metrics)
  neigh.fit(X)

  movie_feature = movie_feature.reshape(1,-1)
  # Always fetch distances so both return shapes are built from one result.
  distances, indices = neigh.kneighbors(movie_feature, return_distance=True)

  # Remove the query movie by index rather than by position: with tied
  # distances it is not guaranteed to be the first hit.
  neighbours = [
      (movie_id_map[ind], dist)
      for ind, dist in zip(indices[0].tolist(), distances[0].tolist())
      if ind != movie_ind
  ][:k]

  if show_distance:
    return neighbours
  return [neighbour_id for neighbour_id, _ in neighbours]


# Example query: movieId 2 is 'Jumanji (1995)' in the movies table.
movie_Id = 2
# Ten nearest movies by cosine distance between rating vectors (default metric).
similiar_movies_Ids = find_similiar_movies(movie_Id,k=10)

In [None]:
# movieId -> title lookup built from the movies table.
Id_title_map = {
    movie_key: title
    for movie_key, title in zip(movies['movieId'], movies['title'])
}

def get_movie_title(movie_Id):
  """Return the title stored for `movie_Id`, or a placeholder when it is unknown."""
  return Id_title_map.get(movie_Id, 'Movie is not Found.')

# Show the query movie followed by the titles of its nearest neighbours.
movie_title = get_movie_title(movie_Id)
print('Movie Title: ',movie_title)
print('Similiar Movies: ')
for Id in similiar_movies_Ids:
  print(get_movie_title(Id))

Movie Title:  Jumanji (1995)
Similiar Movies: 
Grumpy Old Men (1993)
Striptease (1996)
Nutty Professor, The (1996)
Twister (1996)
Father of the Bride Part II (1995)
Broken Arrow (1996)
Bio-Dome (1996)
Truth About Cats & Dogs, The (1996)
Sabrina (1995)
Birdcage, The (1996)


Movie Recommendation based on User Preference

In [None]:
def recommend_movie_for_user(userId,k=10):
  """Recommend movies similar to the one `userId` rated highest.

  Reads the module-level `ratings` table and relies on
  `find_similiar_movies` and `get_movie_title`.

  Args:
    userId: id of the user to recommend for. When the user has no ratings,
      'User not found.' is printed and the seed movie is taken from all
      ratings above 3.0 instead.
    k: number of similar movies to return.

  Returns:
    Tuple (similiar_movies_Ids, movie_Id, recommended_movies, movie_title):
    the ids of the recommended movies, the id of the seed movie, the
    recommended titles (same order as the ids) and the seed movie's title.
  """
  df = ratings[ratings['userId'] == userId]
  if df.empty:
    # Filtering by an unknown userId yields an empty frame rather than raising,
    # so the fallback has to be triggered by an explicit emptiness check.
    print('User not found.')
    df = ratings[ratings['rating'] > 3.0]

  # Seed movie: the first row carrying the highest rating. The mask is built
  # from `df` itself so that it is aligned with the rows being filtered.
  top_rated = df['rating'] == df['rating'].max()
  movie_Id = df.loc[top_rated, 'movieId'].values[0]

  similiar_movies_Ids = find_similiar_movies(movie_Id,k=k)
  movie_title = get_movie_title(movie_Id)

  recommended_movies = [get_movie_title(Id) for Id in similiar_movies_Ids]

  return similiar_movies_Ids, movie_Id, recommended_movies, movie_title



In [None]:
# Demo: recommend ten movies for one user, seeded by that user's top-rated movie.
userId = 344
(
    similiar_movies_Ids,
    movie_Id,
    recommended_movies,
    movie_title,
) = recommend_movie_for_user(userId=userId, k=10)

print(f"Since you watched {movie_title}, you might also like:")
# Titles and ids are parallel lists; print them as a numbered list.
for i, _ in enumerate(recommended_movies):
  print(f"{i+1}. {recommended_movies[i]} - {similiar_movies_Ids[i]}")

Since you watched Star Wars: Episode IV - A New Hope (1977), you might also like:
1. Crimson Tide (1995) - 161
2. Dances with Wolves (1990) - 590
3. Get Shorty (1995) - 21
4. Fugitive, The (1993) - 457
5. Firm, The (1993) - 454
6. Apollo 13 (1995) - 150
7. Batman (1989) - 592
8. True Lies (1994) - 380
9. Dave (1993) - 440
10. Outbreak (1995) - 292
