# applying item based collaborative filtering 

In [37]:
import pandas as pd
import numpy as np

In [38]:
# laoding csv files 
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv')

In [39]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [40]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [41]:
# filtering movies that less than 10 watches 

rating_count = ratings.groupby('movieId')['rating'].count()

In [42]:
rating_count_df = pd.DataFrame(rating_count.reset_index()).rename(columns={'rating': 'rating_count'})

In [43]:
# mergeing rating_count_df with ratings df 

ratings = ratings.merge(rating_count_df, how='inner', on='movieId')

In [44]:
# mergeing ratings with movies data frames 

df = movies.merge(ratings, on = 'movieId', how='inner')

In [45]:
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,rating_count
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703,215
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962,215
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946,215
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970,215
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483,215


#### Because we're about to create a item based collaborative filtering we will extract only tha features that we need 

In [46]:
col_df = df.loc[:, ['movieId', 'title', 'genres', 'userId', 'rating', 'rating_count']]
col_df = col_df[col_df["rating_count"]>= 10]

In [47]:
col_df.head()

Unnamed: 0,movieId,title,genres,userId,rating,rating_count
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,215
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,215
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,215
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,215
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,215


In [48]:
# New we will creat a user mover rating matrix 

user_item = pd.pivot_table(data =col_df, index='userId', columns='movieId', values='rating').fillna(0)


In [49]:
user_item

movieId,1,2,3,5,6,7,9,10,11,12,...,166461,166528,166643,168250,168252,174055,176371,177765,179819,187593
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,2.5,0.0,0.0,2.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
def rating_for_movie(movie_id, rating):
    # Getting to similar movies for movie in interset
    top_10_similar_movies = user_item.corrwith(user_item[movie_id]).drop(movie_id).sort_values(ascending=False).head(10)
    top_10_similar_movies = pd.DataFrame(top_10_similar_movies,columns=['similarity_socre']).reset_index()
    # multiplying similarity score by rating 

    top_10_similar_movies['pre_rate'] = top_10_similar_movies['similarity_socre'] * rating
    return top_10_similar_movies

In [15]:
def item_recommender(user_id):
    user_1 = col_df[col_df['userId'] == user_id][['movieId', 'rating']]
    df_list = []
    for index, row in user_1.iterrows():
        new_df = rating_for_movie(row['movieId'], row['rating'])
        df_list.append(new_df)
    final_df = pd.concat(df_list, ignore_index=True)
    return final_df.sort_values('pre_rate', ascending=False)

In [16]:
items_for_user_3 = item_recommender(2).head(10)

In [17]:
items_for_user_3

Unnamed: 0,movieId,similarity_socre,pre_rate
40,7438,0.820205,3.280822
250,115149,0.617796,3.088981
251,122904,0.613662,3.06831
252,122886,0.610932,3.054659
253,107406,0.601977,3.009885
90,63131,0.593606,2.968028
254,119145,0.586381,2.931907
91,107348,0.582016,2.910082
80,79132,0.646113,2.90751
92,79592,0.579894,2.899469


In [18]:
top_10s= items_for_user_3.merge(col_df, how='inner', on='movieId').sort_values('pre_rate', ascending=False)
top_10s = top_10s.drop_duplicates(subset='movieId', keep='first')

In [19]:
top_10s


Unnamed: 0,movieId,similarity_socre,pre_rate,title,genres,userId,rating,rating_count
0,7438,0.820205,3.280822,Kill Bill: Vol. 2 (2004),Action|Drama|Thriller,15,3.0,110
127,115149,0.617796,3.088981,John Wick (2014),Action|Thriller,380,5.0,29
179,122904,0.613662,3.06831,Deadpool (2016),Action|Adventure|Comedy|Sci-Fi,448,4.0,54
223,122886,0.610932,3.054659,Star Wars: Episode VII - The Force Awakens (2015),Action|Adventure|Fantasy|Sci-Fi|IMAX,475,4.5,41
245,107406,0.601977,3.009885,Snowpiercer (2013),Action|Drama|Sci-Fi,495,4.0,21
265,63131,0.593606,2.968028,Role Models (2008),Comedy,408,4.0,20
295,119145,0.586381,2.931907,Kingsman: The Secret Service (2015),Action|Adventure|Comedy|Crime,331,3.5,36
316,107348,0.582016,2.910082,Anchorman 2: The Legend Continues (2013),Comedy,408,4.0,11
396,79132,0.646113,2.90751,Inception (2010),Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX,331,4.0,143
480,79592,0.579894,2.899469,"Other Guys, The (2010)",Action|Comedy,419,3.5,21


## Applying user based collaborative filtering

In [20]:
df.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,rating_count
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1,4.0,964982703,215
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5,4.0,847434962,215
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7,4.5,1106635946,215
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15,2.5,1510577970,215
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17,4.5,1305696483,215


In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100836 entries, 0 to 100835
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   movieId       100836 non-null  int64  
 1   title         100836 non-null  object 
 2   genres        100836 non-null  object 
 3   userId        100836 non-null  int64  
 4   rating        100836 non-null  float64
 5   timestamp     100836 non-null  int64  
 6   rating_count  100836 non-null  int64  
dtypes: float64(1), int64(4), object(2)
memory usage: 5.4+ MB


In [199]:
item_user = pd.pivot_table(data= df,index='movieId', columns='userId', values='rating').fillna(0)
item_user

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [145]:
item_user = pd.pivot_table(data= df,index='movieId', columns='userId', values='rating').fillna(0)
top_10_similar_users = item_user.corrwith(item_user[6]).drop(6).sort_values(ascending=False)
top_10_similar_users = pd.DataFrame(top_10_similar_users,columns=['Similarity_socre']).reset_index()
movies_list = []
for index, row in top_10_similar_users.iterrows():
    movies = df[df['userId']== row['userId']].sort_values('rating', ascending=False).loc[:, ['movieId','genres','title' ,'userId','rating']]
    movies['predicted_rating'] = movies['rating'] * row['Similarity_socre']
    movies_list.append(movies)
all_movies= pd.concat(movies_list, ignore_index=True).sort_values('predicted_rating', ascending=False)
all_movies = all_movies.drop_duplicates(subset=['movieId'], keep='first')
# Create a list that contains all the wantched movies by this user
watched_movies = list(df[df['userId'] == 6]['movieId'].to_numpy())
# Create a set of movieId values to drop
ids_to_drop = set()
# Collect all movieId values from watched_movies
for index, row in all_movies.iterrows():
    if row['movieId'] in watched_movies:
        ids_to_drop.add(row['movieId'])
# Drop rows based on the collected ids_to_drop
clean_movies = all_movies[~all_movies['movieId'].isin(ids_to_drop)]


Unnamed: 0,movieId,genres,title,userId,rating,predicted_rating
0,318,Crime|Drama,"Shawshank Redemption, The (1994)",117,5.0,2.805506
2,593,Crime|Horror|Thriller,"Silence of the Lambs, The (1991)",117,5.0,2.805506
3,110,Action|Drama|War,Braveheart (1995),117,5.0,2.805506
4,527,Drama|War,Schindler's List (1993),117,5.0,2.805506
1,491,Drama,"Man Without a Face, The (1993)",117,5.0,2.805506
...,...,...,...,...,...,...
100065,149508,Comedy|Romance,Spellbound (2011),89,5.0,-0.154375
100066,136834,Horror,The Eye: Infinity (2005),89,5.0,-0.154375
100068,156025,Adventure|Animation|Children|Comedy,Ice Age: The Great Egg-Scapade (2016),89,5.0,-0.154375
100070,157775,Animation|Comedy,Tenchi Muyô! In Love (1996),89,5.0,-0.154375


##### After modifications

In [183]:
import pandas as pd

def recommend_movies(df, target_user_id, top_n=10):
    """
    Recommend movies for a given user based on the ratings of similar users.

    """
    item_user = pd.pivot_table(data=df, index='movieId', columns='userId', values='rating').fillna(0)
    
    top_similar_users = item_user.corrwith(item_user[target_user_id]).drop(target_user_id).sort_values(ascending=False)
    top_similar_users = pd.DataFrame(top_similar_users, columns=['Similarity_score']).reset_index().head(top_n)

    movies_list = []
    for _, row in top_similar_users.iterrows():
        user_movies = df[df['userId'] == row['userId']].sort_values('rating', ascending=False)
        user_movies['predicted_rating'] = user_movies['rating'] * row['Similarity_score']
        movies_list.append(user_movies)

    all_movies = pd.concat(movies_list, ignore_index=True).sort_values('predicted_rating', ascending=False)
    all_movies = all_movies.drop_duplicates(subset=['movieId'], keep='first')

    watched_movies = set(df[df['userId'] == target_user_id]['movieId'])

    clean_movies = all_movies[~all_movies['movieId'].isin(watched_movies)]

    return clean_movies

In [184]:
recommend_movies(df, 6)

Unnamed: 0,movieId,title,genres,userId,rating,timestamp,rating_count,predicted_rating
306,551,"Nightmare Before Christmas, The (1993)",Animation|Children|Fantasy|Musical,58,5.0,847718937,93,2.591887
311,648,Mission: Impossible (1996),Action|Adventure|Mystery|Thriller,58,5.0,847719035,162,2.591887
314,249,Immortal Beloved (1994),Drama|Romance,58,5.0,847719464,18,2.591887
303,529,Searching for Bobby Fischer (1993),Drama,58,5.0,847719203,40,2.591887
286,280,Murder in the First (1995),Drama|Thriller,58,5.0,847719231,13,2.591887
...,...,...,...,...,...,...,...,...
260,157,Canadian Bacon (1995),Comedy|War,181,2.0,845470963,11,1.113470
521,427,Boxing Helena (1993),Drama|Mystery|Romance|Thriller,240,2.0,849122924,12,1.004510
629,52,Mighty Aphrodite (1995),Comedy|Drama|Romance,411,2.0,835533198,28,0.992501
1096,275,Mixed Nuts (1994),Comedy,559,2.0,845476898,6,0.948686
