## Read Data

In [1]:
import pandas as pd
data = pd.read_csv("MovieData.csv")

In [2]:
data = data[:500000]

In [3]:
data

Unnamed: 0.1,Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,122,5.0,838985046,Boomerang (1992),Comedy|Romance
1,2,1,185,5.0,838983525,"Net, The (1995)",Action|Crime|Thriller
2,3,1,231,5.0,838983392,Dumb & Dumber (1994),Comedy
3,4,1,292,5.0,838983421,Outbreak (1995),Action|Drama|Sci-Fi|Thriller
4,5,1,316,5.0,838983392,Stargate (1994),Action|Adventure|Sci-Fi
...,...,...,...,...,...,...,...
499995,499996,3810,4369,2.5,1163446417,"Fast and the Furious, The (2001)",Action|Crime|Thriller
499996,499997,3810,4370,2.0,1163442263,A.I. Artificial Intelligence (2001),Adventure|Drama|Sci-Fi
499997,499998,3810,4378,4.0,1168804277,Sexy Beast (2000),Comedy|Crime|Drama
499998,499999,3810,4396,1.0,1164741478,"Cannonball Run, The (1981)",Action|Comedy


In [4]:
user_item_matrix = data.pivot_table(values='rating', index='userId', columns='movieId', fill_value=0)

In [5]:
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,64508,64614,64620,64716,64839,64906,64957,64969,64997,65133
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
5,1.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3806,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
3807,0.0,0.0,3.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
3808,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0
3809,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0.0,0,0,0,0.0,0.0,0,0


## Recommendation based on the user ID

In [6]:
# Define the rules for user-based filtering
def user_based_rule(user_id, movie_id):
    # Find the most similar users to the target user
    similar_users = user_item_matrix.T.corrwith(user_item_matrix.loc[user_id])
    similar_users = similar_users.sort_values(ascending=False)
    # Find the movies that the similar users have rated highly
    recommended_movies = user_item_matrix.loc[similar_users.index].mean()
    recommended_movies = recommended_movies.sort_values(ascending=False)
    return recommended_movies

## Recommendation based on the movie ID

In [7]:
# Define the rules for movies-based filtering
def item_based_rule(user_id, movie_id):
    # Find the most similar movies to the target item
    similar_items = user_item_matrix.corrwith(user_item_matrix.loc[:, movie_id])
    similar_items = similar_items.sort_values(ascending=False)

    # Find the movies that the similar items have rated highly
    recommended_movies = user_item_matrix.loc[:, similar_items.index].mean()
    recommended_movies = recommended_movies.sort_values(ascending=False)
    return recommended_movies

In [8]:
user_id = 1
movie_id = 22

In [9]:
# Use the user-based rule to make recommendations
user_based_recommendations = user_based_rule(user_id, movie_id)

# Use the item-based rule to make recommendations
item_based_recommendations = item_based_rule(user_id, movie_id)

In [10]:
user_based_recommendations

movieId
593     2.145737
296     2.026832
318     1.961046
356     1.883547
260     1.827976
          ...   
8728    0.000136
3136    0.000136
6903    0.000136
5851    0.000136
8826    0.000136
Length: 8728, dtype: float64

In [11]:
item_based_recommendations

movieId
593     2.145737
296     2.026832
318     1.961046
356     1.883547
260     1.827976
          ...   
6205    0.000136
5469    0.000136
5918    0.000136
5746    0.000136
7237    0.000136
Length: 8728, dtype: float64

In [12]:
dic = {}
for i in item_based_recommendations.index:
  dic[i] = (item_based_recommendations[i] + user_based_recommendations[i])/2

In [13]:
dic

{593: 2.145736856442386,
 296: 2.0268319259057477,
 318: 1.9610460365023155,
 356: 1.8835467175156633,
 260: 1.8279760283301554,
 480: 1.6432852083900844,
 527: 1.6416507763552166,
 110: 1.6394715336420593,
 457: 1.5866248978479978,
 589: 1.5731408335603378,
 1: 1.5538000544810677,
 50: 1.5208390084445655,
 608: 1.4882865704167803,
 1210: 1.479024788885862,
 150: 1.4539634976845546,
 1196: 1.429719422500681,
 32: 1.416643966221738,
 2858: 1.3951239444293109,
 2571: 1.3503132661400163,
 1198: 1.3264777989648597,
 590: 1.3053663851811494,
 780: 1.3035957504767093,
 858: 1.265186597657314,
 592: 1.2602833015527104,
 380: 1.2578316535004086,
 47: 1.2433941705257423,
 588: 1.2161536366112775,
 1270: 1.173385998365568,
 2028: 1.1387905202941977,
 2762: 1.1058294742576955,
 377: 1.1035140288749659,
 648: 1.0776355216562246,
 364: 1.074911468264778,
 733: 1.0347316807409426,
 1136: 1.0226096431490057,
 1240: 1.0104876055570688,
 1097: 1.0016344320348678,
 595: 0.9950967038953964,
 1197: 0.9779

## Recommended movies

In [14]:
flag = 0
for key in dic.keys():
   print(data[data['movieId'] == key]['title'].tolist()[0])
   flag+=1
   if flag == 10:
     break

Silence of the Lambs, The (1991)
Pulp Fiction (1994)
Shawshank Redemption, The (1994)
Forrest Gump (1994)
Star Wars: Episode IV - A New Hope (a.k.a. Star Wars) (1977)
Jurassic Park (1993)
Schindler's List (1993)
Braveheart (1995)
Fugitive, The (1993)
Terminator 2: Judgment Day (1991)
