# RECOMENDED MOVIES FOR USER USING RECOMENDER SYSTEM

In [1]:
import pandas as pd
import numpy as np

#required libraries for recomender system using surprise package
from surprise import KNNWithMeans,Dataset, accuracy,Reader
from surprise.model_selection import train_test_split

In [2]:
# reading all the required csv file
rating = pd.read_csv("ratings.csv")
mov = pd.read_csv("movies.csv")
link = pd.read_csv("links.csv")
tag = pd.read_csv("tags.csv")
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [3]:
#printing shape of all dataframe
rating.shape,mov.shape,link.shape,tag.shape

((100836, 4), (9742, 3), (9742, 3), (3683, 4))

In [4]:
#lets check the head of mov dataframe
mov.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
#reader class to mention the rating scale
reader = Reader(rating_scale=(1,5))

data = Dataset.load_from_df(rating[["userId","movieId","rating"]],reader)

In [6]:
#divide dataset in trainset and test set
[trainset,testset] = train_test_split(data,test_size=.15,shuffle=True)

recom = KNNWithMeans(k=70, sim_options={"Name":"Cosine","user_based":True})

recom.fit(data.build_full_trainset())

test_pred = recom.test(testset)

RMSE = accuracy.rmse(test_pred)

print("RMSE =",RMSE)

Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.7154
RMSE = 0.7153884127069229


### 2.Display predicted rating for a particular userID and MovieID combinations (both taken as keyboard input)

In [7]:
# Display predicted rating for a particular userID and MovieID combinations (both taken as keyboard input)
userID = int(input("Enter user ID: "))
movieID = int(input("Enter movie ID: "))
print("The rating value for  userID:",userID, "and", "movieID:",movieID, "is :",round((recom.predict(userID,movieID)[3]),2))

Enter user ID: 40
Enter movie ID: 600
The rating value for  userID: 40 and movieID: 600 is : 4.26


----

### 3.	Recommend a user to watch top 10 movies, which has not watched yet (as per his rating predictions).Take userID as a keyboard input. Fix the threshold rating as 2.5

In [48]:
mov_lst = {} #creating empty dictionary to append userid as key and movie id is value
user_id = int(input("Enter the user id:"))
for mov_id in range(1,193609):
    if recom.predict(user_id,mov_id)[3] > 2.5:
        mov_lst[mov_id] = recom.predict(user_id,mov_id)[3]

# finding 10 highest values in a Dictionary  
  
from collections import Counter
k = Counter(mov_lst)

high = k.most_common(10)
print("\n")
#printing those 10 recomended movie
print("Sugested 10 movies for user {0} are listed below".format(user_id))
print("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
# suuggesting 10  movies 
for i in range(10):
    print(i+1,"-->",list(mov[mov.movieId==high[i][0]]["title"])[0])

Enter the user id:600


Sugested 10 movies for user 600 are listed below
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
1 --> Bossa Nova (2000)
2 --> The Big Bus (1976)
3 --> Galaxy of Terror (Quest) (1981)
4 --> Looker (1981)
5 --> Alien Contamination (1980)
6 --> Master of the Flying Guillotine (Du bi quan wang da po xue di zi) (1975)
7 --> Unfaithfully Yours (1948)
8 --> Match Factory Girl, The (Tulitikkutehtaan tyttö) (1990)
9 --> Seve (2014)
10 --> Villain (1971)


### 4.Display the MovieID, IMDB ID, Average User Rating (excluding predictions), genres and tag of all the movies found in  Step 3 as a data frame

---

#### Average rating calculation

In [17]:
#finding the mean of rating for all movies
avg_rating = {}
for i in range(1,193609):
    if (i==33)|(i==35)|(i==37):#these movieid has no rating at all
        pass
    else:
        avg_rating[i] = round((rating[rating.movieId == i]['rating'].mean()),2)#finding avg rating

In [9]:
# with the rating dataframe we are joining left join
df = pd.merge(rating[["movieId","rating"]],tag[["movieId","tag"]],how="left",on="movieId")

df2 = pd.merge(df,link[["movieId","imdbId"]],how="left",on="movieId")
df3 = pd.merge(df2,mov[["movieId","genres"]],how="left",on="movieId")
df3.head()

Unnamed: 0,movieId,rating,tag,imdbId,genres
0,1,4.0,pixar,114709,Adventure|Animation|Children|Comedy|Fantasy
1,1,4.0,pixar,114709,Adventure|Animation|Children|Comedy|Fantasy
2,1,4.0,fun,114709,Adventure|Animation|Children|Comedy|Fantasy
3,3,4.0,moldy,113228,Comedy|Romance
4,3,4.0,old,113228,Comedy|Romance


In [55]:
# printing MovieID, IMDB ID,Average User Rating,genres and tag for all the movies of above userid
ls = []
for i in range(10):
    ls.append(high[i][0])
df4 = df3[df3["movieId"].isin(ls)]

#dropping rating column and we will replace that with avg_rating
df4 = df4.drop("rating",axis=1) #once you drop this column do not run it just comment it

In [56]:
id_rating_pair = [(x,y) for x,y in avg_rating.items() if x in ls]

new_rating = []
for i in range(len(id_rating_pair)):
    new_rating.append(id_rating_pair[i][1])

In [57]:
# sort movieId ascending order and adding a new column avg_rating
df5 = df4.sort_values(by="movieId")
df5["avg_rating"] = new_rating
df5

Unnamed: 0,movieId,tag,imdbId,genres,avg_rating
153010,3567,,180837,Comedy|Drama|Romance,5.0
207339,5490,,74205,Action|Comedy,5.0
904,5746,,82431,Action|Horror|Mystery|Sci-Fi,5.0
905,5764,,82677,Drama|Horror|Sci-Fi|Thriller,4.5
908,6835,,82000,Action|Horror|Sci-Fi,5.0
909,7899,,72913,Action,4.5
274789,25947,,40919,Comedy,5.0
255907,40491,depression,98532,Comedy|Drama,5.0
208682,132333,,3149640,Documentary|Drama,5.0
150252,136850,,67952,Crime|Drama|Thriller,5.0


# ~*~*~*~*~*~*~*~*~*~*~**~*~*~*~*~*~*~*~*~*END~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~