# IMDB

In [3]:
from math import sqrt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

In [13]:
movies_df=pd.read_csv('movies.csv')
rating_df=pd.read_csv('ratings.csv')

## Content based filtering
- on this step , we want to use colaborative filtering , so without considering the user attention data, we work on movies data

## Movies analysis

In [14]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


# Movies_df data cleaning
- it contains:
    - fetching the information of year from title and making a new column for it
    - converting genres to a list of values of them with 0 , 1

In [15]:
# by Regex techniques we try to extract a part of the title of movie which refers to the year of production
# the code below make a column named 'year' and insert '(year)' information on it
movies_df['year']=movies_df.title.str.extract('(\(\d\d\d\d\))',expand=False)
# now we use the created column and replace the mentioned information with year without '()'
movies_df.year=movies_df.year.str.extract('(\d\d\d\d)',expand=False)
# now we replace the year information of title molumn with nothing. to do this we break down the tite from ( charecter and strip it
movies_df['title'] = movies_df['title'].str.split('(').str[0].str.strip()  
movies_df.title=movies_df.title.apply(lambda x: x.strip())
movies_df.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995


In [16]:
# by this line we convert the column genres data to an array to be able to use each item of it in our colculation
movies_df.genres=movies_df.genres.str.split('|')
movies_df.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995
2,3,Grumpier Old Men,"[Comedy, Romance]",1995
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995
4,5,Father of the Bride Part II,[Comedy],1995


### now , we use two for. 1st for move on each line and 2st for each genre item of each line and make columns for each genre

In [17]:
movies_df_genres=movies_df.copy()
for index,row in movies_df.iterrows():
    for genre in row['genres']:
        movies_df_genres.at[index,genre]=1
movies_df_genres=movies_df_genres.fillna(0)
movies_df_genres.head()

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Grumpier Old Men,"[Comedy, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II,[Comedy],1995,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# starting Recommender system

In [42]:
userInput = [
            {'title':'Breakfast Club, The', 'rating':5},
            {'title':'Toy Story', 'rating':3.5},
            {'title':'Jumanji', 'rating':2},
            {'title':"Pulp Fiction", 'rating':5},
            {'title':'Akira', 'rating':4.5},
            
         ] 
# making a dataframe withe list above
inputMovies = pd.DataFrame(userInput)
inputMovies

Unnamed: 0,title,rating
0,"Breakfast Club, The",5.0
1,Toy Story,3.5
2,Jumanji,2.0
3,Pulp Fiction,5.0
4,Akira,4.5


#### we dont have the movie ID so , first of all we should find the movies IDfrom our data frame then merge it with the user preference rating dataframe

In [22]:
input_id=movies_df[movies_df.title.isin(inputMovies.title.tolist())]
input_id
inputMovies = pd.merge(input_id, inputMovies,on='title')
inputMovies = inputMovies.drop('year', axis=1)
inputMovies=inputMovies.drop('genres',axis=1)
inputMovies

Unnamed: 0,movieId,title,movieId_x,movieId_y,rating
0,1,Toy Story,1,1,3.5
1,2,Jumanji,2,2,2.0
2,296,Pulp Fiction,296,296,5.0
3,1274,Akira,1274,1274,4.5
4,1968,"Breakfast Club, The",1968,1968,5.0


now we need to have enough information about the genres of user movies, so we have to join this data to our refined movies dataframe

In [23]:
user_movies=movies_df_genres[movies_df_genres.movieId.isin(inputMovies.movieId.tolist())]
user_movies

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
257,296,Pulp Fiction,"[Comedy, Crime, Drama, Thriller]",1994,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
973,1274,Akira,"[Action, Adventure, Animation, Sci-Fi]",1988,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1445,1968,"Breakfast Club, The","[Comedy, Drama]",1985,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


after making a full dataframe of our user movie it is the turn of droping unusefull information which are:
- title
- genres
- year
- movieId

In [24]:
user_movies=user_movies.reset_index(drop=True)
user_movies_genre=user_movies.drop('title',axis=1).drop('genres',axis=1).drop('year',axis=1).drop('movieId',axis=1)
user_movies_genre

Unnamed: 0,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


we achieved a stunning dataframe to make our recommender system

In [25]:
inputMovies

Unnamed: 0,movieId,title,movieId_x,movieId_y,rating
0,1,Toy Story,1,1,3.5
1,2,Jumanji,2,2,2.0
2,296,Pulp Fiction,296,296,5.0
3,1274,Akira,1274,1274,4.5
4,1968,"Breakfast Club, The",1968,1968,5.0


now , we can make a list of genres with higher rating of interest which will be the basic information to recommend based on content of movies

In [48]:
# we use transpose to be able to multiply two matrix and then use 'dot' to multiply them
user_profile=user_movies_genre.transpose().dot(inputMovies.rating)
user_profile

Adventure             13.5
Animation             10.0
Children               8.5
Comedy                11.5
Fantasy                8.5
Romance                0.0
Drama                  6.5
Action                 5.0
Crime                  2.0
Thriller               2.0
Horror                 0.0
Mystery                0.0
Sci-Fi                 5.0
War                    0.0
Musical                0.0
Documentary            0.0
IMAX                   0.0
Western                0.0
Film-Noir              0.0
(no genres listed)     0.0
dtype: float64

user_profile tells us which generes have more desirable based on user rating information

In [31]:
genreTable=movies_df_genres.set_index(movies_df_genres.movieId)
#now we drop the unnecessary columns
genreTable=genreTable.drop('movieId',axis=1).drop('genres',axis=1).drop('year',axis=1).drop('title',axis=1)
genreTable

Unnamed: 0_level_0,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Mystery,Sci-Fi,War,Musical,Documentary,IMAX,Western,Film-Noir,(no genres listed)
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193583,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193585,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193587,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
genreTable.shape

(9742, 20)

# our recommender system is ready so we use User profile and genre table to make a recomendation list

In [34]:
recommendationtable=(user_profile*genreTable).sum(axis=1)/(user_profile.sum())
recommendationtable.head()

movieId
1    0.594406
2    0.293706
3    0.188811
4    0.328671
5    0.188811
dtype: float64

- we can easily sort the list and determine the best 

In [37]:
recommendationtable=recommendationtable.sort_values(ascending=False)
recommendationtable.head()

movieId
134853    0.734266
148775    0.685315
117646    0.678322
6902      0.678322
81132     0.671329
dtype: float64

# recommended movies based on content based filtering

In [38]:
movies_df.loc[movies_df['movieId'].isin(recommendationtable.head(20).keys())]

Unnamed: 0,movieId,title,genres,year
559,673,Space Jam,"[Adventure, Animation, Children, Comedy, Fanta...",1996
1390,1907,Mulan,"[Adventure, Animation, Children, Comedy, Drama...",1998
2250,2987,Who Framed Roger Rabbit?,"[Adventure, Animation, Children, Comedy, Crime...",1988
3460,4719,Osmosis Jones,"[Action, Animation, Comedy, Crime, Drama, Roma...",2001
4631,6902,Interstate 60,"[Adventure, Comedy, Drama, Fantasy, Mystery, S...",2002
5490,26340,"Twelve Tasks of Asterix, The","[Action, Adventure, Animation, Children, Comed...",1976
5819,32031,Robots,"[Adventure, Animation, Children, Comedy, Fanta...",2005
6047,40339,Chicken Little,"[Action, Adventure, Animation, Children, Comed...",2005
6448,51939,TMNT,"[Action, Adventure, Animation, Children, Comed...",2007
6455,52287,Meet the Robinsons,"[Action, Adventure, Animation, Children, Comed...",2007


# Colaborative filtering
after implementation of content based recommender system, we work on colaborative filtering
we have two types of colaborative filtering:
- item based
- user based
    - on this project we use user based colaborative filtering

In [4]:
rating_df.shape

(100836, 4)

In [5]:
from math import sqrt

In [6]:
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


we can ignore 'timestamp' information

In [11]:
#rating_df = rating_df.drop('timestamp', axis=1)
rating_df.head()

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


The process for creating a User Based recommendation system is as follows:

- Select a user with the movies the user has watched
- Based on his rating to movies, find the top X neighbours
- Get the watched movie record of the user for each neighbour.
- Calculate a similarity score using some formula
- Recommend the items with the highest score

In [7]:
userInput2 = [
            {'title':'Breakfast Club, The', 'rating':5},
            {'title':'Toy Story', 'rating':3.5},
            {'title':'Jumanji', 'rating':2},
            {'title':"Pulp Fiction", 'rating':5},
            {'title':'Akira', 'rating':4.5}
         ] 
inputMovies2 = pd.DataFrame(userInput2)
inputMovies2

Unnamed: 0,title,rating
0,"Breakfast Club, The",5.0
1,Toy Story,3.5
2,Jumanji,2.0
3,Pulp Fiction,5.0
4,Akira,4.5


we want to make a join between user behavior and movies to find input user interest, so, we do not need to genres information in colaborative filtering

In [18]:
movies_df=movies_df.drop('genres',axis=1)
movies_df.head()

Unnamed: 0,movieId,title,year
0,1,Toy Story,1995
1,2,Jumanji,1995
2,3,Grumpier Old Men,1995
3,4,Waiting to Exhale,1995
4,5,Father of the Bride Part II,1995


In [20]:
# with code below we can find user movie id on our movie dataframe
inputId2 = movies_df[movies_df['title'].isin(inputMovies2['title'].tolist())]
inputId2
inputMovies2 = pd.merge(inputId2, inputMovies2)
#Dropping information we won't use from the input dataframe
#Final input dataframe
#If a movie you added in above isn't here, then it might not be in the original 
#dataframe or it might spelled differently, please check capitalisation.
#inputMovies2

finding the user who has mutual movieid with input user

In [115]:
#Filtering out users that have watched movies that the input has watched and storing it
userSubset = rating_df[rating_df['movieId'].isin(inputMovies2['movieId'].tolist())]
userSubset.shape
userSubset.head(10)

Unnamed: 0,userId,movieId,rating
0,1,1,4.0
16,1,296,3.0
320,4,296,1.0
422,4,1968,4.0
516,5,1,4.0
533,5,296,5.0
560,6,2,4.0
692,6,296,2.0
874,7,1,4.5
1026,8,2,4.0


Now we have some little groups for each use which shows that special user and its rated movie. for example userid=5

In [138]:
userSubsetGroup = userSubset.groupby(['userId'])

In [139]:
userSubsetGroup.get_group('8')

Unnamed: 0,userId,movieId,rating
1026,8,2,4.0
1049,8,296,4.0


Sorting based on the most mutual movies with our user

In [137]:
userSubsetGroup = sorted(userSubsetGroup,  key=lambda x: len(x[1]), reverse=True)
userSubsetGroup[:3]


[(('177',),
        userId  movieId  rating
  24900    177        1     5.0
  24901    177        2     3.5
  24930    177      296     5.0
  25069    177     1274     2.0
  25129    177     1968     3.5),
 (('219',),
        userId  movieId  rating
  31524    219        1     3.5
  31525    219        2     2.5
  31554    219      296     4.0
  31628    219     1274     2.5
  31680    219     1968     3.0),
 (('274',),
        userId  movieId  rating
  39229    274        1     4.0
  39230    274        2     3.5
  39288    274      296     5.0
  39448    274     1274     4.0
  39549    274     1968     4.0)]

In [129]:
userSubsetGroup = userSubsetGroup[0:100]

# Pearson Correlation to find the degree of other users similarity with our input user

In [91]:
pearsonCorrelationDict = {}

#For every user group in our subset
for name, group in userSubsetGroup:
    #Let's start by sorting the input and current user group so the values aren't mixed up later on
    group = group.sort_values(by='movieId')
    inputMovies = inputMovies.sort_values(by='movieId')
    #Get the N for the formula
    nRatings = len(group)
    #Get the review scores for the movies that they both have in common
    temp_df = inputMovies2[inputMovies2['movieId'].isin(group['movieId'].tolist())]
    #And then store them in a temporary buffer variable in a list format to facilitate future calculations
    tempRatingList = temp_df['rating'].tolist()
    #Let's also put the current user group reviews in a list format
    tempGroupList = group['rating'].tolist()
    #Now let's calculate the pearson correlation between two users, so called, x and y
    Sxx = sum([i**2 for i in tempRatingList]) - pow(sum(tempRatingList),2)/float(nRatings)
    Syy = sum([i**2 for i in tempGroupList]) - pow(sum(tempGroupList),2)/float(nRatings)
    Sxy = sum( i*j for i, j in zip(tempRatingList, tempGroupList)) - sum(tempRatingList)*sum(tempGroupList)/float(nRatings)
    
    #If the denominator is different than zero, then divide, else, 0 correlation.
    if Sxx != 0 and Syy != 0:
        pearsonCorrelationDict[name] = Sxy/sqrt(Sxx*Syy)
    else:
        pearsonCorrelationDict[name] = 0

I made new version of pearsoncorrelation because the key value hd not appropriate structure, so made a cope of it

In [150]:
pearsonCorrelationDict
prsn_crlsion={}
for key_tuple, value in pearsonCorrelationDict.items():
    key = key_tuple[0]
    #print(f"Key: {key}, Value: {value}")
    prsn_crlsion[key]=value
prsn_crlsion

{91: 0.43852900965351443,
 177: 0.0,
 219: 0.45124262819713973,
 274: 0.716114874039432,
 298: 0.9592712306918567,
 414: 0.9376144618769914,
 474: 0.11720180773462392,
 477: 0.4385290096535153,
 480: 0.7844645405527362,
 483: 0.08006407690254357,
 599: 0.7666866491579839,
 608: 0.920736884379251,
 50: 0.15713484026367722,
 57: -0.7385489458759964,
 68: 0.0,
 103: 0.5222329678670935,
 135: 0.8703882797784892,
 182: 0.9428090415820635,
 202: 0.5222329678670935,
 217: 0.30151134457776363,
 226: 0.9438798074485389,
 288: 0.6005325641789633,
 307: 0.9655810287305759,
 318: 0.44486512077567225,
 322: 0.5057805388588731,
 330: 0.9035942578600878,
 357: 0.5606119105813882,
 434: 0.9864036607532465,
 448: 0.30151134457776363,
 469: 0.8164965809277261,
 561: 0.5222329678670935,
 600: 0.18442777839082938,
 606: 0.9146591207600472,
 610: -0.47140452079103173,
 18: 1.0,
 19: -0.5,
 21: 0,
 45: 0.5000000000000009,
 63: -0.4999999999999982,
 64: 0.0,
 66: 0.5000000000000009,
 107: -1.0,
 122: 0.86602

above code shows the users with their meature of similarity

now we make a dataframe from our pearon which had dictionary structure

In [151]:
pearsonDF = pd.DataFrame.from_dict(prsn_crlsion, orient='index')
pearsonDF.columns = ['similarityIndex']
pearsonDF['userId'] = pearsonDF.index
pearsonDF.index = range(len(pearsonDF))
pearsonDF.head()

Unnamed: 0,similarityIndex,userId
0,0.438529,91
1,0.0,177
2,0.451243,219
3,0.716115,274
4,0.959271,298


sorted verion of pearson dataframe. now we can see the top similarityindex on the top of the list

In [152]:
topUsers=pearsonDF.sort_values(by='similarityIndex', ascending=False)[0:50]
topUsers.head()

Unnamed: 0,similarityIndex,userId
43,1.0,132
34,1.0,18
63,1.0,305
82,1.0,489
86,1.0,525


now we join top users dataframe with their movies rating to find to what extent they liked their movies

In [158]:

topUsers['userId'] = topUsers['userId'].astype(str)
rating_df['userId'] = rating_df['userId'].astype(str)

topUsersRating = pd.merge(
    topUsers,
    rating_df,
    left_on='userId',
    right_on='userId',
    how='inner'
)
topUsersRating

Unnamed: 0,similarityIndex,userId,movieId,rating
0,1.0,132,1,2.0
1,1.0,132,17,3.0
2,1.0,132,29,2.0
3,1.0,132,32,3.0
4,1.0,132,34,1.5
...,...,...,...,...
27826,0.5,45,53322,5.0
27827,0.5,45,53993,4.5
27828,0.5,45,53996,4.5
27829,0.5,45,54272,5.0


now we can easily make a multiply between similarity index of top users and thir rating to make a weighted rating of them

In [159]:
#Multiplies the similarity by the user's ratings
topUsersRating['weightedRating'] = topUsersRating['similarityIndex']*topUsersRating['rating']
topUsersRating.head()

Unnamed: 0,similarityIndex,userId,movieId,rating,weightedRating
0,1.0,132,1,2.0,2.0
1,1.0,132,17,3.0,3.0
2,1.0,132,29,2.0,2.0
3,1.0,132,32,3.0,3.0
4,1.0,132,34,1.5,1.5


now we can make a group of each user interesting movies and sum their weighted rating

In [160]:

#Applies a sum to the topUsers after grouping it up by userId
tempTopUsersRating = topUsersRating.groupby('movieId').sum()[['similarityIndex','weightedRating']]
tempTopUsersRating.columns = ['sum_similarityIndex','sum_weightedRating']
tempTopUsersRating.head()

Unnamed: 0_level_0,sum_similarityIndex,sum_weightedRating
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,36.354096,133.167946
2,31.005292,94.904257
3,8.783859,26.381456
4,0.866025,1.732051
5,7.165336,19.775255


now we can make an empty dataframe to put our recommendation movies on it, then, the last part of the recommendation formula will execute to make an average of weighted rating

In [161]:
#Creates an empty dataframe
recommendation_df = pd.DataFrame()
#Now we take the weighted average
recommendation_df['weighted average recommendation score'] = tempTopUsersRating['sum_weightedRating']/tempTopUsersRating['sum_similarityIndex']
recommendation_df['movieId'] = tempTopUsersRating.index
recommendation_df.head()

Unnamed: 0_level_0,weighted average recommendation score,movieId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.66308,1
2,3.060905,2
3,3.003402,3
4,2.0,4
5,2.75985,5


## Here is the top 10 nearest interesting movie with out input user

In [162]:
recommendation_df = recommendation_df.sort_values(by='weighted average recommendation score', ascending=False)
recommendation_df.head(10)

Unnamed: 0_level_0,weighted average recommendation score,movieId
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
3310,5.0,3310
7579,5.0,7579
905,5.0,905
1211,5.0,1211
140627,5.0,140627
4298,5.0,4298
152711,5.0,152711
633,5.0,633
5537,5.0,5537
5485,5.0,5485


# recommended movies based on user based colaborative recommender system

In [163]:
movies_df.loc[movies_df['movieId'].isin(recommendation_df.head(10)['movieId'].tolist())]

Unnamed: 0,movieId,title,year
536,633,Denise Calls Up,1995
687,905,It Happened One Night,1934
912,1211,Wings of Desire,1987
2484,3310,"Kid, The",1921
3189,4298,Rififi,1955
3905,5485,Tadpole,2002
3936,5537,Satin Rouge,2002
4969,7579,Pride and Prejudice,1940
9022,140627,Battle For Sevastopol,2015
9234,152711,Who Killed Chea Vichea?,2010
