# <center>Rating-based movie recommendation</center>

## findClosestCritics
This function returns a list of the three critics whose movie ratings are most similar 
    to the personal ratings, based on the Euclidean distance over the movies rated by both. 
    The lower the distance, the more similar the critic's ratings are to the person's. 
     
    Parameters:
    criticsRatingDF - provides data about critics' ratings (a Title column plus one column per critic)
    personalRatingDF - provides data about personal ratings (a Title column plus the person's ratings)

In [1]:
def findClosestCritics(criticsRatingDF, personalRatingDF):
    """Return the critics whose ratings are closest to the personal ratings.

    Closeness is the Euclidean distance between a critic's ratings and the
    personal ratings, computed over the movies present in both frames
    (rows are matched on the 'Title' column). Smaller means more similar.

    Parameters:
    criticsRatingDF - DataFrame with a 'Title' column plus one ratings
        column per critic
    personalRatingDF - DataFrame with a 'Title' column plus the person's
        ratings; the first non-Title column is taken as the personal ratings

    Returns:
    A list of at most three critic names (column labels), closest first.
    Critics at equal distance keep their column order. Neither input frame
    is modified.
    """
    import numpy as np
    import pandas as pd

    # Inner join keeps only the movies rated by both sides. The key is named
    # explicitly so that a critic column sharing the person's column name is
    # not silently used as a second join key; the empty right-hand suffix
    # keeps critic names unchanged in that case.
    matched = pd.merge(personalRatingDF, criticsRatingDF, on='Title',
                       suffixes=('_personal', ''))
    matched = matched.drop(columns=['Title'])

    personal = matched.iloc[:, 0]
    critics = matched.iloc[:, 1:]

    # Column-wise Euclidean distance to the personal ratings.
    # skipna=False: a missing rating yields a NaN distance (which sorts last)
    # instead of a distance computed over fewer movies.
    squaredDiffs = critics.sub(personal, axis=0) ** 2
    distances = np.sqrt(squaredDiffs.sum(axis=0, skipna=False))

    # A stable sort makes the order of tied critics deterministic.
    closest = distances.sort_values(kind='stable')
    return list(closest.index[:3])

## recommendMovies
This function computes the top-rated unwatched movies in each genre category, 
    based on the average of the three critics' ratings.
     
    Parameters:
    personalRatingDF - provides data about personal ratings 
    criticsRatingDF - provides data about critics' ratings
    criticsList - a list of three critics, whose ratings of movies are most similar to 
    those provided in the personal ratings data
    movieDF - provides data about movies info

In [2]:

def recommendMovies(personalRatingDF, criticsRatingDF, criticsList, movieDF):
    """Return the top-rated unwatched movie(s) of every genre.

    A movie's rating is the mean of the ratings given by the critics in
    criticsList. A movie counts as watched when its title appears in
    personalRatingDF. Within each 'Genre1' value, every unwatched movie whose
    rating equals the genre maximum is returned, so ties produce several rows.

    Parameters:
    personalRatingDF - DataFrame with a 'Title' column of watched movies
    criticsRatingDF - DataFrame with a 'Title' column plus one ratings
        column per critic
    criticsList - names of the critic columns to average
    movieDF - DataFrame with 'Title', 'Genre1', 'Year' and 'Runtime' columns

    Returns:
    A new DataFrame with the columns 'Title', 'criticsRating', 'Genre1',
    'Year', 'Runtime' (in that order), sorted by genre and then by title.
    None of the input frames is modified.
    """
    import pandas as pd

    # Mean rating of the selected critics, keyed by movie title.
    # reindex keeps a critic that is missing from the frame as an all-NaN
    # column rather than raising.
    # NOTE(review): assumes each title appears once in criticsRatingDF.
    meanByTitle = (criticsRatingDF.set_index('Title')
                   .reindex(columns=criticsList)
                   .mean(axis=1))

    # reindex builds a new frame, so the caller's movieDF is left untouched.
    movies = movieDF.reindex(
        columns=['Title', 'criticsRating', 'Genre1', 'Year', 'Runtime'])
    # Look the rating up by title instead of copying it over by row position,
    # so the two files do not have to list the movies in the same order.
    movies['criticsRating'] = movies['Title'].map(meanByTitle)

    alreadyWatched = movies['Title'].isin(personalRatingDF['Title'])
    unwatched = movies[~alreadyWatched].sort_values(by='Title', kind='stable')

    # Highest rating reached in each genre among the unwatched movies.
    bestPerGenre = unwatched.groupby(by='Genre1', as_index=False)['criticsRating'].max()

    # Keep the movies that reach their genre's best rating.
    topRated = pd.merge(unwatched, bestPerGenre, on=['Genre1', 'criticsRating'])

    # Rows arrive ordered by title; a stable sort by genre therefore keeps
    # titles alphabetical inside each genre.
    return topRated.sort_values(by='Genre1', kind='stable')


## printRecommendations
This function prints all the recommended movies, one per line, in the order of the given table 
    (recommendMovies returns it sorted alphabetically by genre).
    
    Parameters:
    TopRatedUnwatchedDF - the recommended movies, as returned by recommendMovies
    personName - the name of the person for whom the recommendations are made

In [3]:
 
def printRecommendations(TopRatedUnwatchedDF, personName):
    """Print one line per recommended movie under a header naming the person.

    Every line shows the quoted title (padded so that the genres line up),
    the genre in parentheses, the rating rounded to two decimals and the
    year. The runtime is appended only when it is not null.

    Parameters:
    TopRatedUnwatchedDF - DataFrame with a 'Title' column whose first five
        columns are, by position: title, rating, genre, year, runtime
    personName - name shown in the header line

    Returns:
    None. Only the header is printed when the frame has no rows.
    """
    import pandas as pd

    print('Recommendations for', personName, ':')

    # Width of the longest title plus its two quotes; default=0 keeps an
    # empty table from raising on max() of an empty sequence.
    longestTitle = max((len(title) for title in TopRatedUnwatchedDF['Title']),
                       default=0)
    titleWidth = longestTitle + 2

    for row in TopRatedUnwatchedDF.itertuples(index=False, name=None):
        # Columns are read by position, not by name.
        title, rating, genre, year, runtime = row[0], row[1], row[2], row[3], row[4]
        paddedTitle = ('"' + title + '"').ljust(titleWidth)
        roundedRating = round(rating, 2)

        if pd.isnull(runtime):
            print(paddedTitle,
                  '(' + genre + '), rating:', str(roundedRating) + ',', str(year))
        else:
            print(paddedTitle,
                  '(' + genre + '), rating:', str(roundedRating) + ',',
                  str(year) + ', runs', runtime)

    return

## Main Function
The main function that is called to start the program. 

In [4]:

def main():
    """Prompt for the data location and print movie recommendations.

    Reads one line containing, separated by whitespace: the folder holding
    the files, the movies file, the critics file and the personal ratings
    file. The three CSV files are loaded from that folder (latin1 encoded),
    the three closest critics are printed, and then the recommendations for
    the person named by the second column of the personal ratings file.

    Raises:
    ValueError - when fewer than four names are entered
    """
    import os
    import pandas as pd

    arg = input('Please enter the name of the folder with files, the name of movies file,\n'
                'the name of critics file, the name of personal ratings file, separated by spaces:\n')
    names = arg.split()
    if len(names) < 4:
        raise ValueError('expected a folder name and three file names, got '
                         + str(len(names)) + ' value(s)')
    folder, movieFile, criticsFile, personalFile = names[:4]

    # Join the folder and file names instead of chdir-ing with a hard-coded
    # '\\': this works with any path separator and leaves the working
    # directory untouched even if loading or processing fails.
    movieDF = pd.read_csv(os.path.join(folder, movieFile), encoding='latin1')
    criticsRatingDF = pd.read_csv(os.path.join(folder, criticsFile), encoding='latin1')
    personalRatingDF = pd.read_csv(os.path.join(folder, personalFile), encoding='latin1')

    criticsList = findClosestCritics(criticsRatingDF, personalRatingDF)
    print('\n',criticsList,'\n')

    TopRatedUnwatchedDF = recommendMovies(personalRatingDF, criticsRatingDF, criticsList, movieDF)

    # The personal ratings file is laid out as Title, <person's name>.
    personName = personalRatingDF.columns[1]
    printRecommendations(TopRatedUnwatchedDF, personName)

    return

## Output:

In [11]:
main() # Enter: data-tiny  tinyIMDB.csv tinyratings.csv tinyp.csv

Please enter the name of the folder with files, the name of movies file,
the name of critics file, the name of personal ratings file, separated by spaces:
data-tiny  tinyIMDB.csv tinyratings.csv tinyp.csv

 ['Aldbridge', 'Moon', 'Benris'] 

Recommendations for Kimberwick :
"127 Hours"  (Adventure), rating: 8.0, 2010, runs 94 min
"50/50"      (Comedy), rating: 7.0, 2011, runs 100 min
"About Time" (Comedy), rating: 7.0, 2013, runs 123 min


In [12]:
main() # Enter: data  IMDB.csv ratings.csv   p8.csv

Please enter the name of the folder with files, the name of movies file,
the name of critics file, the name of personal ratings file, separated by spaces:
data  IMDB.csv ratings.csv   p8.csv

 ['Quartermaine', 'Arvon', 'Merrison'] 

Recommendations for Catulpa :
"Star Wars: The Force Awakens"    (Action), rating: 9.67, 2015, runs 136 min
"The Grand Budapest Hotel"        (Adventure), rating: 9.0, 2014, runs 99 min
"The Martian"                     (Adventure), rating: 9.0, 2015, runs 144 min
"How to Train Your Dragon"        (Animation), rating: 9.67, 2010
"Kubo and the Two Strings"        (Animation), rating: 9.67, 2016
"Hacksaw Ridge"                   (Biography), rating: 9.33, 2016, runs 139 min
"What We Do in the Shadows"       (Comedy), rating: 9.0, 2014
"Prisoners"                       (Crime), rating: 8.33, 2013, runs 153 min
"Spotlight"                       (Crime), rating: 8.33, 2015, runs 128 min
"The Perks of Being a Wallflower" (Drama), rating: 9.67, 2012, runs 102 min
"