# Simple Recommender
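We compute an IMDB-style weighted rating for every movie in the dataset and recommend the highest-scoring ones. The score is $WR = \frac{v}{v+m} R + \frac{m}{v+m} C$, where $v$ is the movie's vote count, $R$ its average rating, $m$ the minimum number of votes required to qualify, and $C$ the mean rating across the whole dataset.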

In [1]:
import os

import pandas as pd

class SimpleRecommender:
    """Rank movies by an IMDB-style weighted rating.

    The score blends a movie's own average rating with the mean rating of the
    whole dataset, weighted by how many votes the movie received:

        score = v / (v + m) * R + m / (v + m) * C

    where ``v`` is the movie's vote count, ``R`` its average rating, ``m`` the
    minimum number of votes required to qualify and ``C`` the mean rating of
    all movies in the dataset.

    Attributes:
        df(object): the full dataset loaded from the CSV file
        m(float): vote-count threshold, set by ``precondition``
        C(float): mean of 'vote_average' over the dataset, set by ``precondition``
        df_q_movies(object): qualified movies sorted by score, set by ``main``
    """

    def __init__(self, database):
        """Load the movie dataset.

        Args:
            database(str): path (or file-like object) of the CSV file; it must
                provide the columns 'title', 'vote_count', 'vote_average'
                and 'runtime'
        """
        # low_memory=False makes pandas infer each column's dtype from the
        # whole file instead of chunk by chunk, which avoids mixed-type
        # columns and the DtypeWarning that comes with them.
        self.df = pd.read_csv(database, low_memory=False)

    def precondition(self, df, quantile_num=0.80, runtime=(45, 300)):
        """Rule out movies that do not meet the runtime and vote-count conditions.

        As a side effect this stores the vote threshold ``self.m`` and the mean
        rating ``self.C`` (both computed on the unfiltered ``df``), which
        ``weighted_rating`` needs.

        Args:
            df(object): the dataframe(pandas), which is the dataset
            quantile_num(float): quantile of 'vote_count' used as the minimum
                number of votes (0.80 means the 80th percentile)
            runtime(sequence): keep movies whose runtime is at least
                runtime[0] minutes and at most runtime[1] minutes

        Return:
            df_q_movies(object): an independent copy of the rows that made the cut
        """
        # m (float): the minimum number of votes required for the movie to be in the chart
        self.m = df['vote_count'].quantile(quantile_num)
        # C (float): the mean rating of all the movies in the dataset
        self.C = df['vote_average'].mean()

        min_runtime, max_runtime = runtime[0], runtime[1]
        qualified = (
            (df['runtime'] >= min_runtime)
            & (df['runtime'] <= max_runtime)
            & (df['vote_count'] >= self.m)
        )

        # Return a copy so callers can add columns without writing into a
        # slice of the original frame (the cause of SettingWithCopyWarning).
        return df[qualified].copy()

    def weighted_rating(self, df):
        """Compute the IMDB weighted rating.

        ``precondition`` must have been called first, because this method
        reads ``self.m`` and ``self.C``.

        Args:
            df(object): a dataframe(pandas) or a single row of it, providing
                'vote_count' and 'vote_average'

        Return:
            weight_score: the weighted score; one value per row when ``df``
                is a dataframe, a single float when it is one row
        """
        v = df['vote_count']
        R = df['vote_average']
        weight_score = (v / (v + self.m) * R) + (self.m / (self.m + v) * self.C)

        return weight_score

    def main(self, see_top=25, quantile_num=0.80, runtime=(45, 300)):
        """Recommend the movies with the highest weighted score.

        Args:
            see_top(int): number of movies to display in the recommendation
            quantile_num(float): forwarded to ``precondition``
            runtime(sequence): forwarded to ``precondition``

        Return:
            recommendation list(object): the top ``see_top`` qualified movies
                with their title, vote count, average rating, score and runtime
        """
        qualified = self.precondition(self.df, quantile_num=quantile_num, runtime=runtime)

        # weighted_rating works on whole columns, so call it once on the frame
        # rather than once per row; this also behaves when no movie qualifies.
        qualified['score'] = self.weighted_rating(qualified)

        # Sort movies in descending order of their scores
        self.df_q_movies = qualified.sort_values('score', ascending=False)

        return self.df_q_movies[['title', 'vote_count', 'vote_average', 'score', 'runtime']].head(see_top)

In [2]:
# Path of the movie-metadata CSV file. Set the MOVIES_METADATA_CSV environment
# variable to point at your own copy; otherwise the original author's local
# path is used as a fallback.
database = os.environ.get(
    'MOVIES_METADATA_CSV',
    r'C:\Users\MMIL\Panithan\Git_projects\movies_metadata.csv',
)

# Load the dataset once, then display the five highest-scoring movies
top_score = SimpleRecommender(database)
top_score.main(see_top=5)

  self.df = pd.read_csv(path_database, low_memory=True)


Unnamed: 0,title,vote_count,vote_average,score,runtime
10309,Dilwale Dulhania Le Jayenge,661.0,9.1,8.855148,190.0
314,The Shawshank Redemption,8358.0,8.5,8.482863,142.0
834,The Godfather,6024.0,8.5,8.476278,175.0
40251,Your Name.,1030.0,8.5,8.366584,106.0
12481,The Dark Knight,12269.0,8.3,8.289115,152.0


In [3]:
# Same ranking as above, extended to the ten best-scoring titles
top_score.main(see_top=10)

Unnamed: 0,title,vote_count,vote_average,score,runtime
10309,Dilwale Dulhania Le Jayenge,661.0,9.1,8.855148,190.0
314,The Shawshank Redemption,8358.0,8.5,8.482863,142.0
834,The Godfather,6024.0,8.5,8.476278,175.0
40251,Your Name.,1030.0,8.5,8.366584,106.0
12481,The Dark Knight,12269.0,8.3,8.289115,152.0
2843,Fight Club,9678.0,8.3,8.286216,139.0
292,Pulp Fiction,8670.0,8.3,8.284623,154.0
522,Schindler's List,4436.0,8.3,8.270109,195.0
23673,Whiplash,4376.0,8.3,8.269704,105.0
5481,Spirited Away,3968.0,8.3,8.266628,125.0
