# Movie Recommender System

### Simple Recommender 
Based on popularity

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import accuracy_score, classification_report

In [None]:
df = pd.read_csv("movies_metadata.csv")
df.head()

In [None]:
df.shape

In [None]:
from ast import literal_eval

df['genres'] = df['genres'].fillna('[]').apply(literal_eval).apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])
df['year'] = pd.to_datetime(df['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

In [None]:
def top_movies_charts(genre, percentile=0.95):
    t_df = df
    if genre != 'All':
        s = t_df.apply(lambda x: pd.Series(x['genres']), axis=1).stack().reset_index(level=1, drop=True)
        s.name = 'genre'
        t_df = t_df.drop('genres', axis=1).join(s)
        t_df = t_df[t_df['genre'] == genre]
    vote_counts = t_df[t_df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = t_df[t_df['vote_average'].notnull()]['vote_average'].astype('int')
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)

    cols = ['title', 'year', 'vote_count', 'vote_average', 'popularity']
    if genre == 'All':
        cols.append('genres')
    qualified_df = t_df[(t_df['vote_count'] >= m) & (t_df['vote_count'].notnull()) & (t_df['vote_average'].notnull())][cols]
    qualified_df['vote_count'] = qualified_df['vote_count'].astype('int')
    qualified_df['vote_average'] = qualified_df['vote_average'].astype('int')

    qualified_df['wr'] = qualified_df.apply(lambda x: (x['vote_count']/(x['vote_count']+m) * x['vote_average']) + (m/(m+x['vote_count']) * C), axis=1)
    qualified_df = qualified_df.sort_values('wr', ascending=False)
    return qualified_df


In [None]:
top_movies_charts('All').head(10)

In [86]:
top_movies_charts('Romance', 0.85).head(10)

Unnamed: 0,title,year,vote_count,vote_average,popularity,wr
10309,Dilwale Dulhania Le Jayenge,1995,661,9,34.457,8.565285
351,Forrest Gump,1994,8147,8,48.3072,7.971357
876,Vertigo,1958,1162,8,18.2082,7.811667
40251,Your Name.,2016,1030,8,34.461252,7.789489
883,Some Like It Hot,1959,835,8,11.8451,7.745154
1132,Cinema Paradiso,1988,834,8,14.177,7.744878
19901,Paperman,2012,734,8,7.19863,7.713951
37863,Sing Street,2016,669,8,10.672862,7.689483
882,The Apartment,1960,498,8,11.9943,7.599317
38718,The Handmaiden,2016,453,8,16.727405,7.566166


In [None]:
'''
Inspiration
1. https://www.kaggle.com/rounakbanik/movie-recommender-systems
'''