# Movie Recommendation System

* Demographic Filtering
* Content-Based Filtering
* Collaborative Filtering

## Demographic Filtering

In [1]:
import pandas as pd
import numpy as np

# Load the two CSV files this notebook works from: the credits table first,
# then the movies table.
data_frame_1 = pd.read_csv(filepath_or_buffer="tmdb_5000_credits.csv")
data_frame_2 = pd.read_csv(filepath_or_buffer="tmdb_5000_movies.csv")

In [2]:
# Rename the credits columns positionally so the key column is called "id".
# NOTE(review): this assumes the credits CSV has exactly four columns in
# this order (key, title, cast, crew) -- confirm against the file.
data_frame_1.columns = ["id", "title", "cast", "crew"]

In [3]:
# Attach the cast and crew columns to the movies table, joining on "id".
# The credits "title" column is left out of the join.
data_frame_2 = pd.merge(
    data_frame_2,
    data_frame_1.loc[:, ["id", "cast", "crew"]],
    on="id",
)

In [4]:
# C: mean of "vote_average" across all movies.
C = data_frame_2.loc[:, "vote_average"].mean()
# m: 90th percentile of "vote_count".
m = data_frame_2.loc[:, "vote_count"].quantile(q=0.9)

In [5]:
# Keep only the movies with at least m votes.  Filter first and copy the
# (much smaller) result, rather than copying the whole frame and then
# discarding most of it; q_movies is still an independent frame, so adding
# columns to it later does not touch data_frame_2.
q_movies = data_frame_2.loc[data_frame_2["vote_count"] >= m].copy()

In [6]:
def weighted_rating(x, m=m, C=C):
    """Blend a movie's own average vote with the overall mean vote.

    The result is ``v / (v + m) * R + m / (v + m) * C`` where ``v`` is
    ``x["vote_count"]`` and ``R`` is ``x["vote_average"]``.

    Args:
        x: Record supporting ``x["vote_count"]`` and ``x["vote_average"]``
            lookups (for example a DataFrame row).
        m: Weight given to ``C``.  Defaults to the module-level ``m`` as it
            was when this function was defined.
        C: Value the score is pulled towards.  Defaults to the module-level
            ``C`` as it was when this function was defined.

    Returns:
        The weighted score.
    """
    votes = x["vote_count"]
    average = x["vote_average"]
    total = votes + m
    own_share = votes / total
    prior_share = m / total
    return own_share * average + prior_share * C

In [7]:
# weighted_rating only does column lookups and arithmetic, so it can be
# applied to the whole frame in one vectorised call instead of invoking it
# once per row through DataFrame.apply(axis=1).  This yields the same
# per-movie values and also works when q_movies has no rows.
q_movies["score"] = weighted_rating(q_movies)

In [8]:
# Order the qualifying movies from highest to lowest score.
q_movies = q_movies.sort_values(by="score", ascending=False)

## Content-Based Filtering (Story)

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [10]:
# TF-IDF vectorizer configured to drop scikit-learn's built-in English
# stop-word list before weighting terms.
tfidf = TfidfVectorizer(stop_words = "english")

In [11]:
# Replace missing overviews with empty strings so every row holds text
# before it is handed to the vectorizer.
data_frame_2["overview"] = data_frame_2["overview"].fillna(value="")

In [12]:
# Learn the vocabulary from the overviews and build the TF-IDF matrix,
# one row per movie in data_frame_2.
tfidf_matrix = tfidf.fit_transform(data_frame_2["overview"])

In [13]:
from sklearn.metrics.pairwise import linear_kernel

In [14]:
# Pairwise dot products between every pair of overview vectors.
# NOTE(review): this equals cosine similarity only while the TF-IDF rows
# are L2-normalised (TfidfVectorizer's default norm) -- confirm if the
# vectorizer configuration ever changes.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [15]:
# Reverse lookup: movie title -> row label in data_frame_2.
indices = pd.Series(data_frame_2.index, index=data_frame_2["title"])
# Series.drop_duplicates() compares the *values* (the row labels), not the
# index, so it never removed repeated titles.  De-duplicate on the title
# index instead, keeping the first movie for each title, so that
# indices[title] always yields a single row label.
indices = indices[~indices.index.duplicated(keep="first")]

In [16]:
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the titles of the ten movies most similar to ``title``.

    Args:
        title: Movie title to look up in the module-level ``indices`` Series.
        cosine_sim: Square pairwise-similarity matrix.  Its rows and columns
            are assumed to be in the same order as the rows of
            ``data_frame_2``.  Defaults to the module-level ``cosine_sim``
            as it was when this function was defined.

    Returns:
        A pandas Series holding up to ten titles from ``data_frame_2``,
        most similar first.  The queried movie itself is never included.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # When several movies share the title the lookup yields a Series of row
    # labels; fall back to the first match so a single row is used.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Exclude the queried movie by position instead of assuming it sorts
    # first: a movie whose overview vector is all zeros scores 0 against
    # itself, and movies with identical overviews tie with it, so slicing
    # off element 0 could drop the wrong entry and return the movie itself.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]
    # list.sort is stable, so equally similar movies keep their row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    movie_indices = [position for position, _ in sim_scores[:10]]

    return data_frame_2["title"].iloc[movie_indices]