## __Content Based Recommendation System__

In [41]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [42]:
df = pd.read_csv('movies.csv')
df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [43]:
# Helper functions
def get_title_from_index(index):
    return df[df.index == index]['title'].values[0]

def get_index_from_title(title):
    return df[df.title == title].index.values[0]

In [44]:
# select necessary feature for content based recommendation system
features = ['genres']

In [45]:
# filling missing value in features
for feature in features:
    df[feature] = df[feature].fillna('')

In [47]:
# create column which combines all selected features
def combine_feature(row):
    return row['genres']

df['combined_features'] = df.apply(combine_feature, axis=1)
df['combined_features'].head()

0    Adventure|Animation|Children|Comedy|Fantasy
1                     Adventure|Children|Fantasy
2                                 Comedy|Romance
3                           Comedy|Drama|Romance
4                                         Comedy
Name: combined_features, dtype: object

In [48]:
# creating count matrix from combined column
cv = CountVectorizer()
count_matrix = cv.fit_transform(df['combined_features'])

In [49]:
# compute Cosine Similarity
cosine_sim = cosine_similarity(count_matrix)

In [50]:
# User preference content
movie_user_likes = 'Superman vs. The Elite (2012)'

In [51]:
# getting index of the movie
movie_index = get_index_from_title(movie_user_likes)

similar_movies = list(enumerate(cosine_sim[movie_index]))

In [52]:
# get a list of similar movie 
sorted_similar_movies = sorted(similar_movies,
                               key=lambda x:x[1], reverse=True)

In [53]:
# Movie recommendation for Joko
i=0
for movie in sorted_similar_movies:
    print(get_title_from_index(movie[0]))
    i=i+1
    if i>5:
        break

Street Fighter II: The Animated Movie (Sutorîto Faitâ II gekijô-ban) (1994)
Batman: Under the Red Hood (2010)
Superman vs. The Elite (2012)
Batman: The Dark Knight Returns, Part 2 (2013)
Justice League: Throne of Atlantis (2015)
Justice League: Gods and Monsters (2015)
