In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Loading Movie Lens datafiles

In [2]:
movies=pd.read_csv("dataset2/movies.csv")
links =pd.read_csv("dataset2/links.csv")
ratings=pd.read_csv("dataset2/ratings.csv")

#### DataFrame for filters

In [3]:
tags=pd.read_csv("dataset2/tags.csv")
genome_scores =pd.read_csv("dataset2/genome-scores.csv")
genome_tags =pd.read_csv("dataset2/genome-tags.csv")

### Genome Tagging

1,128 tags for 13,176 unique movies.

Tag genome records how strongly each tag applies to each movie on a continuous scale from 0 to 1.

0 = does not applies at 1 = applies very strongly


In [4]:
genome = pd.merge(left=genome_scores, right=genome_tags, left_on='tagId', right_on='tagId')

In [5]:
genome.sample(5)

Unnamed: 0,movieId,tagId,relevance,tag
11464159,1150,871,0.11725,sad
5133109,27706,390,0.06175,fighting
14525861,6770,1103,0.122,weed
8499075,582,646,0.387,mentor
2373568,2124,181,0.04,caper


In [6]:
links.head(5)

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [7]:
ratings.head(5)
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,307,3.5,1256677221
1,1,481,3.5,1256677456
2,1,1091,1.5,1256677471
3,1,1257,4.5,1256677460
4,1,1449,4.5,1256677264
...,...,...,...,...
27753439,283228,8542,4.5,1379882795
27753440,283228,8712,4.5,1379882751
27753441,283228,34405,4.5,1379882889
27753442,283228,44761,4.5,1354159524


In [86]:
tags.head(5)
tags

Unnamed: 0,userId,movieId,tag,timestamp
0,14,110,epic,1443148538
1,14,110,Medieval,1443148532
2,14,260,sci-fi,1442169410
3,14,260,space action,1442169421
4,14,318,imdb top 250,1442615195
...,...,...,...,...
1108992,283206,73017,fun,1264379059
1108993,283206,73017,homoerotic subtext,1264379058
1108994,283206,73017,pacing,1264379058
1108995,283206,73017,plot,1264379058


# Genre Based Recommendation System

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english')


In [15]:
tf.get_feature_names()

['action',
 'action adventure',
 'action animation',
 'action children',
 'action comedy',
 'action crime',
 'action documentary',
 'action drama',
 'action fantasy',
 'action film',
 'action horror',
 'action imax',
 'action musical',
 'action mystery',
 'action romance',
 'action sci',
 'action thriller',
 'action war',
 'action western',
 'adventure',
 'adventure animation',
 'adventure children',
 'adventure comedy',
 'adventure crime',
 'adventure documentary',
 'adventure drama',
 'adventure fantasy',
 'adventure film',
 'adventure horror',
 'adventure imax',
 'adventure musical',
 'adventure mystery',
 'adventure romance',
 'adventure sci',
 'adventure thriller',
 'adventure war',
 'adventure western',
 'animation',
 'animation children',
 'animation comedy',
 'animation crime',
 'animation documentary',
 'animation drama',
 'animation fantasy',
 'animation film',
 'animation horror',
 'animation musical',
 'animation mystery',
 'animation romance',
 'animation sci',
 'animation

In [None]:
tfidf_matrix = tf.fit_transform(movies['genres'])

In [9]:
cosine_simmilarity = linear_kernel(tfidf_matrix, tfidf_matrix)

In [10]:
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])

In [22]:
def recommend_movies(title):
    idx = indices[title]
    scores = list(enumerate(cosine_simmilarity[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    scores = scores[1:100]
    movie_indices = [i[0] for i in scores]
    return titles.iloc[movie_indices]

In [24]:
recommend_movies('Toy Story (1995)').head(10)

2210                                           Antz (1998)
3028                                    Toy Story 2 (1999)
3664        Adventures of Rocky and Bullwinkle, The (2000)
3923                      Emperor's New Groove, The (2000)
4791                                 Monsters, Inc. (2001)
10130    DuckTales: The Movie - Treasure of the Lost La...
11009                                     Wild, The (2006)
11899                               Shrek the Third (2007)
13378                       Tale of Despereaux, The (2008)
18403    Asterix and the Vikings (Astérix et les Viking...
Name: title, dtype: object

In [25]:
recommend_movies('Jumanji (1995)').head(10)

59                     Indian in the Cupboard, The (1995)
124                     NeverEnding Story III, The (1994)
990                       Escape to Witch Mountain (1975)
1960            Darby O'Gill and the Little People (1959)
2010                                  Return to Oz (1985)
2078                        NeverEnding Story, The (1984)
2079    NeverEnding Story II: The Next Chapter, The (1...
2315                        Santa Claus: The Movie (1985)
4801    Harry Potter and the Sorcerer's Stone (a.k.a. ...
9719                            Magic in the Water (1995)
Name: title, dtype: object

In [30]:
recommend_movies('Titanic (1997)').head(10)

24                            Leaving Las Vegas (1995)
27                                   Persuasion (1995)
34                                   Carrington (1995)
45                How to Make an American Quilt (1995)
48                        When Night Is Falling (1995)
73                                 Bed of Roses (1996)
82     Once Upon a Time... When We Were Colored (1995)
84                           Angels and Insects (1995)
103              Bridges of Madison County, The (1995)
129                           Frankie Starlight (1995)
Name: title, dtype: object

In [34]:
movies[movies.title.str.contains('Persuasion') | (movies.title.str.contains('Carrington'))]

Unnamed: 0,movieId,title,genres
27,28,Persuasion (1995),Drama|Romance
34,35,Carrington (1995),Drama|Romance
3953,4046,Friendly Persuasion (1956),Drama
10330,34540,Pretty Persuasion (2005),Comedy|Drama
14944,74508,Persuasion (2007),Drama|Romance
45091,164805,Persuasion (1971),(no genres listed)
52561,181137,Rodney Carrington: Here Comes The Truth (2017),Comedy
