In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import skcriteria as skc
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error
import itertools
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_samples, silhouette_score
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
%matplotlib inline

In [4]:
# Import the Movies dataset
movies = pd.read_csv('movies.csv')
credits = pd.read_csv('credits.csv')
movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


In [6]:
movies.drop(['homepage'], axis = 1,inplace = True)

In [7]:
movies = movies.merge(credits,on='title')
movies.head(2)

Unnamed: 0,budget,genres,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [8]:
movies = movies[['movie_id','title','overview','genres','keywords','cast','crew','budget','popularity','revenue','runtime','vote_count']]
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",237000000,150.437577,2787965087,162.0,11800
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",300000000,139.082615,961000000,169.0,4500
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",245000000,107.376788,880674609,148.0,4466
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",250000000,112.31295,1084939099,165.0,9106
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",260000000,43.926995,284139100,132.0,2124


In [9]:
import ast

In [10]:
def convert(text):
    L = []
    for i in ast.literal_eval(text):
        L.append(i['name']) 
    return L 

In [11]:
movies.dropna(inplace=True)

In [12]:
movies['genres'] = movies['genres'].apply(convert)
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",237000000,150.437577,2787965087,162.0,11800
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",300000000,139.082615,961000000,169.0,4500
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[Action, Adventure, Crime]","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",245000000,107.376788,880674609,148.0,4466
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",250000000,112.31295,1084939099,165.0,9106
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",260000000,43.926995,284139100,132.0,2124


In [13]:
movies['keywords'] = movies['keywords'].apply(convert)
movies.head(2)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",237000000,150.437577,2787965087,162.0,11800
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",300000000,139.082615,961000000,169.0,4500


In [14]:
ast.literal_eval('[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}, {"id": 878, "name": "Science Fiction"}]')

[{'id': 28, 'name': 'Action'},
 {'id': 12, 'name': 'Adventure'},
 {'id': 14, 'name': 'Fantasy'},
 {'id': 878, 'name': 'Science Fiction'}]

In [15]:
def convert3(text):
    L = []
    counter = 0
    for i in ast.literal_eval(text):
        if counter < 3:
            L.append(i['name'])
        counter+=1
    return L
movies['cast'] = movies['cast'].apply(convert)

In [16]:
movies['cast'] = movies['cast'].apply(lambda x:x[0:3])

In [17]:
def fetch_director(text):
    L = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            L.append(i['name'])
    return L 

In [18]:
movies['crew'] = movies['crew'].apply(fetch_director)

In [19]:
movies.sample(5)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
2661,9288,Dungeons & Dragons: Wrath of the Dragon God,"Due to a curse from his former master Profion,...","[Action, Adventure, Fantasy]","[fighter, royalty, curse, dragon]","[Bruce Payne, Mark Dymond, Clemency Burton-Hill]",[Gerry Lively],0,4.901936,0,105.0,36
2099,16290,Jackass 3D,Jackass 3D is a 3-D film and the third movie o...,"[Comedy, Documentary, Action]","[pain, stunts, stuntman, stupidity, comedy, du...","[Johnny Knoxville, Bam Margera, Ryan Dunn]",[Jeff Tremaine],20000000,20.99174,117224271,94.0,428
1946,72113,Carnage,After 11-year-old Zachary Cowan strikes his cl...,"[Comedy, Drama]","[flat, mobile phone, hamster, insult, dark com...","[Kate Winslet, Jodie Foster, Christoph Waltz]",[Roman Polanski],25000000,20.011435,27603069,80.0,738
4533,33430,Twin Falls Idaho,Francis and Blake Falls are Siamese twins who ...,"[Drama, Romance]",[independent film],"[Mark Polish, Michael Polish, Michele Hicks]",[Michael Polish],0,0.619136,0,111.0,11
1654,58151,Fright Night,A teenager suspects his new neighbour is a vam...,"[Horror, Comedy]","[sunrise, vampire, suspicion, remake, suburbia]","[Anton Yelchin, Colin Farrell, Toni Collette]",[Craig Gillespie],17000000,18.27083,24922237,106.0,603


In [20]:
def collapse(L):
    L1 = []
    for i in L:
        L1.append(i.replace(" ",""))
    return L1

In [21]:
movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)

In [22]:
movies.head(2)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],237000000,150.437577,2787965087,162.0,11800
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],300000000,139.082615,961000000,169.0,4500


In [23]:
movies['overview'] = movies['overview'].apply(lambda x:x.split())

In [24]:
movies.head(2)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,budget,popularity,revenue,runtime,vote_count
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],237000000,150.437577,2787965087,162.0,11800
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],300000000,139.082615,961000000,169.0,4500


In [25]:
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

In [26]:
new = movies.drop(columns=['overview','genres','keywords','cast','crew'])

In [27]:
new['tags'] = new['tags'].apply(lambda x: " ".join(x))
new.head()

Unnamed: 0,movie_id,title,budget,popularity,revenue,runtime,vote_count,tags
0,19995,Avatar,237000000,150.437577,2787965087,162.0,11800,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,300000000,139.082615,961000000,169.0,4500,"Captain Barbossa, long believed to be dead, ha..."
2,206647,Spectre,245000000,107.376788,880674609,148.0,4466,A cryptic message from Bond’s past sends him o...
3,49026,The Dark Knight Rises,250000000,112.31295,1084939099,165.0,9106,Following the death of District Attorney Harve...
4,49529,John Carter,260000000,43.926995,284139100,132.0,2124,"John Carter is a war-weary, former military ca..."


In [44]:
def get_title_from_index(index):
    indexMovie = movies[movies.index == index]["title"].values[0]  
    return indexMovie

def get_index_from_title(title):
    return movies[movies['title'] == title].index[0]

def similar_movies(movie_index):
    listofMovies = list(enumerate(cosineSim[movie_index]))
    return listofMovies
def sorted_similar_movies(similar_movies):
    sortedMov = sorted(similar_movies, key=lambda x:x[1],reverse=True)
    return sortedMov
def getIdfromTitle(title):
    movId = movies[movies['title'] == title]["movie_id"].values[0]
    return movId

#creating a Matrix of the combined Features
cv = CountVectorizer(max_features=5000,stop_words='english')
count_matrix = cv.fit_transform(new['tags'])
#Create a Similiarity on the Matrix
cosineSim = cosine_similarity(count_matrix)

In [46]:
movId = getIdfromindex('Avatar')
movId

19995

In [30]:
def getMostSimilar(sorted_similar_movies):
    i = 1
    listMovies = []
    for movie in sorted_similar_movies:
        sortedMovies = get_title_from_index(movie[0])        
        listMovies.append(sortedMovies)
        i = i +1
        if i>10:
            break
    
    return listMovies

In [31]:
def cosimilarity(movie_title):
    movie_index = get_index_from_title(movie_title)    
    simMov = similar_movies(movie_index)
    sorted_simMov = sorted_similar_movies(simMov)
    chk = getMostSimilar(sorted_simMov)
    corAltMovs = chk.pop(0)
    return chk

In [32]:
cosimilarity('Superman')

['Superman II',
 'Superman Returns',
 'Superman III',
 'Superman IV: The Quest for Peace',
 'Man of Steel',
 'Iron Man 2',
 'Batman v Superman: Dawn of Justice',
 'Iron Man 3',
 'X-Men']

In [None]:
movies[movies.index == index]["id"].values[0]

In [33]:
def retMovList(movie_title):
    movsList = cosimilarity(movie_title)    
    list = []
    for movie in movsList:        
        movie_index = get_index_from_title(movie)
        list.append(movie_index)
    return list


In [34]:
retMovList('Superman')

[870, 10, 1299, 2439, 14, 79, 9, 31, 511]

In [35]:
def getMoviesforTopsis(movie_title):
    movieList = retMovList(movie_title)
    re = new.iloc[movieList]  
    re2 = re.iloc[: , :]    
    data = re2.drop([ 'movie_id','title','tags'], axis = 1)
    return data

In [36]:
getMoviesforTopsis('Superman')

Unnamed: 0,budget,popularity,revenue,runtime,vote_count
870,54000000,30.515175,190458706,127.0,629
10,270000000,57.925623,391081192,154.0,1400
1299,39000000,22.164202,75850624,125.0,490
2439,17000000,17.062117,19300000,90.0,318
14,225000000,99.398009,662845518,143.0,6359
79,200000000,77.300194,623933331,124.0,6849
9,250000000,155.790452,873260194,151.0,7004
31,200000000,77.68208,1215439994,130.0,8806
511,75000000,4.66891,296339527,104.0,4097


In [37]:
def getmovieTitles(movie_title):
    data = retMovList(movie_title)    
    altMovies= []     
    for movie in data:
        mov = get_title_from_index(movie)
        altMovies.append(mov)
    return altMovies

In [98]:
def getMCRSmovieTitles(movresultIndex):
    movsSortedResult = movresultIndex 
    altMovies= [] 
    for movie in movsSortedResult:
        mov = get_title_from_index(movie)
        altMovies.append(mov)
    return altMovies

In [99]:
def getMCRSReccommend(movie_title):
    data = getMoviesforTopsis(movie_title)
    #Define the weights to be used for the topsis 
    w = [0.3,0.2,0.2,0.15,0.15]
    #Data Normalisation
    data_norm = data/np.sqrt(np.power(data,2).sum(axis=0))
    #Multiply the Normalised Data with the weight
    data_normW = data_norm*w
    #Get the Highest and Lowest Ideal Alternatives from the Normalised Data 
    positive_ideal = data_normW.max()
    negative_ideal = data_normW.min()
    #Calculate the Positive and Negative Ideals
    SM_P = np.sqrt(np.power(data_normW - positive_ideal,2).sum(axis=1))
    SM_N = np.sqrt(np.power(data_normW - negative_ideal,2).sum(axis=1))
    #Get the Result of the Calculations
    result = pd.DataFrame(SM_N/(SM_N+SM_P))
    result.columns = ['AltMovie']
    sortedResult = result.sort_values(['AltMovie'], ascending= False)
    movresultIndex = sortedResult.index
    sortMovieTitles = getMCRSmovieTitles(movresultIndex)
    sortedResult['title'] = np.array(sortMovieTitles)
    
    return sortedResult

In [100]:
getMCRSReccommend('Superman')

Unnamed: 0,AltMovie,title
9,0.837704,Batman v Superman: Dawn of Justice
31,0.71185,Iron Man 3
14,0.668346,Man of Steel
79,0.593723,Iron Man 2
10,0.524695,Superman Returns
511,0.220313,X-Men
870,0.153561,Superman II
1299,0.097201,Sweet Home Alabama
2439,0.042616,Roar


In [40]:
import pickle

In [41]:
movieDict = new.to_dict()

In [42]:
pickle.dump(movieDict,open('movies_dict.pkl','wb'))
pickle.dump(cosineSim, open('similarity.pkl','wb'))