In [15]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [16]:
# Title of the movie the user likes; it is the seed for the similarity lookup further down.
movie_user_likes = "Avatar"

In [3]:
###### helper functions. Use them when needed #######
def get_title_from_index(index):
    """Return the title of the row of the global ``df`` whose index label equals ``index``.

    The comparison is made against the DataFrame's own index (``df.index``),
    not against a column. If several rows match, the first one wins.

    Raises:
        IndexError: if no row carries the requested index label.
    """
    row_mask = df.index == index
    matching_titles = df.loc[row_mask, "title"]
    return matching_titles.values[0]

def get_index_from_title(title):
    """Return the ``"index"`` column value of the first row titled ``title``.

    The lookup is an exact, case-sensitive equality test against the
    ``"title"`` column of the global ``df``.

    Args:
        title: Movie title to look for.

    Returns:
        The value stored in the ``"index"`` column of the first matching row.

    Raises:
        IndexError: if no row has this title. The exception type is the same
            one the bare ``.values[0]`` lookup produced, but the message now
            names the missing title instead of reporting an out-of-bounds
            array access.
    """
    matches = df.loc[df["title"] == title, "index"]
    if matches.empty:
        raise IndexError("no movie titled {!r} found in the dataset".format(title))
    return matches.values[0]

In [9]:
##Step 1: Read CSV File
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
movie_dataset_path = "D:/GL/data/movie recommendation engine with python/movie_dataset.csv"
df = pd.read_csv(movie_dataset_path)
# Last expression of the cell: shows the dtype of every column that was loaded.
df.dtypes

index                     int64
budget                    int64
genres                   object
homepage                 object
id                        int64
keywords                 object
original_language        object
original_title           object
overview                 object
popularity              float64
production_companies     object
production_countries     object
release_date             object
revenue                   int64
runtime                 float64
spoken_languages         object
status                   object
tagline                  object
title                    object
vote_average            float64
vote_count                int64
cast                     object
crew                     object
director                 object
dtype: object

In [11]:
##Step 2: Select Features
features = ['keywords','cast','genres','director']
# Replace missing values with empty strings in all selected columns in one
# assignment, so that string concatenation on these columns never meets a NaN.
df[features] = df[features].fillna('')

In [13]:
##Step 3: Create a column in DF which combines all selected features
def combine_features(row, columns=('keywords', 'cast', 'genres', 'director')):
    """Join the text of the selected columns of one row into a single string.

    Args:
        row: Mapping-like object for one record (for example the Series that
            ``DataFrame.apply(..., axis=1)`` passes in). It must hold a string
            under every name listed in ``columns``.
        columns: Names of the fields to combine, in output order. Defaults to
            the four columns this function originally hard-coded, so existing
            one-argument calls behave exactly as before.

    Returns:
        str: the selected values separated by single spaces.

    Raises:
        KeyError: if ``row`` lacks one of the requested columns.
        TypeError: if a selected value is not a string (e.g. an unfilled NaN).
    """
    return " ".join(row[column] for column in columns)
# Apply combine_features to every row (axis="columns" hands the function one
# row at a time) and store the result in a new column.
df["combined_features"] = df.apply(combine_features, axis="columns")

In [14]:
# Display the combined text column to check the result of the previous step.
df["combined_features"]

0       culture clash future space war space colony so...
1       ocean drug abuse exotic island east india trad...
2       spy based on novel secret agent sequel mi6 Dan...
3       dc comics crime fighter terrorist secret ident...
4       based on novel mars medallion space travel pri...
5       dual identity amnesia sandstorm love of one's ...
6       hostage magic horse fairy tale musical Zachary...
7       marvel comic sequel superhero based on comic b...
8       witch magic broom school of witchcraft wizardr...
9       dc comics vigilante superhero based on comic b...
10      saving the world dc comics invulnerability seq...
11      killing undercover secret agent british secret...
12      witch fortune teller bondage exotic island mon...
13      texas horse survivor texas ranger partner John...
14      saving the world dc comics superhero based on ...
15      based on novel fictional place brother sister ...
16      new york shield marvel comic superhero based o...
17      sea ca

In [23]:
# Turn the combined text of every row into a matrix of token counts
# (one row per input document, one column per vocabulary token).
cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])
# NOTE(review): toarray() builds the full dense matrix only to print it; this
# presumably costs far more memory than the fitted matrix itself. Confirm it is
# acceptable for the dataset size.
print(count_matrix.toarray())

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [24]:
# Pairwise cosine similarity between all rows of count_matrix: entry [i][j] is
# the similarity between row i and row j, so the result is square.
similarity_score = cosine_similarity(count_matrix)
print(similarity_score)

[[1.         0.10540926 0.12038585 ... 0.         0.         0.        ]
 [0.10540926 1.         0.0761387  ... 0.03651484 0.         0.        ]
 [0.12038585 0.0761387  1.         ... 0.         0.11145564 0.        ]
 ...
 [0.         0.03651484 0.         ... 1.         0.         0.04264014]
 [0.         0.         0.11145564 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.04264014 0.         1.        ]]


In [25]:
# Sanity check: the similarity matrix should be square, one row and one column per input row.
similarity_score.shape

(4803, 4803)

In [26]:
# Similarity of the first row (position 0) to every row, itself included.
similarity_score[0]

array([1.        , 0.10540926, 0.12038585, ..., 0.        , 0.        ,
       0.        ])

In [40]:
# Locate the liked movie, pair every row position with its similarity to that
# movie, then order the pairs from most to least similar.
movie_index = get_index_from_title(movie_user_likes)
similar_movies = [
    (position, score)
    for position, score in enumerate(similarity_score[movie_index])
]
sorted_similar_movies = sorted(
    similar_movies,
    key=lambda pair: pair[1],
    reverse=True,
)

In [43]:
# Print the titles of the 51 highest-ranked entries (fewer if the list is
# shorter). The list is ordered by descending similarity.
for movie_position, _score in sorted_similar_movies[:51]:
    print(get_title_from_index(movie_position))

Avatar
Guardians of the Galaxy
Aliens
Star Wars: Clone Wars: Volume 1
Star Trek Into Darkness
Star Trek Beyond
Alien
Lockout
Jason X
The Helix... Loaded
Moonraker
Planet of the Apes
Galaxy Quest
Gravity
Alien³
Jupiter Ascending
The Wolverine
Silent Running
Zathura: A Space Adventure
Trekkies
Cargo
Wing Commander
Star Trek
Lost in Space
Babylon A.D.
The Fifth Element
Oblivion
Titan A.E.
AVP: Alien vs. Predator
The Empire Strikes Back
Dragonball Evolution
Superman Returns
Divergent
John Carter
The Black Hole
The Ice Pirates
Memoirs of an Invisible Man
Starship Troopers
The Astronaut's Wife
Machete Kills
Soldier
The Abyss
Damnation Alley
Men in Black
Space Cowboys
Space Dogs
The Time Machine
Sheena
Captain America: Civil War
Star Trek: Insurrection
Oz: The Great and Powerful
