In [1]:
pip install gensim

Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
data = pd.read_csv('movies.csv')
X = np.array(data.genres)
text_data = X

In [9]:
tokenized_data = [text.split() for text in text_data]
model = Word2Vec(tokenized_data, min_count=1)

In [10]:
# Embedding function
def embed_text(text):
    embedded_vector = []
    for word in text:
        if word in model.wv:
            embedded_vector.append(model.wv[word])
    if len(embedded_vector) > 0:
        return np.mean(embedded_vector, axis=0)
    else:
        return np.zeros(model.vector_size)

In [11]:
# Compute embeddings for the text data
embeddings = np.array([embed_text(text) for text in tokenized_data])

In [12]:
# Compute cosine similarity
cos_sim_data = pd.DataFrame(cosine_similarity(embeddings))

In [13]:
# Define recommendation function
def give_recommendations(index, print_recommendation=False, print_genres=False):
    index_recomm = cos_sim_data.loc[index].sort_values(ascending=False).index.tolist()[1:11]
    movies_recomm = data['title'].loc[index_recomm].values
    movies_recomm_genres = data['genres'].loc[index_recomm].values
    result = {'Movies': movies_recomm, 'Index': index_recomm}
    if print_recommendation:
        print('The watched movie is: %s, its genres: %r\n' % (data['title'].loc[index], data['genres'].loc[index]))
        for i in range(len(movies_recomm)):
            print('Rank %i recommended movie is: %s, its genres: %r\n' % (i+1, movies_recomm[i], movies_recomm_genres[i]))
    return result

In [14]:
give_recommendations(1, True)

The watched movie is: Jumanji (1995), its genres: "Adventure Children's Fantasy"

Rank 1 recommended movie is: NeverEnding Story, The (1984), its genres: "Adventure Children's Fantasy"

Rank 2 recommended movie is: Darby O'Gill and the Little People (1959), its genres: "Adventure Children's Fantasy"

Rank 3 recommended movie is: NeverEnding Story III, The (1994), its genres: "Adventure Children's Fantasy"

Rank 4 recommended movie is: Kids of the Round Table (1995), its genres: "Adventure Children's Fantasy"

Rank 5 recommended movie is: Indian in the Cupboard, The (1995), its genres: "Adventure Children's Fantasy"

Rank 6 recommended movie is: Labyrinth (1986), its genres: "Adventure Children's Fantasy"

Rank 7 recommended movie is: Escape to Witch Mountain (1975), its genres: "Adventure Children's Fantasy"

Rank 8 recommended movie is: Jumanji (1995), its genres: "Adventure Children's Fantasy"

Rank 9 recommended movie is: Quest for Camelot (1998), its genres: "Adventure Animation Ch

{'Movies': array(['NeverEnding Story, The (1984)',
        "Darby O'Gill and the Little People (1959)",
        'NeverEnding Story III, The (1994)',
        'Kids of the Round Table (1995)',
        'Indian in the Cupboard, The (1995)', 'Labyrinth (1986)',
        'Escape to Witch Mountain (1975)', 'Jumanji (1995)',
        'Quest for Camelot (1998)', 'Star Kid (1997)'], dtype=object),
 'Index': [2092, 1974, 124, 55, 59, 1898, 996, 1, 1812, 1698]}