In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

# Feature pipeline shared by the cells below: TF-IDF weighting of the tag
# strings, followed by a truncated SVD projection.
SVD_COMPONENTS = 200  # number of components kept by the SVD
SVD_SEED = 42         # fixed seed so the SVD fit is repeatable

tfidf = TfidfVectorizer(stop_words='english')
svd = TruncatedSVD(random_state=SVD_SEED, n_components=SVD_COMPONENTS)

# Director

In [2]:
# CSV that supplies the `tags` and `title` columns used by the cells below.
director_csv_path = "/content/drive/MyDrive/Director.csv"
movies = pd.read_csv(director_csv_path)

In [3]:
# Peek at the tag string of the row labelled 0 to see what the vectorizer receives.
movies.loc[0, 'tags']

'christophernolan action sciencefict adventur rescu mission dream airplan pari franc virtualr kidnap philosophi spi allegori manipul carcrash heist memori architectur losangel california dreamworld subconsci dileeprao leonardodicaprio johnathangear'

In [4]:
# Learn the TF-IDF vocabulary from the tag strings and encode every row with it.
vector = tfidf.fit_transform(movies['tags'])
# Displayed as (rows, vocabulary terms).
vector.shape

(157262, 356147)

In [5]:
# Project the TF-IDF matrix onto the SVD components to get a much narrower matrix.
# NOTE(review): this rebinds `vector`, so the cell is not idempotent — re-running it
# without first re-running the TF-IDF cell would fit the SVD on its own previous output.
vector = svd.fit_transform(vector)
# Displayed as (rows, SVD components).
vector.shape

(157262, 200)

In [6]:
# All-pairs cosine similarity between the reduced row vectors.
# NOTE(review): the result is a dense square matrix with one row and one column per
# input row. At the 157,262 rows shown in the shape output below, that is roughly
# 198 GB if stored as float64 — confirm the runtime has the memory for it.
# TODO: consider computing the top-n neighbours in row chunks instead.
similarity = cosine_similarity(vector)

In [7]:
# Sanity check: show the (truncated) similarity matrix.
similarity

array([[ 1.00000000e+00,  1.43756852e-01,  5.57601120e-02, ...,
         4.04474582e-04, -4.20446280e-03,  1.01029801e-02],
       [ 1.43756852e-01,  1.00000000e+00, -2.64418742e-03, ...,
         3.28601523e-02,  1.24841563e-02,  8.39485985e-03],
       [ 5.57601120e-02, -2.64418742e-03,  1.00000000e+00, ...,
        -2.17765855e-04,  1.59762140e-02,  1.15829690e-03],
       ...,
       [ 4.04474582e-04,  3.28601523e-02, -2.17765855e-04, ...,
         1.00000000e+00, -1.28533285e-03,  3.83008920e-01],
       [-4.20446280e-03,  1.24841563e-02,  1.59762140e-02, ...,
        -1.28533285e-03,  1.00000000e+00,  4.29759913e-03],
       [ 1.01029801e-02,  8.39485985e-03,  1.15829690e-03, ...,
         3.83008920e-01,  4.29759913e-03,  1.00000000e+00]])

In [8]:
# Confirm the matrix is square, with one row and one column per row of `movies`.
similarity.shape

(157262, 157262)

In [9]:
# Number of nearest neighbours to keep for every row.
top_n = 25

# Integer table that will hold, per row, the indices of its `top_n` most similar
# rows; it starts zero-filled and is populated by the loop in the next cell.
n_rows = similarity.shape[0]
updated_similarity = np.zeros(shape=(n_rows, top_n), dtype=int)
updated_similarity.shape

(157262, 25)

In [10]:
# For every row, rank all rows by descending similarity (argsort of the negated
# scores) and keep only the first `top_n` indices.
for row_number, scores in enumerate(similarity):
    updated_similarity[row_number] = np.argsort(-scores)[:top_n]

In [11]:
# Spot-check: the stored neighbour indices for row 101.
updated_similarity[101]

array([   101,   1403,   2938,   5016,   7837,   1801,  76390,  14692,
         2589, 151487,  68140, 136145, 123866,  46032,  73197,  59928,
         2743, 148679, 118846,   1381,  30169,   4198,   2501,    243,
        19545])

In [12]:
# Confirm the neighbour table is (rows, top_n) after being filled.
updated_similarity.shape

(157262, 25)

In [13]:
def movie_recommend(movie, n=10):
    """Print the titles of the movies most similar to ``movie``.

    The query is resolved to the first row of the global ``movies`` frame whose
    ``title`` equals ``movie`` exactly. The first ``n`` entries of that row in
    the global ``updated_similarity`` table are then printed, one title per
    line. Because a row is normally most similar to itself, the query title is
    usually the first line printed.

    Parameters
    ----------
    movie : str
        Exact title to look up in ``movies['title']``.
    n : int, default 10
        How many stored neighbours to print. Asking for more than the table
        holds simply prints every stored neighbour.

    Raises
    ------
    IndexError
        If no row of ``movies`` has the requested title.
    """
    # Resolve the row by POSITION, not by index label: `updated_similarity` and
    # `.iloc` are both positional, so this stays correct even when `movies`
    # does not carry the default 0..N-1 index.
    matches = np.flatnonzero((movies['title'] == movie).to_numpy())
    if matches.size == 0:
        raise IndexError(f"No movie titled {movie!r} found in the dataset")
    movie_index = matches[0]

    recommended_indices = updated_similarity[movie_index][:n]

    # Explicit column access avoids any clash between a column name and a
    # pandas attribute, and fetches all titles in one positional lookup.
    for title in movies['title'].iloc[recommended_indices]:
        print(title)

In [14]:
# Example query: prints the first ten stored neighbours for this title.
movie_recommend('Around the World in 80 Days')

Around the World in 80 Days
A View to a Kill
Bean
Men in Black: International
Ghost Dance
RED 2
Teheran '43
Sherlock Holmes: A Game of Shadows
The Scorpio Letters
Inspector Clouseau


In [15]:
# Persist only the compact top-n neighbour table, not the full similarity matrix.
similarity_pickle_path = '/content/drive/MyDrive/director_similarity.pkl'
with open(similarity_pickle_path, 'wb') as pickle_file:
    pickle.dump(updated_similarity, pickle_file)