In [12]:
!mv kaggle.json /root/.kaggle

In [13]:
!kaggle datasets download -d disham993/9000-movies-dataset

Downloading 9000-movies-dataset.zip to /content
  0% 0.00/1.70M [00:00<?, ?B/s]
100% 1.70M/1.70M [00:00<00:00, 98.8MB/s]


In [14]:
!unzip 9000-movies-dataset.zip

Archive:  9000-movies-dataset.zip
  inflating: mymoviedb.csv           


In [21]:
import pandas as pd

dataset = pd.read_csv("mymoviedb.csv",lineterminator="\n")
dataset.head()

Unnamed: 0,Release_Date,Title,Overview,Popularity,Vote_Count,Vote_Average,Original_Language,Genre,Poster_Url
0,2021-12-15,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,5083.954,8940,8.3,en,"Action, Adventure, Science Fiction",https://image.tmdb.org/t/p/original/1g0dhYtq4i...
1,2022-03-01,The Batman,"In his second year of fighting crime, Batman u...",3827.658,1151,8.1,en,"Crime, Mystery, Thriller",https://image.tmdb.org/t/p/original/74xTEgt7R3...
2,2022-02-25,No Exit,Stranded at a rest stop in the mountains durin...,2618.087,122,6.3,en,Thriller,https://image.tmdb.org/t/p/original/vDHsLnOWKl...
3,2021-11-24,Encanto,"The tale of an extraordinary family, the Madri...",2402.201,5076,7.7,en,"Animation, Comedy, Family, Fantasy",https://image.tmdb.org/t/p/original/4j0PNHkMr5...
4,2021-12-22,The King's Man,As a collection of history's worst tyrants and...,1895.511,1793,7.0,en,"Action, Adventure, Thriller, War",https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...


In [22]:
data = dataset[['Title','Overview','Genre']]
data.head()

Unnamed: 0,Title,Overview,Genre
0,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,"Action, Adventure, Science Fiction"
1,The Batman,"In his second year of fighting crime, Batman u...","Crime, Mystery, Thriller"
2,No Exit,Stranded at a rest stop in the mountains durin...,Thriller
3,Encanto,"The tale of an extraordinary family, the Madri...","Animation, Comedy, Family, Fantasy"
4,The King's Man,As a collection of history's worst tyrants and...,"Action, Adventure, Thriller, War"


In [23]:
data.shape

(9827, 3)

In [24]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words='english')
data['Overview'] = data['Overview'].fillna('')
tfidf_matrix = tfidf.fit_transform(data['Overview'])
tfidf_matrix.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Overview'] = data['Overview'].fillna('')


(9827, 28483)

In [25]:
# model learning
from sklearn.metrics.pairwise import linear_kernel

model = linear_kernel(tfidf_matrix, tfidf_matrix)

In [26]:
# mapping movie name to the indices
indices = pd.Series(data.index,index=data['Title']).drop_duplicates()
indices[0:10]

Title
Spider-Man: No Way Home    0
The Batman                 1
No Exit                    2
Encanto                    3
The King's Man             4
The Commando               5
Scream                     6
Kimi                       7
Fistful of Vengeance       8
Eternals                   9
dtype: int64

In [27]:
def get_recommendations(title,model=model):
    idx = indices[title]
    sim_scores = list(enumerate(model[idx]))
    sim_scores.sort(key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:4]
    movie_indices = [i[0] for i in sim_scores]
    similar_movies = data['Title'].iloc[movie_indices].tolist()

    return similar_movies

In [31]:
get_recommendations("The King's Man")

['Artemis Fowl', 'Madu Murni', 'Papillon']

In [29]:
# saving the model
model_name = "recommendation_model"

import joblib

joblib.dump(model,model_name)

['recommendation_model']

In [30]:
# trying out the saved model
saved_model = joblib.load(model_name)
get_recommendations("Tiempo",model=saved_model)

['Little Nemo: Adventures in Slumberland',
 'Coming Home in the Dark',
 'Naruto Shippuden the Movie: Bonds']

In [33]:
movie_url = dataset[["Title","Poster_Url"]]
movie_url.head()

Unnamed: 0,Title,Poster_Url
0,Spider-Man: No Way Home,https://image.tmdb.org/t/p/original/1g0dhYtq4i...
1,The Batman,https://image.tmdb.org/t/p/original/74xTEgt7R3...
2,No Exit,https://image.tmdb.org/t/p/original/vDHsLnOWKl...
3,Encanto,https://image.tmdb.org/t/p/original/4j0PNHkMr5...
4,The King's Man,https://image.tmdb.org/t/p/original/aq4Pwv5Xeu...


In [35]:
def combine_url(movie_name):
    result = movie_url.loc[movie_url['Title'] == movie_name, 'Poster_Url'].iloc[0]
    return [movie_name,result]
combine_url("The Batman")

['The Batman',
 'https://image.tmdb.org/t/p/original/74xTEgt7R36Fpooo50r9T25onhq.jpg']

In [41]:
def combine_movie(movie_name):
    d = dict({f"{movie_name}" : []}) # d is a dictionary
    movies = get_recommendations(movie_name)
    for movie in movies:
        d[f"{movie_name}"].append(combine_url(movie))

    return d

combine_movie("The Batman")

{'The Batman': [['Batman: The Long Halloween, Part Two',
   'https://image.tmdb.org/t/p/original/5X1n5q08mZ7NpNpxehMFODxfNYq.jpg'],
  ['Batman: The Long Halloween, Part One',
   'https://image.tmdb.org/t/p/original/sR7gppb0YGjwLvE6Vnj6wYv5MnW.jpg'],
  ['Batman: Return of the Caped Crusaders',
   'https://image.tmdb.org/t/p/original/tHVnZuvVPeg6cVufF4hcNtCJsJK.jpg']]}

In [63]:
def rec():
    array = list()
    new_array = movie_url["Title"]
    for m in new_array:
        try:
            array.append(combine_movie(m))
        except:
            pass
    return array

In [64]:
json_value = rec()
json_value

[{'Spider-Man: No Way Home': [['Spider-Man',
    'https://image.tmdb.org/t/p/original/gh4cZbhZxyTbgxQPxD0dOudNPTn.jpg'],
   ['Spider-Man Strikes Back',
    'https://image.tmdb.org/t/p/original/fb5R5DUOT4NfhZn8903c1gYGHJz.jpg'],
   ['Spider-Man',
    'https://image.tmdb.org/t/p/original/gh4cZbhZxyTbgxQPxD0dOudNPTn.jpg']]},
 {'The Batman': [['Batman: The Long Halloween, Part Two',
    'https://image.tmdb.org/t/p/original/5X1n5q08mZ7NpNpxehMFODxfNYq.jpg'],
   ['Batman: The Long Halloween, Part One',
    'https://image.tmdb.org/t/p/original/sR7gppb0YGjwLvE6Vnj6wYv5MnW.jpg'],
   ['Batman: Return of the Caped Crusaders',
    'https://image.tmdb.org/t/p/original/tHVnZuvVPeg6cVufF4hcNtCJsJK.jpg']]},
 {'No Exit': [['Martyrs',
    'https://image.tmdb.org/t/p/original/do92C3aiADF8SHC7gkRI65z6o9S.jpg'],
   ['A Street Cat Named Bob',
    'https://image.tmdb.org/t/p/original/nBYG0D2FcbL1m926sIj7RN4m0sb.jpg'],
   ['[REC]',
    'https://image.tmdb.org/t/p/original/5XsVGgo8I12v3KlPcD0r1CNHMC6.jpg']]},


In [65]:
# converting to json
import json

file_name = "movie_recommendations.json"

with open(file_name,"w") as json_file:
    json.dump(json_value,json_file,indent=4)

print("File is created")

File is created
