In [1]:
#Importing Modules
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

import nltk
from nltk import PorterStemmer
from nltk import word_tokenize

import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity

import torchmetrics
from torchmetrics import Accuracy
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

import os
import numpy as np
import pandas as pd
import json

from timeit import default_timer as timer
from tqdm.auto import tqdm
from random import shuffle, seed

#Download
nltk.download('punkt')

#TF-IDF Vectorisation
vectoriser = TfidfVectorizer(
    max_features = 6000,
    min_df = 2,
    max_df = 0.85,
    stop_words = 'english',
    ngram_range = (1, 3)
)

#Stemming
stemmer = PorterStemmer()

#Device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#Printing Versions
print(f"Pytorch Version : {torch.__version__}")
print(f"Torchmetrics Version : {torchmetrics.__version__}")
print(f"Device : {device}")

Pytorch Version : 2.6.0+cu124
Torchmetrics Version : 1.7.1
Device : cpu


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
#Merging it into One Single DataFrame
Movies = pd.read_csv("/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv")
Credits = pd.read_csv("/kaggle/input/tmdb-movie-metadata/tmdb_5000_credits.csv")
Credits.rename(columns = {'movie_id' : 'id'}, inplace = True)
Movies = pd.merge(Movies, Credits, on = "id", how = "right")
Movies

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title_x,vote_average,vote_count,title_y,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.312950,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4798,220000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",,9367,"[{""id"": 5616, ""name"": ""united states\u2013mexi...",es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,"[{""name"": ""Columbia Pictures"", ""id"": 5}]",...,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,El Mariachi,"[{""cast_id"": 1, ""character"": ""El Mariachi"", ""c...","[{""credit_id"": ""52fe44eec3a36847f80b280b"", ""de..."
4799,9000,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 10749, ""...",,72766,[],en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,[],...,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,Newlyweds,"[{""cast_id"": 1, ""character"": ""Buzzy"", ""credit_...","[{""credit_id"": ""52fe487dc3a368484e0fb013"", ""de..."
4800,0,"[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...",http://www.hallmarkchannel.com/signedsealeddel...,231617,"[{""id"": 248, ""name"": ""date""}, {""id"": 699, ""nam...",en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,"[{""name"": ""Front Street Pictures"", ""id"": 3958}...",...,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,"Signed, Sealed, Delivered",7.0,6,"Signed, Sealed, Delivered","[{""cast_id"": 8, ""character"": ""Oliver O\u2019To...","[{""credit_id"": ""52fe4df3c3a36847f8275ecf"", ""de..."
4801,0,[],http://shanghaicalling.com/,126186,[],en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,[],...,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,Shanghai Calling,"[{""cast_id"": 3, ""character"": ""Sam"", ""credit_id...","[{""credit_id"": ""52fe4ad9c3a368484e16a36b"", ""de..."


In [3]:
#TMDB5000 Dataset
class Cinema(Dataset):
    def __init__(self, df):
        self.df = df
        self.movies = []
        for movie in self.df.itertuples():

            #Title, Tagline and Overview
            title = movie.original_title
            tagline = movie.tagline if type(movie.tagline) == str else ''
            overview = movie.overview if type(movie.overview) == str else ''
            tokens = word_tokenize(overview.lower())[:60]
            overview = ' '.join([stemmer.stem(word) for word in tokens])

            #Genres
            genres = json.loads(movie.genres)
            genres = ' '.join([genre['name'] for genre in genres])

            #Keywords
            keywords = json.loads(movie.keywords)
            keywords = ' '.join([keyword['name'] for keyword in keywords])

            #Language
            language = movie.original_language

            #Director
            crew = json.loads(movie.crew)
            director = ''.join([member['name'] for member in crew if member['job'] == 'Director'])
            director = director.lower()

            #Top 3 Cast and Characters
            cast = json.loads(movie.cast)
            cast_3 = ' '.join([member['name'] for member in cast][:3])
            cast_3 = cast_3.lower()

            characters = json.loads(movie.cast)
            characters_4 = ' '.join([member['character'] for member in characters][:4])
            characters_4 = characters_4.lower()

            #Tag
            tag = (genres + ' ') * 5 + (keywords + ' ') * 4
            tokens = word_tokenize(tag.lower())

            #Stemming
            stemmed = ' '.join([stemmer.stem(word) for word in tokens])

            # Combining for Tag
            tag = (title + ' ') * 2 +  stemmed + ' ' + overview + ' ' + (tagline + ' ') * 2
            tag += cast_3 + ' ' + (characters_4 + ' ') * 4 + director * 3 + ' ' + (language + ' ') * 3
                       
            self.movies.append({'title' : title, 'tag' : tag})
            
    def __len__(self):
        return len(self.movies)
    
    def __getitem__(self, idx):
        movie = self.movies[idx]
        return idx, movie['title'], movie['tag']

#Getting the Dataset
Movies = Cinema(Movies)

In [4]:
#For TF-IDF Vectorisation

tags = []
for Movie in Movies:
    tags.append(Movie[2])

matrix = vectoriser.fit_transform(tags)
cos_sim = cosine_similarity(matrix)

In [5]:
# Top 10 Movies
def CineMatch(title, cosine_similarity, dataset):
    try:
        idx = [Movie[0] for Movie in dataset if Movie[1] == title][0]
    except:
        print("Can't Find Matches")
        return
    
    scores = list(enumerate(cosine_similarity[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    top_10 = scores[1:11]

    for i, movie in enumerate(top_10):
        print(f"{i + 1}. {dataset[movie[0]][1]}")

In [6]:
#CineMatch in Action

Movie_Title = input("\nEnter Movie Title : ")
print("Your Top 10 Recommended Movies: ")
CineMatch(
    title = Movie_Title,
    cosine_similarity = cos_sim,
    dataset = Movies
)

while True:
    print("\nDo you want to Continue?")
    choice = input("Enter your Choice (Y or N): ")
    if choice.lower() == "y":
        Movie_Title = input("\nEnter Movie Title : ")
        print("Your Top 10 Recommended Movies: ")
        CineMatch(
            title = Movie_Title,
            cosine_similarity = cos_sim,
            dataset = Movies
        )
        continue
    elif choice.lower() == "n":
        break
    else:
        print('Invalid Choice')


Enter Movie Title :  Avatar


Your Top 10 Recommended Movies: 
1. Alien³
2. Planet of the Apes
3. Moonraker
4. Star Trek Into Darkness
5. Silent Running
6. Aliens
7. Alien
8. Cargo
9. Mission to Mars
10. Beastmaster 2: Through the Portal of Time

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Ice Age


Your Top 10 Recommended Movies: 
1. Dinosaur
2. La Guerre du feu
3. Year One
4. The Clan of the Cave Bear
5. Ice Age: Dawn of the Dinosaurs
6. Ice Age: Continental Drift
7. The Croods
8. The Nut Job
9. The Lion of Judah
10. Ice Age: The Meltdown

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Interstellar


Your Top 10 Recommended Movies: 
1. Silent Running
2. Midnight Special
3. Armageddon
4. Солярис
5. 2001: A Space Odyssey
6. The Astronaut's Wife
7. The Astronaut Farmer
8. About Time
9. Event Horizon
10. Terminator Genisys

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  The Dark Knight


Your Top 10 Recommended Movies: 
1. Batman Begins
2. Batman
3. Batman Forever
4. The Dark Knight Rises
5. Batman: The Dark Knight Returns, Part 2
6. Batman Returns
7. Batman & Robin
8. Batman v Superman: Dawn of Justice
9. Batman
10. Superman

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Superman III


Your Top 10 Recommended Movies: 
1. Superman II
2. Superman
3. Man of Steel
4. Superman Returns
5. Batman v Superman: Dawn of Justice
6. Superman IV: The Quest for Peace
7. X-Men: Days of Future Past
8. 4: Rise of the Silver Surfer
9. Dragonball Evolution
10. The Amazing Spider-Man 2

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  The Avengers


Your Top 10 Recommended Movies: 
1. Avengers: Age of Ultron
2. Captain America: The Winter Soldier
3. Iron Man 3
4. Ant-Man
5. The Incredible Hulk
6. Captain America: Civil War
7. Iron Man 2
8. Iron Man
9. Thor: The Dark World
10. Captain America: The First Avenger

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Pacific Rim


Your Top 10 Recommended Movies: 
1. Journey 2: The Mysterious Island
2. Transformers: Dark of the Moon
3. The Time Machine
4. Megaforce
5. Star Wars: Episode I - The Phantom Menace
6. Transformers: Age of Extinction
7. Dune
8. Star Wars: Episode III - Revenge of the Sith
9. Divergent
10. The Hunger Games: Catching Fire

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Sherlock Holmes


Your Top 10 Recommended Movies: 
1. Sherlock Holmes: A Game of Shadows
2. Young Sherlock Holmes
3. The Losers
4. The Glimmer Man
5. 16 Blocks
6. Whiteout
7. Changing Lanes
8. Nancy Drew
9. Shaft
10. Cellular

Do you want to Continue?


Enter your Choice (Y or N):  Y

Enter Movie Title :  Frozen


Your Top 10 Recommended Movies: 
1. Khumba
2. Jungle Shuffle
3. Sinbad: Legend of the Seven Seas
4. The Wild Thornberrys Movie
5. The Jungle Book 2
6. Epic
7. Zambezia
8. Legend of the Guardians: The Owls of Ga'Hoole
9. Metegol
10. Dragon Nest: Warriors' Dawn

Do you want to Continue?


Enter your Choice (Y or N):  N
