#**Movie Recommendation System**

Downloading and loading dataset from Kaggle

In [15]:
!pip install kagglehub[pandas-datasets]

import kagglehub
from kagglehub import KaggleDatasetAdapter

#Load movies file
movies = kagglehub.load_dataset(
    KaggleDatasetAdapter.PANDAS,
    "tmdb/tmdb-movie-metadata",
    "tmdb_5000_movies.csv"
)

print("Movies shape:", movies.shape)
print("First few movies:")
print(movies.head())




  movies = kagglehub.load_dataset(


Using Colab cache for faster access to the 'tmdb-movie-metadata' dataset.
Movies shape: (4803, 20)
First few movies:
      budget                                             genres  \
0  237000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
1  300000000  [{"id": 12, "name": "Adventure"}, {"id": 14, "...   
2  245000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   
3  250000000  [{"id": 28, "name": "Action"}, {"id": 80, "nam...   
4  260000000  [{"id": 28, "name": "Action"}, {"id": 12, "nam...   

                                       homepage      id  \
0                   http://www.avatarmovie.com/   19995   
1  http://disney.go.com/disneypictures/pirates/     285   
2   http://www.sonypictures.com/movies/spectre/  206647   
3            http://www.thedarkknightrises.com/   49026   
4          http://movies.disney.com/john-carter   49529   

                                            keywords original_language  \
0  [{"id": 1463, "name": "culture clash"}, {"id":..

Importing Libraries

In [16]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Selecting useful features

In [17]:
movies = movies[['id','title','overview','genres','keywords']]

Remove missing values

In [18]:
movies.fillna('', inplace = True)

Make content description

In [19]:
movies['tags'] = movies['overview'] + " " + movies['genres'] + " " + movies['keywords']
#combining all text into single column

#Vectorization

Converting text into numerical values

In [20]:
vec = CountVectorizer(max_features = 5000, stop_words = 'english')
vectors = vec.fit_transform(movies['tags']).toarray()

Cal similar scores

In [21]:
sim = cosine_similarity(vectors)
#cosine_similarity measures how close the two vectors are

#Recommention Function

In [22]:
def recommend(movie_name):
  movie_name = movie_name.lower() #to avoid case sentivity

  if movie_name not in movies['title'].str.lower().values: #checking whether movie exists or not
    print(f"Movie '{movie_name}' not found in dataset.")
    return

  movie_index = movies[movies['title'].str.lower() == movie_name].index[0] #getting index of movie

  similar_score = sim[movie_index] #getting similar scores for the movie

  movie_list = sorted(list(enumerate(similar_score)), key=lambda x: x[1], reverse=True)[1:6] #sorting the scores
                                                          #^anonymous function
                           #^enumerate adds index to every item in an iterable
                     #^converts enumerate object into list
  print(f"\n Top 5 recommendations for '{movies.iloc[movie_index].title}':")
  for i in movie_list:
    print(" >", movies.iloc[i[0]].title)

#Testing in multiple movies
recommend("Avatar")
recommend("The Dark Knight")
recommend("Inception")
recommend("Pirates of the Caribbean: At World's End")
recommend("The Lion King")
recommend("Titanic")


 Top 5 recommendations for 'Avatar':
 > The Fifth Element
 > Dune
 > Interstellar
 > Southland Tales
 > Everyone Says I Love You

 Top 5 recommendations for 'The Dark Knight':
 > Batman Begins
 > The Dark Knight Rises
 > Batman Forever
 > Brooklyn's Finest
 > Everyone Says I Love You

 Top 5 recommendations for 'Inception':
 > Southland Tales
 > The Matrix Revolutions
 > Everyone Says I Love You
 > Brooklyn's Finest
 > Mad Max 2: The Road Warrior

 Top 5 recommendations for 'Pirates of the Caribbean: At World's End':
 > Pirates of the Caribbean: Dead Man's Chest
 > Everyone Says I Love You
 > Southland Tales
 > Brooklyn's Finest
 > The Hobbit: An Unexpected Journey

 Top 5 recommendations for 'The Lion King':
 > Everyone Says I Love You
 > Brooklyn's Finest
 > Southland Tales
 > Joe
 > The Simpsons Movie

 Top 5 recommendations for 'Titanic':
 > Everyone Says I Love You
 > Brooklyn's Finest
 > Southland Tales
 > Hard Candy
 > Poseidon
