In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the movie metadata CSV into a DataFrame.
# The path is relative to the notebook's working directory.
DATASET_PATH = "movie_dataset.csv"
movies_df = pd.read_csv(DATASET_PATH)

In [3]:
# Inspect the dataset: list the available column names so the feature
# columns selected in the next cell can be checked against them.
print(movies_df.columns)

Index(['index', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'cast', 'crew', 'director'],
      dtype='object')


In [4]:
# Select relevant features for the recommendation system.
# These four text columns are the only inputs to the similarity computation:
# they are NaN-filled, concatenated per movie, and vectorized in the cells below.
# NOTE: combine_features names the same four columns explicitly rather than
# reading this list, so the two must be kept in sync by hand.
features = ['keywords', 'cast', 'genres', 'director']

In [5]:
# Replace NaN with '' in every selected feature column in one step, so the
# later string concatenation never embeds the text "nan".
movies_df[features] = movies_df[features].fillna('')

In [6]:
# Combine selected features into a single string
def combine_features(row):
    """Join a row's keywords, cast, genres and director fields with single spaces.

    ``row`` is anything indexable by those four column names (a DataFrame row
    or a plain mapping). Each field is formatted with its default string
    formatting, and the fields are always emitted in that fixed order.
    """
    fields = (row['keywords'], row['cast'], row['genres'], row['director'])
    return " ".join(format(field) for field in fields)

# Apply combine_features to every row (axis=1) and store the resulting string
# in a new 'combined_features' column, which the vectorizer consumes below.
movies_df['combined_features'] = movies_df.apply(combine_features, axis=1)

In [7]:
# Convert text data into feature vectors using CountVectorizer:
# fit the vocabulary on the combined text and encode every movie as token
# counts, producing one row of count_matrix per row of movies_df.
# NOTE(review): with default settings CountVectorizer presumably splits on
# word boundaries, so multi-word names (e.g. a cast member's first and last
# name) become separate tokens — confirm this is acceptable.
cv = CountVectorizer()
count_matrix = cv.fit_transform(movies_df['combined_features'])

In [8]:
# Compute the pairwise cosine similarity between all rows of count_matrix.
# cosine_sim[i] is later read as the similarity of movie i to every movie,
# where i is the movie's row position in movies_df.
cosine_sim = cosine_similarity(count_matrix)

In [15]:
# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``movies_df['title']``. If several rows share
        the title, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` must correspond to the ``i``-th
        row (by position) of ``movies_df``. Defaults to the matrix that
        existed when this cell was executed.
    top_n : int, default 10
        Maximum number of titles to return. Fewer are returned when the
        dataset does not contain that many other movies.

    Returns
    -------
    list of str
        Up to ``top_n`` titles ordered from most to least similar, never
        including the queried movie itself. When no row has the given title,
        the single-element list ``["Movie not found in the dataset."]`` is
        returned instead.
    """
    # Resolve the title to a row *position* (not an index label): both the
    # similarity matrix and the .iloc lookup below are positional.
    matches = (movies_df['title'] == title).to_numpy().nonzero()[0]
    if matches.size == 0:
        return ["Movie not found in the dataset."]
    movie_index = int(matches[0])

    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong when another movie ties with it (e.g. an identical feature
    # string) or when its self-similarity is 0 (all features empty).
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[movie_index])
        if position != movie_index
    ]

    # Stable sort: equally similar movies keep their dataset order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Slicing (rather than indexing a fixed range) tolerates small datasets.
    titles = movies_df['title']
    return [titles.iloc[position] for position, _ in candidates[:max(top_n, 0)]]

In [16]:
# Example usage: look up one title and print its recommendations, one per line.
# If the title is not in the dataset, the single "not found" message returned
# by get_recommendations is printed instead.
# NOTE(review): this cell is duplicated further down for "Thor"; looping over a
# list of titles would avoid the copy.
movie_title = "Avatar"  # Replace with the movie title you want recommendations for
recommendations = get_recommendations(movie_title)
print(f"Movies similar to '{movie_title}':")
for rec in recommendations:
    print(rec)

Movies similar to 'Avatar':
Guardians of the Galaxy
Aliens
Star Wars: Clone Wars: Volume 1
Star Trek Into Darkness
Star Trek Beyond
Alien
Lockout
Jason X
The Helix... Loaded
Moonraker


In [19]:
# Example usage with a second title; same steps as the "Avatar" cell above.
# This rebinds movie_title and recommendations, so the earlier values are
# no longer available after this cell runs.
movie_title = "Thor"  # Replace with the movie title you want recommendations for
recommendations = get_recommendations(movie_title)
print(f"Movies similar to '{movie_title}':")
for rec in recommendations:
    print(rec)

Movies similar to 'Thor':
Thor: The Dark World
The Avengers
Captain America: The Winter Soldier
Avengers: Age of Ultron
Captain America: Civil War
Pirates of the Caribbean: Dead Man's Chest
Cinderella
Jack Ryan: Shadow Recruit
The Amazing Spider-Man 2
Captain America: The First Avenger
