<a href="https://colab.research.google.com/github/Kugelblitz-26/data/blob/main/movieRecommender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# Location of the movie metadata CSV. The relative path is resolved against the
# notebook's current working directory.
movies_csv_path = 'movies.csv'

# Read the whole dataset into a DataFrame
movies_data = pd.read_csv(movies_csv_path)


In [None]:
# Column dtypes and non-null counts
movies_data.info()

# Shape and column labels, printed one "label value" line each
for label, value in (
    ("Dataset Shape:", movies_data.shape),
    ("Column Names:", movies_data.columns),
):
    print(label, value)

# Header line followed by the first rows of the frame
print("Sample Data:", movies_data.head(), sep="\n")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4803 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4803 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [None]:
# Text columns used to describe each movie. Missing entries are replaced with
# empty strings so the string concatenation below never produces NaN.
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
movies_data[selected_features] = movies_data[selected_features].fillna('')

# Join the selected columns, in list order, into one space-separated string per row
combined_features = movies_data[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_data[feature]

# Fit TF-IDF on the combined text and transform it into feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)


In [None]:
# Pairwise cosine similarity between the rows of `feature_vectors`; row i of
# `similarity` is later read as movie i's scores against every movie.
similarity = cosine_similarity(feature_vectors)


In [None]:
def get_movie_recommendations(movie_name, top_n=30):
    """Return the titles of the movies most similar to ``movie_name``.

    The name is fuzzy-matched against ``movies_data['title']`` with
    ``difflib.get_close_matches``. The best match's row in the module-level
    ``similarity`` matrix is then ranked by descending score.

    Parameters
    ----------
    movie_name : str
        Free-text title typed by the user; it does not need to match exactly.
    top_n : int, default 30
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``top_n`` titles, most similar first. The list is empty when no
        title is close enough to ``movie_name`` or when ``top_n`` is not
        positive.
    """
    all_titles = movies_data['title'].tolist()

    # difflib returns an empty list when nothing clears its similarity cutoff;
    # report "no recommendations" instead of failing on an out-of-range index.
    close_matches = difflib.get_close_matches(movie_name, all_titles)
    if not close_matches or top_n <= 0:
        return []
    close_match = close_matches[0]

    # The similarity matrix is addressed by row position, so resolve the match
    # to its position in the frame (first occurrence for duplicated titles)
    # rather than trusting an 'index' column or the DataFrame's index labels.
    movie_position = all_titles.index(close_match)

    # Stable descending sort: ties keep their original row order.
    scores = np.asarray(similarity[movie_position])
    ranked_positions = np.argsort(-scores, kind='stable')

    # Drop the query movie explicitly. It is not guaranteed to sort first when
    # an earlier row has an equally high score.
    ranked_positions = ranked_positions[ranked_positions != movie_position][:top_n]

    return [all_titles[position] for position in ranked_positions]


In [None]:
# Ask for a title and fetch the recommendations for it
favorite_movie = input("Enter your favorite movie name: ")
recommendations = get_movie_recommendations(favorite_movie)

# Print the results as a numbered list starting at 1
print("Movies recommended for you:")
for rank, title in enumerate(recommendations, start=1):
    print("{}. {}".format(rank, title))


Enter your favorite movie name: avatar
Movies recommended for you:
1. Alien
2. Aliens
3. Guardians of the Galaxy
4. Star Trek Beyond
5. Star Trek Into Darkness
6. Galaxy Quest
7. Alien³
8. Cargo
9. Trekkies
10. Gravity
11. Moonraker
12. Jason X
13. Pocahontas
14. Space Cowboys
15. The Helix... Loaded
16. Lockout
17. Event Horizon
18. Space Dogs
19. Machete Kills
20. Gettysburg
21. Clash of the Titans
22. Star Wars: Clone Wars: Volume 1
23. The Right Stuff
24. Terminator Salvation
25. The Astronaut's Wife
26. Planet of the Apes
27. Star Trek
28. Wing Commander
29. Sunshine
30. The Terminator


In [None]:
import pickle

# File that holds the serialized similarity matrix
similarity_pickle_path = 'movie_similarity_matrix.pkl'

# Write the similarity matrix to disk
with open(similarity_pickle_path, 'wb') as out_file:
    pickle.dump(similarity, out_file)

# Read it back. NOTE: unpickling can execute arbitrary code, so this is only
# appropriate because the file was just written by this notebook; do not load
# pickle files from untrusted sources.
with open(similarity_pickle_path, 'rb') as in_file:
    loaded_similarity = pickle.load(in_file)
