<a href="https://colab.research.google.com/github/Bhavyaveer44/MiniProjML/blob/main/Movie_Recommendation_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
import pandas as pd
import ast
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [34]:
# Read the movie metadata CSV and immediately narrow it to the columns
# the recommender works with.
df = pd.read_csv("tmdb_5000_movies.csv")[
    ['title', 'genres', 'keywords']  # Optionally might add 'overview'
]

In [35]:
# Function to parse 'genres' and 'keywords' columns
def extract_names(text):
    """Collapse a stringified list of ``{"name": ...}`` records into one string.

    Parameters
    ----------
    text : str
        A Python/JSON-style literal such as
        ``'[{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}]'``.

    Returns
    -------
    str
        The ``name`` values joined by single spaces, in their original order.
        An empty string is returned when ``text`` cannot be parsed as a
        literal or does not have the expected list-of-dicts shape (for
        example a missing/NaN cell).
    """
    try:
        items = ast.literal_eval(text)
        return " ".join(item['name'] for item in items)
    except (ValueError, SyntaxError, TypeError, KeyError, RecursionError):
        # Only malformed-input errors are treated as "no names": bad literal
        # syntax, non-string cells such as NaN, entries that are not dicts,
        # or dicts without a 'name' key. Anything else (notably
        # KeyboardInterrupt) is allowed to propagate.
        return ""

# Flatten both record-list columns into plain space-separated name strings.
# Note: this overwrites the raw columns, so running it a second time feeds
# already-flattened text back into extract_names, which returns "" for it.
for column in ('genres', 'keywords'):
    df[column] = df[column].apply(extract_names)

# Combining selected features into one text field: genres, a space, keywords
df['features'] = df['genres'].add(" ").add(df['keywords'])

# Cleaning the text
def clean_text(text):
    """Lower-case ``text`` and replace every non-word character with a space.

    Each character matched by the regex class ``\\W`` is replaced one-for-one,
    so runs of punctuation become runs of spaces and the string length is
    unchanged by the substitution.
    """
    return re.sub(r'\W', ' ', text.lower())

# Normalize the combined text element-wise before it is vectorized
df['features'] = df['features'].map(clean_text)

In [36]:
# TF-IDF Vectorizer: one row per movie, built from the cleaned 'features'
# text with the built-in English stop-word list removed
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['features'])

# Similarity calculation: pairwise cosine similarity of every movie against
# every other movie, returned as a dense array (rows/columns follow the row
# order of df)
similarity_matrix = cosine_similarity(tfidf_matrix, dense_output=True)

def recommend_movies(movie_title, num_recommendations=5):
    """Return the titles most similar to ``movie_title``.

    Relies on the module-level ``df`` (for titles) and ``similarity_matrix``
    (pairwise similarities whose rows follow the row order of ``df``).

    Parameters
    ----------
    movie_title : str
        Title to look up; must match a value in ``df['title']`` exactly.
        If several rows share the title, the first one is used.
    num_recommendations : int, default 5
        Maximum number of titles to return. Zero or a negative value yields
        an empty list.

    Returns
    -------
    list of str or str
        Titles ordered from most to least similar (ties keep dataset order),
        never including the queried row itself. If the title is not present,
        the string ``"Movie not found in dataset."`` is returned instead.
    """
    title_mask = (df['title'] == movie_title).to_numpy()
    if not title_mask.any():
        return "Movie not found in dataset."

    # Work with the row *position* (argmax of a boolean mask is the first
    # True), because similarity_matrix is positional and would be misread if
    # df ever carried a non-default index.
    movie_pos = int(title_mask.argmax())
    scores = similarity_matrix[movie_pos]

    # Exclude the queried movie explicitly instead of assuming it sorts first:
    # it does not when its feature vector is all zeros or when an earlier row
    # has identical features.
    candidates = [pos for pos in range(len(scores)) if pos != movie_pos]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    # Clamp so a negative request cannot turn into a from-the-end slice.
    top_positions = candidates[:max(num_recommendations, 0)]
    return df['title'].iloc[top_positions].tolist()

In [37]:
# Ask which movie to start from and how many recommendations to list
user_movie = input("Enter a movie title: ")
try:
    num = int(input("How many similar movies would you like to see? "))
    # A zero or negative count cannot produce a meaningful list, so treat it
    # exactly like non-numeric input and fall back to the default.
    if num < 1:
        raise ValueError("number of recommendations must be at least 1")
except ValueError:
    print("Invalid number. Defaulting to 5.")
    num = 5

recommendations = recommend_movies(user_movie, num)
if isinstance(recommendations, str):  # Error message from function
    print(recommendations)
else:
    print("\nRecommended Movies:")
    for i, movie in enumerate(recommendations, 1):
        print(f"{i}. {movie}")


Enter a movie title: The Dark Knight
How many similar movies would you like to see? 6

Recommended Movies:
1. Batman Begins
2. The Dark Knight Rises
3. Batman & Robin
4. Batman
5. Batman Returns
6. Superman III
