# 🎬 Movie Recommendation System
This project builds a simple **content-based movie recommendation system** using Python and scikit-learn.

In [7]:
# Phase 1: Import Libraries (install them first, e.g. `pip install pandas scikit-learn`)
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


ModuleNotFoundError: No module named 'pandas'

In [None]:
# Phase 2: Load Dataset
# Download from: https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata
# The CSV is read via a relative path, so the file must be present in the
# current working directory before this cell runs.
df = pd.read_csv("tmdb_5000_movies.csv")

# Preview data: as the last expression of the cell, the first rows are shown as its output.
df.head()


In [None]:
# Phase 3: Select and Clean Features
features = ['title', 'genres', 'overview']
# Take an explicit copy: `movies` is modified here and in later cells, and
# assigning into a bare column selection of `df` triggers pandas'
# SettingWithCopyWarning (it is ambiguous whether `df` would be affected).
movies = df[features].copy()
# Missing overviews become empty strings so the column is text-only.
movies['overview'] = movies['overview'].fillna('')


In [None]:
# Clean 'genres' field
import ast

def parse_genres(genres_str):
    """Turn a serialized list of genre records into a space-separated string of names.

    Args:
        genres_str: Text containing a list literal of dicts that each have a
            ``'name'`` key, e.g. ``'[{"id": 28, "name": "Action"}]'``.

    Returns:
        The ``'name'`` values joined by single spaces, or ``''`` when the value
        cannot be parsed or does not have the expected shape. Bad data never
        raises; this is deliberately best-effort.
    """
    try:
        genres_list = ast.literal_eval(genres_str)
        return ' '.join(genre['name'] for genre in genres_list)
    except (ValueError, SyntaxError, TypeError, KeyError):
        # ValueError/SyntaxError: not a valid literal (including NaN / None input).
        # TypeError/KeyError: parsed, but not a list of dicts with a 'name' key.
        # Only data errors are caught, so KeyboardInterrupt and SystemExit still
        # propagate (a bare `except:` would have swallowed them).
        return ''

# Replace each raw genre string with its parsed, space-separated genre names.
movies['genres'] = df['genres'].map(parse_genres)


In [None]:
# Build the 'tags' text: the overview followed by the genre names, separated by one space.
movies['tags'] = movies['overview'].str.cat(movies['genres'], sep=' ')


In [None]:
# Phase 4: Vectorization using TF-IDF
# Fit a TF-IDF model on the combined 'tags' text, using scikit-learn's built-in
# English stop-word list; `vectors` is the transformed movies['tags'] column.
vectorizer = TfidfVectorizer(stop_words='english')
vectors = vectorizer.fit_transform(movies['tags'])


In [None]:
# Phase 5: Compute Cosine Similarity
# Called with a single argument, so the rows of `vectors` are compared with each
# other; recommend() later reads one row of this result per query movie.
similarity = cosine_similarity(vectors)


In [None]:
# Phase 6: Recommendation Function
def recommend(movie_title, top_n=5):
    """Print the titles most similar to ``movie_title``.

    The title is looked up case-insensitively in ``movies['title']`` (the first
    match wins). Every other row is ranked by its score in the matching row of
    ``similarity``, and the ``top_n`` best titles are printed under a header line.

    Args:
        movie_title: Title to look up; both sides are lower-cased before comparing.
        top_n: Number of recommendations to print. Defaults to 5.

    Returns:
        The string ``"Movie not found!"`` when no title matches; otherwise
        ``None`` (recommendations are printed, not returned).

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    movie_title = movie_title.lower()
    titles = movies['title']

    # Lower-case the column once and reuse the mask for both the existence
    # check and the lookup; missing titles simply never match.
    is_match = titles.str.lower().eq(movie_title).fillna(False).to_numpy(dtype=bool)
    if not is_match.any():
        return "Movie not found!"

    # Use the row *position* of the first match, not its index label:
    # `similarity` and `.iloc` are positional, so a label is only correct
    # when `movies` happens to have a default 0..n-1 index.
    idx = int(is_match.argmax())
    scores = similarity[idx]

    # Exclude the query movie explicitly instead of dropping whatever sorts
    # first: with an all-zero tag vector or an identical duplicate, the query
    # is not guaranteed to be the top-ranked entry.
    candidates = (pos for pos in range(len(scores)) if pos != idx)
    best = sorted(candidates, key=lambda pos: scores[pos], reverse=True)[:top_n]

    print(f"Top {top_n} movies similar to '{titles.iloc[idx]}':\n")
    for pos in best:
        print(titles.iloc[pos])


In [None]:
# Phase 7: Try it Out
# The lookup is case-insensitive; recommend() prints its results, and returns the
# string "Movie not found!" when the title is absent from the dataset.
recommend("Avatar")
