<a href="https://colab.research.google.com/github/Hadia-git-sketch/Syntecxhub_MovieRecommendationSystem/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# 1. Install and Import Libraries
!pip install -q gradio pandas scikit-learn

import pandas as pd
import numpy as np
import ast
import gradio as gr
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- 2. DATA LOADING & EDA ---
# Reads the two TMDB CSV files from the current working directory.
print("üöÄ Loading datasets...")
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Merge datasets on title
# NOTE(review): joining on 'title' pairs every movie row with every credits
# row that shares the same title, so repeated titles yield extra (and
# possibly mismatched) rows. Presumably a unique id column would be a safer
# join key -- confirm against the CSV schemas before changing it.
movies = movies.merge(credits, on='title')

# Brief EDA
print(f"‚úÖ Success! Dataset contains {movies.shape[0]} movies.")
# Counts occurrences of the substring 'name' across all raw genre strings.
print(f"Top Genres Found: {movies['genres'].str.count('name').sum()} entries processed.")

# --- 3. METADATA CLEANING ---
# We select columns that define the "DNA" of the movie
# dropna() returns a new frame (no in-place mutation of a column subset), and
# reset_index(drop=True) renumbers rows 0..n-1 so that index labels keep
# matching the positional rows of anything built from this frame afterwards.
movies = (
    movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
    .dropna()
    .reset_index(drop=True)
)

def convert_list(obj):
    """Extract the ``name`` field from a stringified list of dicts.

    Args:
        obj: A string holding a Python-literal list of dicts, each with a
            ``'name'`` key (e.g. ``"[{'id': 1, 'name': 'Action'}]"``).

    Returns:
        A list with the ``'name'`` value of every entry, in order. An empty
        list is returned when ``obj`` cannot be parsed, is not a sequence of
        dicts, or any entry lacks a ``'name'`` key.
    """
    try:
        entries = ast.literal_eval(obj)
        return [entry['name'] for entry in entries]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Best-effort parsing: malformed or missing cells contribute no names.
        # Only parse/shape errors are swallowed, so KeyboardInterrupt and
        # other unrelated failures still propagate.
        return []

def get_director(obj):
    """Extract the name of the first crew member whose job is 'Director'.

    Args:
        obj: A string holding a Python-literal list of dicts, each with
            ``'job'`` and ``'name'`` keys.

    Returns:
        A one-element list with the first director's name, or an empty list
        when no entry has ``job == 'Director'`` or ``obj`` cannot be parsed
        into the expected shape.
    """
    try:
        for member in ast.literal_eval(obj):
            if member['job'] == 'Director':
                # Only the first director is kept, wrapped in a list so the
                # result can be concatenated with the other tag lists.
                return [member['name']]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Best-effort parsing: malformed or missing cells yield no director.
        # Unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
        return []
    return []

# Apply cleaning
def _top_cast(obj, limit=3):
    """Return the names of the first ``limit`` cast entries, or [] if unparsable."""
    try:
        return [member['name'] for member in ast.literal_eval(obj)[:limit]]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Same best-effort policy as convert_list / get_director: a malformed
        # cell contributes no tokens instead of aborting the whole .apply().
        return []


movies['genres'] = movies['genres'].apply(convert_list)
movies['keywords'] = movies['keywords'].apply(convert_list)
# Only the first three listed cast members are kept.
movies['cast'] = movies['cast'].apply(_top_cast)
movies['crew'] = movies['crew'].apply(get_director)
# Split the overview on whitespace so it can be concatenated with the
# other list-valued columns below.
movies['overview'] = movies['overview'].apply(lambda x: x.split())

# Remove spaces (e.g., 'Johnny Depp' -> 'JohnnyDepp') to create unique tokens
for col in ['genres', 'keywords', 'cast', 'crew']:
    movies[col] = movies[col].apply(lambda x: [i.replace(" ", "") for i in x])

# Create 'Metadata Soup' (Tags)
# Element-wise list concatenation: one combined token list per movie.
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
new_df = movies[['movie_id', 'title', 'tags']].copy()
# Collapse each token list into a single lower-cased, space-separated string.
new_df['tags'] = new_df['tags'].apply(lambda x: " ".join(x).lower())

# --- 4. VECTORIZATION & SIMILARITY ---
# Bag-of-words encoding of the tag strings: vocabulary capped at 5000 terms,
# with English stop words removed.
cv = CountVectorizer(stop_words='english', max_features=5000)
vectors = cv.fit_transform(new_df['tags']).toarray()  # dense count matrix, one row per movie

# Pairwise cosine similarity between all rows of the count matrix;
# entry [i, j] scores movie i against movie j.
similarity = cosine_similarity(vectors)

# --- 5. RECOMMENDATION ENGINE ---
def recommend(movie_title):
    """Return the five titles most similar to ``movie_title``.

    Args:
        movie_title: Title to look up in ``new_df['title']`` (exact match).

    Returns:
        A string with up to five recommended titles separated by blank
        lines, or a "not found" message when the title is absent (including
        when nothing was selected and ``None`` is passed).
    """
    # Locate the movie by *position*, not by index label: `similarity` is a
    # plain array indexed 0..n-1, while new_df's index labels may have gaps
    # after rows were dropped.
    matches = np.flatnonzero((new_df['title'] == movie_title).to_numpy())
    if matches.size == 0:
        return "Movie not found. Please select from the dropdown."
    pos = int(matches[0])

    # Rank every movie by similarity to the selected one, best first.
    ranked = sorted(enumerate(similarity[pos]), key=lambda pair: pair[1], reverse=True)

    # Qualitative Evaluation: Return top 5 similar titles
    # The query row is excluded explicitly rather than assumed to sort first;
    # another row with identical tags would tie with it at the top.
    top_positions = [i for i, _ in ranked if i != pos][:5]
    results = new_df['title'].iloc[top_positions].tolist()
    return "\n\n".join([f"üéûÔ∏è {title}" for title in results])

# --- 6. SPACIOUS INTERACTIVE UI ---
# Stylesheet for the app: caps the container width, sets a dark background,
# and styles the title, subtitle, dropdown and result text.
custom_css = """
.gradio-container { max-width: 1100px !important; margin: auto; background-color: #0b0d11; border-radius: 15px; padding: 40px; }
.main-title { text-align: center; color: #E50914; font-size: 45px; font-weight: 900; margin-bottom: 0px; }
.sub-title { text-align: center; color: #ffffff; font-size: 18px; margin-bottom: 30px; opacity: 0.8; }
.movie-input { background: #1f2329 !important; border: 2px solid #E50914 !important; }
.output-text textarea { font-size: 20px !important; color: #00dfa2 !important; font-weight: bold !important; }
"""

# Alphabetically sorted titles offered in the dropdown.
title_choices = new_df['title'].tolist()
title_choices.sort()

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
    # Page header
    gr.Markdown(value="# üé¨ MOVIE MATCH AI", elem_classes=["main-title"])
    gr.Markdown(
        value="Personalized recommendations based on plot, cast, and director metadata.",
        elem_classes=["sub-title"],
    )

    with gr.Row():
        # Input panel: movie picker plus the button that triggers the search
        with gr.Column(variant="panel", scale=1):
            gr.Markdown(value="### üîç Search Selection")
            movie_dropdown = gr.Dropdown(
                label="Select a movie you liked:",
                choices=title_choices,
                info="Our system uses Cosine Similarity to find movies with similar tags.",
                elem_classes=["movie-input"],
            )
            search_btn = gr.Button(value="Find Similar Movies", variant="primary")

        # Output panel: text area that receives the recommendations
        with gr.Column(variant="panel", scale=1):
            gr.Markdown(value="### üçø Recommended for You")
            output_box = gr.Textbox(
                lines=8,
                label="",
                placeholder="Top 5 picks will appear here...",
                elem_classes=["output-text"],
            )

    # Collapsed-by-default explanation of the approach
    with gr.Accordion(label="How it works", open=False):
        gr.Markdown("""
        ### Qualitative Evaluation & Logic
        - **Vectorization**: Each movie is converted into a 5,000-word vector.
        - **Cosine Similarity**: We calculate the cosine of the angle between vectors.
        - **Logic**: If you pick *Avatar*, the system finds movies with the most overlapping keywords like 'space', 'alien', 'future', and 'action'.
        """)

    # Clicking the button sends the dropdown value to recommend() and writes
    # its return value into the output text box.
    search_btn.click(recommend, inputs=movie_dropdown, outputs=output_box)

# share=True asks Gradio for a public shareable link in addition to the local server
demo.launch(share=True)

🚀 Loading datasets...
✅ Success! Dataset contains 4809 movies.
Top Genres Found: 12177 entries processed.


  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
  with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d071e85469fcc8c0b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


