In [1]:
# Unified Movie Recommendation System
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
import gradio as gr

# Startup banner: a rule, the two title lines, and a closing rule,
# emitted one per line by a single print call.
print(
    "=" * 80,
    "MOVIE RECOMMENDATION SYSTEM",
    "Search by Movie Name OR Genre",
    "=" * 80,
    sep="\n",
)



MOVIE RECOMMENDATION SYSTEM
Search by Movie Name OR Genre


In [2]:
# 1. LOAD DATASET
# Read the CSV, keep only the columns the recommender uses, discard rows that
# lack a title or a genre, and renumber the remaining rows from 0.
cols_keep = ["Title", "Year", "Genre", "Rating"]
df = (
    pd.read_csv('imdb-movies-dataset.csv')
    .loc[:, cols_keep]
    .dropna(subset=["Title", "Genre"])
    .reset_index(drop=True)
)


In [3]:
# Clean and preprocess genres: split the comma-separated text into a list of
# trimmed, title-cased labels (e.g. "Comedy, Drama" -> ["Comedy", "Drama"]).
df["GL"] = (
    df["Genre"]
    .astype(str)
    .str.split(",")
    .map(lambda parts: [part.strip().title() for part in parts])
)

# Convert Rating to numeric safely: unparseable values become NaN, then 0.
df["Rating"] = df["Rating"].pipe(pd.to_numeric, errors="coerce").fillna(0)

print("Total Movies:", df.shape[0])
df.head(3)


Total Movies: 9993


Unnamed: 0,Title,Year,Genre,Rating,GL
0,The Idea of You,2023.0,"Comedy, Drama, Romance",6.4,"[Comedy, Drama, Romance]"
1,Kingdom of the Planet of the Apes,2023.0,"Action, Adventure, Sci-Fi",7.3,"[Action, Adventure, Sci-Fi]"
2,Unfrosted,2023.0,"Biography, Comedy, History",5.5,"[Biography, Comedy, History]"


In [4]:
# Convert genres into binary vectors for fast similarity calculation.
# genre_matrix has one row per row of df (same order) and one column per
# distinct genre label found in df["GL"].
mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(df["GL"])
# Give the one-hot frame df's own index so the concat below lines up row-for-row.
genre_df = pd.DataFrame(genre_matrix, columns=mlb.classes_, index=df.index)

# Combine back into df. Any genre columns left over from a previous run of this
# cell are dropped first, so re-running the cell does not duplicate them.
df = pd.concat(
    [df.drop(columns=genre_df.columns, errors="ignore"), genre_df],
    axis=1,
)
df.head(3)


Unnamed: 0,Title,Year,Genre,Rating,GL,Action,Adventure,Animation,Biography,Comedy,...,Horror,Music,Musical,Mystery,Romance,Sci-Fi,Sport,Thriller,War,Western
0,The Idea of You,2023.0,"Comedy, Drama, Romance",6.4,"[Comedy, Drama, Romance]",0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
1,Kingdom of the Planet of the Apes,2023.0,"Action, Adventure, Sci-Fi",7.3,"[Action, Adventure, Sci-Fi]",1,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,Unfrosted,2023.0,"Biography, Comedy, History",5.5,"[Biography, Comedy, History]",0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [5]:
def recommend_by_movie(name, n=5):
    """Recommend movies whose genres resemble those of the title matching ``name``.

    The title is looked up with a case-insensitive, literal substring search.
    Candidates are ranked by ``0.7 * genre cosine similarity + 0.3 * Rating / 10``.

    Args:
        name: Full or partial movie title. ``None`` or blank text is treated
            as "not found".
        n: Maximum number of recommendations to return (coerced to ``int``).

    Returns:
        A Markdown string with a numbered list of recommendations, or a plain
        message when no title matches or the query is ambiguous.
    """
    query = "" if name is None else str(name).strip()
    if not query:
        # An empty pattern would match every title and be reported as ambiguous.
        return "Movie not found."
    n = max(int(n), 0)

    # regex=False: treat the query literally so titles containing regex
    # metacharacters such as "(" or "+" can be searched without errors.
    matches = df[df["Title"].str.contains(query, case=False, na=False, regex=False)]
    if len(matches) == 0:
        return "Movie not found."
    if len(matches) > 1:
        # A single exact (case-insensitive) title wins over longer titles that
        # merely contain the query, e.g. "Alien" vs "Aliens".
        exact = matches[matches["Title"].str.lower() == query.lower()]
        if len(exact) != 1:
            return f"Multiple matches found: {', '.join(matches['Title'].head(5))}"
        matches = exact

    target_title = matches.iloc[0]["Title"]
    # The index label is used as a row position into genre_matrix; this relies
    # on df having the default 0..N-1 index, as the loading cell sets up.
    idx = matches.index[0]

    # Compute cosine similarity between the target's genre vector and all rows
    sim = cosine_similarity([genre_matrix[idx]], genre_matrix)[0]

    # Weighted score (70% similarity + 30% rating). Kept in a local Series so
    # the shared df is not modified by every search.
    final_score = 0.7 * pd.Series(sim, index=df.index) + 0.3 * (df["Rating"] / 10)

    # Top recommendations (excluding every row that carries the target's title)
    top = final_score[df["Title"] != target_title].nlargest(n)
    recs = df.loc[top.index, ["Title", "Year", "Rating"]]

    result = f"### Recommendations similar to **{target_title}**\n"
    for i, row in enumerate(recs.itertuples(), 1):
        result += f"{i}. **{row.Title}** ({row.Year}) {row.Rating}\n"
    return result


In [6]:
def recommend_by_genre(name, n=5, min_rating=0):
    """List the highest-rated movies that belong to any of the requested genres.

    Args:
        name: One genre or a comma-separated list of genres (case-insensitive).
            A movie qualifies if it has at least one of them.
        n: Maximum number of movies to return (coerced to ``int``).
        min_rating: Movies with a ``Rating`` below this value are excluded.

    Returns:
        A Markdown string with a numbered list of movies sorted by rating
        (highest first), or a plain message when nothing qualifies.
    """
    # Split and normalize genre input, ignoring blank entries such as the
    # trailing piece of "Action, ".
    raw = "" if name is None else str(name)
    g_list = [g.strip().title() for g in raw.split(",")]
    g_list = [g for g in g_list if g]
    if not g_list:
        return " No movies found in that genre."
    n = max(int(n), 0)

    # Filter by genre presence (a movie matches if it has ANY requested genre)
    mask = df["GL"].apply(lambda x: any(g in x for g in g_list))
    subset = df[mask]

    if subset.empty:
        return " No movies found in that genre."

    # Filter by rating
    subset = subset[subset["Rating"] >= min_rating]
    if subset.empty:
        return f"No {', '.join(g_list)} movies rated {min_rating} or higher."

    # Sort by rating (descending) and keep the top n
    recs = subset.nlargest(n, "Rating")

    result = f"### Top rated **{', '.join(g_list)}** movies\n"
    for i, row in enumerate(recs[["Title", "Year", "Rating"]].itertuples(), 1):
        result += f"{i}. **{row.Title}** ({row.Year}) {row.Rating}\n"
    return result


In [7]:
# Gradio UI: one search box, a movie/genre mode switch, two sliders, and a
# Markdown area that shows whatever text the recommender returns.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# Movie Recommender System")

    with gr.Row():
        search_box = gr.Textbox(label="Search", placeholder="Enter movie name or genre...")
        mode = gr.Radio(["movie", "genre"], value="movie", label="Search Type")

    with gr.Row():
        num_results = gr.Slider(3, 10, value=5, step=1, label="Number of Results")
        min_rating = gr.Slider(0, 10, value=7, step=0.5, label="Minimum Rating (genre only)")

    output = gr.Markdown(label="Results")
    btn = gr.Button("Search")

    def search_handler(name, n, mode, min_r):
        """Dispatch a search to the movie-based or genre-based recommender.

        Args:
            name: Text from the search box.
            n: Requested number of results (coerced to ``int``).
            mode: ``"movie"`` for title search; any other value means genre
                search. Inside this function the name refers to the selected
                string, not to the Radio component defined above.
            min_r: Minimum rating; only used for genre search.

        Returns:
            The Markdown/text produced by the selected recommender, or a
            prompt when the search box is blank.
        """
        query = (name or "").strip()
        if not query:
            # A blank movie search would otherwise match every title.
            return "Please enter a movie name or genre."
        n = int(n)
        if mode == "movie":
            return recommend_by_movie(query, n)
        return recommend_by_genre(query, n, min_r)

    btn.click(search_handler, inputs=[search_box, num_results, mode, min_rating], outputs=output)

app.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


