# In [None]:  (notebook cell marker left over from export)
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from sklearn.preprocessing import StandardScaler

# Hand-written movie table used as the input to clustering.
#
# Layout: column-oriented dict with 51 entries per key. Every list is
# positionally aligned with "Movie" (entry i of each list describes the
# i-th title), so rows must be added or removed from all lists together.
#
# Columns:
#   "Movie"      - title string, used later as the lookup key.
#   "Action", "Adventure", "Sci-Fi", "Animation", "Drama"
#                - 0/1 genre flags; a movie may have several flags set.
#   "Avg Rating" - float score; values in this table span 7.5 to 9.3.
#                  The rating source/scale is not stated here.
#
# NOTE(review): some flags look inconsistent with the actual films (e.g.
# "Harry Potter" is flagged Sci-Fi while "The Hunger Games" is not) -
# verify the genre rows against a trusted source before relying on them.
movies = {
    "Movie": [
        "Avengers", "Batman", "Finding Nemo", "Toy Story", "Interstellar", "Inception",
        "Spider-Man", "Thor", "The Lion King", "Coco", "Titanic", "The Notebook",
        "The Dark Knight", "Deadpool", "Frozen", "Shrek", "Gravity", "The Martian",
        "Mad Max", "Black Panther", "WALL-E", "Zootopia", "Avatar", "Guardians of the Galaxy",
        "Iron Man", "Doctor Strange", "Up", "Moana", "The Godfather", "The Shawshank Redemption",
        "Joker", "The Matrix", "John Wick", "Wonder Woman", "Harry Potter", "The Hunger Games",
        "Aladdin", "Beauty and the Beast", "Mulan", "The Incredibles", "Finding Dory",
        "How to Train Your Dragon", "The Lego Movie", "Big Hero 6", "Inside Out", "Cinderella",
        "The Revenant", "Django Unchained", "Pulp Fiction", "Fight Club", "Forrest Gump"
    ],
    # Genre flags (1 = movie belongs to the genre, 0 = it does not).
    "Action": [1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
               1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
    "Adventure": [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
                  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
    "Sci-Fi": [1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
               0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    "Animation": [0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
                  0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
    "Drama": [0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
              1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
    # Numeric rating per movie; clustered together with the genre flags.
    "Avg Rating": [8.5, 8.0, 8.1, 8.3, 8.7, 8.8, 7.9, 7.5, 8.5, 8.4, 7.8, 7.6, 9.0, 8.0, 7.9, 8.0, 7.7, 8.1,
                   8.4, 8.6, 8.5, 8.2, 8.9, 8.4, 8.3, 8.2, 8.6, 8.1, 9.2, 9.3, 8.5, 8.7, 8.1, 7.8, 7.9, 7.6,
                   8.0, 7.8, 7.9, 8.2, 8.3, 8.1, 8.4, 8.5, 8.7, 7.9, 8.1, 8.9, 8.8, 9.1, 8.9]
}

# One row per movie, one column per key above.
df = pd.DataFrame(movies)

# Target number of flat clusters cut from the hierarchy
# (raised to 7 to get finer-grained groups).
num_clusters = 7

# Numeric feature matrix: every column except the movie title.
X = df.drop("Movie", axis=1).values

# Standardize each feature to zero mean / unit variance so the rating
# column and the 0/1 genre flags contribute on a comparable scale.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Agglomerative (hierarchical) clustering with Ward linkage.
linkage_matrix = linkage(X_scaled, "ward")

# Cut the tree into at most `num_clusters` flat clusters and store the
# resulting label next to each movie.
clusters = fcluster(linkage_matrix, t=num_clusters, criterion="maxclust")
df["Cluster"] = clusters

def recommend_movie(movie_name):
    """Recommend up to three movies from the same cluster as ``movie_name``.

    The lookup reads the module-level ``df`` and uses its "Movie" and
    "Cluster" columns. An exact title match is preferred; when there is
    none, the title is matched ignoring letter case and surrounding
    whitespace, so free-text input such as " avengers " still resolves to
    "Avengers".

    Args:
        movie_name: Title typed by the user.

    Returns:
        A human-readable message:
        * the first three other titles of the matched movie's cluster, in
          table order;
        * a "no similar movies" message when the movie is alone in its
          cluster;
        * a "not found" message when no title matches (this includes
          non-string input).
    """
    not_found = "Movie not found! Please enter a valid movie name."
    if not isinstance(movie_name, str):
        return not_found

    titles = df["Movie"]

    # Exact match first, so existing callers get exactly the same result.
    matches = titles == movie_name
    if not matches.any():
        # Forgiving fallback for hand-typed input: ignore case and padding.
        wanted = movie_name.strip().casefold()
        matches = titles.str.strip().str.casefold() == wanted
        if not matches.any():
            return not_found

    # Use the canonical title from the table (not the raw input) so the
    # matched movie itself is reliably excluded from its own suggestions.
    canonical_title = df.loc[matches, "Movie"].iloc[0]
    movie_cluster = df.loc[matches, "Cluster"].iloc[0]

    in_same_cluster = df["Cluster"] == movie_cluster
    is_other_movie = titles != canonical_title
    similar_movies = df.loc[in_same_cluster & is_other_movie, "Movie"]

    if similar_movies.empty:
        return "No similar movies found in the cluster."
    return f"Recommended Similar Movies: {', '.join(similar_movies.head(3))}"

# Gradio UI: one free-text box for the movie title, one text box for the
# recommendation message returned by recommend_movie.
_title_input = gr.Textbox(label="Enter a Movie Name")
_recommendation_output = gr.Textbox(label="Recommended Similar Movies")

demo = gr.Interface(
    fn=recommend_movie,
    inputs=_title_input,
    outputs=_recommendation_output,
    title="🎬 Hierarchical AI Movie Recommender (Large Dataset)",
    description="Enter a movie name, and this AI will find similar movies based on hierarchical clustering!",
)

# Start the web app (runs as soon as this cell/script is executed).
demo.launch()
