In [11]:
import pandas as pd
import numpy as np
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import ast

In [12]:
# Load Data
# The paths point inside /content/drive, i.e. this presumably expects a Colab
# session with Google Drive already mounted -- TODO confirm / make configurable.
movies_path = '/content/drive/MyDrive/Datasets/tmdb_5000_movies.csv'
credits_path = '/content/drive/MyDrive/Datasets/tmdb_5000_credits.csv'

movies = pd.read_csv(movies_path)
credits = pd.read_csv(credits_path)

# Merge datasets
# NOTE(review): the join key is the title string. If two different films share
# a title, an inner join on it pairs every such movie row with every such
# credits row. If the credits file carries a unique movie identifier, joining
# on that would be safer -- confirm against the CSV schema.
df = movies.merge(credits, on='title', how='inner')

# Keep relevant columns
df = df[['id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

# Remove rows with missing values, then rebuild a contiguous 0..n-1 index.
# Without the reset, the index keeps gaps where rows were dropped, so index
# labels stop matching row positions -- and the similarity matrix built later
# is addressed by row position.
df = df.dropna().reset_index(drop=True)

In [13]:
# Convert 'genres', 'keywords', 'cast', 'crew' to strings
def extract_names(obj):
    """Return the 'name' values from a stringified list of dicts, space-joined.

    Args:
        obj: A string holding a Python-literal list of dicts, each of which
            has a 'name' key (e.g. "[{'id': 1, 'name': 'Action'}]").

    Returns:
        A single string with every 'name' value in list order, separated by
        one space. An empty list yields an empty string.
    """
    parsed_entries = ast.literal_eval(obj)
    collected = []
    for entry in parsed_entries:
        collected.append(entry['name'])
    return ' '.join(collected)

# Flatten the stringified list-of-dict columns into plain, space-separated
# name strings so they can be concatenated into one text field per movie.
df['genres'] = df['genres'].apply(extract_names)
df['keywords'] = df['keywords'].apply(extract_names)

# Cast: keep only the first three listed entries (top 3 actors).
df['cast'] = df['cast'].apply(
    lambda x: ' '.join(member['name'] for member in ast.literal_eval(x)[:3])
)

# Crew: keep only the entries whose job is exactly 'Director'.
df['crew'] = df['crew'].apply(
    lambda x: ' '.join(
        member['name']
        for member in ast.literal_eval(x)
        if member['job'] == 'Director'
    )
)

# One combined text field per movie; this is what gets vectorized later.
df['tags'] = df['overview'] + ' ' + df['genres'] + ' ' + df['keywords'] + ' ' + df['cast'] + ' ' + df['crew']

# Take an explicit copy of the column subset: `df` is assigned into again
# afterwards (the 'tags' column is overwritten with its stemmed form), and
# writing into a frame derived by column selection is what triggers pandas'
# SettingWithCopyWarning.
df = df[['id', 'title', 'tags']].copy()

def stemming(text):
    """Stem every whitespace-separated token of ``text``.

    Args:
        text: The string to process; it is split on whitespace.

    Returns:
        The stemmed tokens joined back together with single spaces.
    """
    stemmer = PorterStemmer()
    stemmed_tokens = map(stemmer.stem, text.split())
    return ' '.join(stemmed_tokens)

# Replace each movie's tag text with its stemmed form.
df['tags'] = df['tags'].apply(stemming)

In [14]:
# Apply TF-IDF
# Fit the vectorizer on the 'tags' column and transform it in one step; the
# resulting matrix has one row per row of df, in df's row order.
# NOTE(review): the tags were already stemmed before this point, so the
# 'english' stop-word filter is applied to stemmed tokens; stemmed forms of
# some stop words may no longer match the list -- confirm whether that matters.
vectorizer = TfidfVectorizer(stop_words='english')
vectorized_matrix = vectorizer.fit_transform(df['tags'])

# Compute cosine similarity
# Called with a single argument, so every row is compared against every other
# row of the same matrix; cosine_sim[i][j] relates the i-th and j-th rows of df
# by POSITION (not by index label).
cosine_sim = cosine_similarity(vectorized_matrix)

In [15]:
# Recommendation function
def recommend(movie_name, top_n=5, movies_df=None, similarity=None):
    """Return the titles most similar to ``movie_name``.

    The lookup is case-insensitive and ignores leading/trailing whitespace.
    If several rows carry the same title, the first one is used as the query.

    Args:
        movie_name: Title to look up.
        top_n: Maximum number of recommendations to return (default 5).
        movies_df: DataFrame with a 'title' column. Defaults to the
            notebook-level ``df``.
        similarity: Square similarity matrix whose row/column ``i`` refers to
            the ``i``-th ROW POSITION of ``movies_df``. Defaults to the
            notebook-level ``cosine_sim``.

    Returns:
        A list of up to ``top_n`` titles ordered from most to least similar,
        or ``["Movie not found!"]`` when no title matches.
    """
    catalog = df if movies_df is None else movies_df
    sim_matrix = cosine_sim if similarity is None else similarity

    query = (movie_name or "").strip().lower()
    lowered_titles = catalog['title'].str.lower().to_numpy()
    # Row POSITIONS (not index labels) of matching titles. The similarity
    # matrix is positional, and the frame's index labels are not guaranteed to
    # equal positions once rows have been dropped or filtered.
    match_positions = np.flatnonzero(lowered_titles == query)
    if not query or match_positions.size == 0:
        return ["Movie not found!"]

    query_pos = int(match_positions[0])
    scores = np.asarray(sim_matrix[query_pos], dtype=float)

    # Stable descending sort, so ties keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query row explicitly instead of assuming it is ranked first
    # (another row can tie with it at the maximum similarity).
    neighbours = [int(pos) for pos in ranked if pos != query_pos]

    limit = max(int(top_n), 0)
    return [catalog['title'].iloc[pos] for pos in neighbours[:limit]]

In [16]:
def gradio_recommend(movie_name):
    """Format the recommendations for ``movie_name`` as Markdown for the UI.

    Args:
        movie_name: Title typed by the user.

    Returns:
        A Markdown string: one bold, numbered line per recommended title,
        separated by blank lines. When the lookup fails, a single bold
        "Movie not found!" message without a rank number.
    """
    recommendations = recommend(movie_name)

    # recommend() reports a failed lookup as this one-item list. Show it as a
    # plain message rather than numbering it like a real recommendation
    # ("1. Movie not found!").
    if recommendations == ["Movie not found!"]:
        return "**Movie not found!**"

    # Convert the recommendations to markdown format
    return "\n\n".join(
        f"**{rank}. {title}**" for rank, title in enumerate(recommendations, start=1)
    )

# Build the input/output components first so the Interface call stays short.
title_box = gr.Textbox(label="Enter a Movie Title", placeholder="E.g., Titanic")
results_panel = gr.Markdown()
app_description = "Discover movies similar to your favorite ones! Enter a movie title below, and our system will recommend the best matches based on content similarity."

# Wire the text box to gradio_recommend and render its return value as Markdown.
gui = gr.Interface(
    fn=gradio_recommend,
    inputs=title_box,
    outputs=results_panel,
    title="🎬 Movie Recommendation System",
    description=app_description,
    theme="default",
    allow_flagging="never",
)

# Start the web app with default launch settings.
gui.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f69470426fedd814f4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


