In [1]:
# Mount Google Drive at /content/drive; the dataset CSVs are later read from
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install required libraries
!pip install -q gradio sentence-transformers transformers spacy fuzzywuzzy python-Levenshtein
!python -m spacy download en_core_web_sm

import pandas as pd
import numpy as np
import gradio as gr
import ast
import re
import spacy
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Load spaCy model
# `nlp` is the pipeline used by extract_entities() for named-entity recognition
# and tokenisation.
nlp = spacy.load("en_core_web_sm")

# Load emotion classification model
# Only the single best label is requested (top_k=1).
# NOTE(review): with top_k fixed at construction time the call result may be
# nested one level deeper ([[{...}]]) than a plain result[0]['label'] lookup
# expects, depending on the transformers version -- confirm against the
# installed release.
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=1
)

# Maps an emotion label (lower-cased) to genre keywords. The keywords are
# matched as case-insensitive substrings of df['genres'].
# "science fiction" is listed next to "sci-fi" because the dataset is believed
# to spell the genre out in full, in which case "sci-fi" alone would never
# match -- TODO confirm against the CSV.
# NOTE(review): verify that the classifier can actually emit every key listed
# here (e.g. "love"); labels without an entry simply produce no mood filter.
emotion_to_genre = {
    "joy": ["comedy", "adventure", "animation"],
    "sadness": ["drama", "romance"],
    "anger": ["action", "thriller"],
    "fear": ["horror", "mystery"],
    "love": ["romance", "comedy"],
    "surprise": ["fantasy", "sci-fi", "science fiction"]
}

# Load datasets
df = pd.read_csv("/content/drive/MyDrive/tmdb_5000_movies.csv")
credits = pd.read_csv("/content/drive/MyDrive/tmdb_5000_credits.csv")

# Join on the movie id when both files expose one: joining on the title alone
# multiplies rows whenever two different movies share the same title.
# Falls back to the title join if the id columns are not present.
if 'id' in df.columns and 'movie_id' in credits.columns:
    df = df.merge(
        # Drop the credits copy of 'title' so the result keeps a single
        # 'title' column, exactly as the title-based join did.
        credits.drop(columns=['title'], errors='ignore'),
        left_on='id',
        right_on='movie_id',
    )
else:
    df = df.merge(credits, on='title')

# Blank out missing values in the non-numeric (text) columns only; writing ''
# into numeric columns would silently turn them into mixed-type object columns.
text_columns = df.select_dtypes(exclude='number').columns
df[text_columns] = df[text_columns].fillna('')

# Extract director
def get_director(crew_str):
    """Return the name of the first crew member whose job is 'Director'.

    Args:
        crew_str: String containing a Python-literal list of dicts, each
            describing one crew member (keys used here: 'job' and 'name').

    Returns:
        The director's name as stored in the entry, or '' when the input
        cannot be parsed, is not a list, lists no director, or the director
        entry has no name.
    """
    try:
        crew = ast.literal_eval(crew_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Unparseable input (empty string, None, malformed literal, ...).
        return ''

    if not isinstance(crew, (list, tuple)):
        return ''

    for person in crew:
        # Skip malformed entries instead of discarding the whole row.
        if isinstance(person, dict) and person.get('job') == 'Director':
            # Never return None: callers concatenate the result into text.
            return person.get('name') or ''
    return ''

# Derive the 'director' column by parsing each row's crew string.
df['director'] = df['crew'].map(get_director)

# Extract top 5 cast members
def get_top_cast(cast):
    """Return the names of the first five named cast members, space-separated.

    Args:
        cast: String containing a Python-literal list of dicts, each with a
            'name' key.

    Returns:
        A single string with up to five names joined by spaces, or '' when the
        input cannot be parsed, is not a list, or contains no named entries.
    """
    try:
        members = ast.literal_eval(cast)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Unparseable input (empty string, None, malformed literal, ...).
        return ''

    if not isinstance(members, (list, tuple)):
        return ''

    # Skip entries without a usable name rather than blanking the whole cast.
    names = [
        str(person['name'])
        for person in members
        if isinstance(person, dict) and person.get('name')
    ]
    return " ".join(names[:5])

df['cast'] = df['cast'].apply(get_top_cast)

# Create unified text field
df['release_year'] = df['release_date'].str[:4]

# Normalise missing values (None/NaN) in the non-numeric columns BEFORE the
# concatenation: a single missing part would otherwise turn the whole 'text'
# value for that row into NaN.
non_numeric_columns = df.select_dtypes(exclude='number').columns
df[non_numeric_columns] = df[non_numeric_columns].fillna('')
df['text'] = df['title'] + ' ' + df['genres'] + ' ' + df['overview'] + ' ' + df['cast'] + ' ' + df['director']

# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every row in one batched call instead of one encode() call per row.
text_embeddings = model.encode(df['text'].tolist(), convert_to_tensor=True)

# Keep one 1-D tensor per row in the 'embedding' column. The object array is
# filled element by element so the tensors are stored as-is rather than being
# unpacked into extra dimensions.
embedding_column = np.empty(len(df), dtype=object)
for row_position, row_embedding in enumerate(text_embeddings):
    embedding_column[row_position] = row_embedding
df['embedding'] = embedding_column

# Favorite and lock database
# NOTE: module-level state -- it is shared by every call to the assistant and
# is lost when the kernel restarts.
user_data = {"favorites": set(), "locked": set()}

# Format movie response
def format_movie_response(movies_df, max_overview_chars=200):
    """Render a DataFrame of movies as a chat-friendly text block.

    Args:
        movies_df: DataFrame with 'title', 'release_year' and 'overview'
            columns; one output entry is produced per row.
        max_overview_chars: Maximum number of overview characters shown per
            movie. Longer overviews are cut and marked with "...".

    Returns:
        One entry per movie separated by blank lines, or an apology message
        when ``movies_df`` is empty.
    """
    if movies_df.empty:
        return "😕 Sorry, I couldn't find any matching movies."

    entries = []
    for _, row in movies_df.iterrows():
        entry = f"🎬 *{row['title']}*"

        # Show the year only when there is a real value (not '' / NaN).
        year = row['release_year']
        if pd.notna(year) and str(year).strip():
            entry += f" ({year})"

        overview = row['overview']
        overview = overview.strip() if isinstance(overview, str) else ''
        # Add the ellipsis only when text was actually cut off.
        if len(overview) > max_overview_chars:
            overview = overview[:max_overview_chars].rstrip() + "..."
        if overview:
            entry += f" - {overview}"

        entries.append(entry)

    return "\n\n".join(entries)

# Extract entities from prompt
def extract_entities(prompt, default_count=5, max_count=50):
    """Extract movie-related entities from a free-text prompt.

    The prompt is run through the module-level spaCy pipeline ``nlp``; named
    entities are sorted into people, dates and movie titles, and the raw text
    is scanned for a result count, four-digit years and genre keywords.

    Args:
        prompt: The user's text. Pass it with its original casing; the genre
            scan lower-cases the text itself.
        default_count: Value of "number" when no usable count is found.
        max_count: Largest count accepted; zero and larger numbers are ignored
            so that they cannot become the number of results.

    Returns:
        dict with the keys:
            "movie_titles": texts of WORK_OF_ART entities,
            "people": texts of PERSON entities,
            "dates": texts of DATE entities plus any four-digit years
                (19xx/20xx) found in the prompt,
            "genres": lower-cased genre keywords in order of appearance,
            "number": requested number of results (int).
    """
    doc = nlp(prompt)
    entities = {
        "movie_titles": [],
        "people": [],
        "dates": [],
        "genres": [],
        "number": default_count
    }

    def _usable_count(text):
        """Return int(text) when it is a count within [1, max_count], else None."""
        if not text.isdecimal():
            return None
        value = int(text)
        return value if 1 <= value <= max_count else None

    for ent in doc.ents:
        if ent.label_ == "PERSON":
            entities["people"].append(ent.text)
        elif ent.label_ == "DATE":
            entities["dates"].append(ent.text)
        elif ent.label_ == "CARDINAL":
            count = _usable_count(ent.text)
            if count is not None:
                entities["number"] = count
        elif ent.label_ == "WORK_OF_ART":
            entities["movie_titles"].append(ent.text)

    # The first stand-alone one- or two-digit number in the text takes
    # precedence over the NER result.
    for number_match in re.finditer(r'\b(\d{1,2})\b', prompt):
        count = _usable_count(number_match.group(1))
        if count is not None:
            entities["number"] = count
            break

    # A bare year is not always tagged as DATE by the NER model, so also pick
    # up four-digit years straight from the text.
    for year in re.findall(r'\b(?:19|20)\d{2}\b', prompt):
        if year not in entities["dates"]:
            entities["dates"].append(year)

    # Match genres against the raw text rather than single tokens so that
    # hyphenated names such as "sci-fi" are found even if the tokenizer splits
    # them into several tokens.
    genre_list = ['action', 'romance', 'comedy', 'thriller', 'horror', 'drama', 'sci-fi', 'fantasy']
    genre_pattern = r'\b(?:' + '|'.join(re.escape(genre) for genre in genre_list) + r')\b'
    entities["genres"].extend(re.findall(genre_pattern, prompt.lower()))

    return entities

# AI Assistant Function
def _top_emotion_label(classifier_output):
    """Return the lower-cased label of the first prediction, or '' if absent.

    Accepts a bare dict as well as (nested) lists/tuples of dicts, so both
    ``[{...}]`` and ``[[{...}]]`` shaped results are handled.
    """
    candidate = classifier_output
    while isinstance(candidate, (list, tuple)) and candidate:
        candidate = candidate[0]
    if isinstance(candidate, dict):
        return str(candidate.get('label', '')).lower()
    return ''


def _rank_by_similarity(query_embedding, top_n):
    """Return the ``top_n`` rows of ``df`` most similar to ``query_embedding``."""
    scores = df['embedding'].apply(lambda x: util.cos_sim(query_embedding, x).item())
    # Reverse first, then cut: unlike [-top_n:], this yields nothing (instead
    # of every row) should top_n ever be 0.
    best_positions = np.argsort(scores.to_numpy())[::-1][:top_n]
    return df.iloc[best_positions]


def smart_ai_assistant(prompt):
    """Answer a free-text user prompt about movies or about the app itself.

    The prompt is handled by the first rule that applies:
      1. questions about the app's features,
      2. add/remove/show favorites,
      3. lock/unlock/show locked movies,
      4. movies similar to a mentioned title,
      5. movies with a mentioned actor or director,
      6. movies from a mentioned year,
      7. movies of a mentioned genre,
      8. mood-based picks from the detected emotion,
      9. semantic similarity between the prompt and the movie texts.

    Explicit requests (4-7) are tried before the mood-based picks so that a
    prompt naming a title, person, year or genre is not overridden by its
    emotional tone.

    Args:
        prompt: The user's message. ``None`` is treated like an empty string.

    Returns:
        The reply as a plain-text string.
    """
    import logging  # local import: not part of the notebook's import cell

    raw_prompt = (prompt or "").strip()
    # Keyword checks use the lower-cased text; entity extraction gets the
    # original casing, which named-entity recognition relies on.
    text = raw_prompt.lower()
    entities = extract_entities(raw_prompt)

    top_n = entities.get("number", 5)
    if not isinstance(top_n, int) or top_n < 1:
        top_n = 5

    titles = entities["movie_titles"]
    first_title = titles[0] if titles else None

    # App feature explanations
    if "feature" in text or ("kya" in text and "app" in text):
        return "📱 This app allows you to:\n- Get smart movie/series/sports recommendations\n- Add or remove movies from favorites\n- Lock/unlock movies\n- Ask about current matches, genres, or actors\n- Receive real-time links."

    # Favorites management
    if first_title is not None and "fav" in text:
        favorite_key = first_title.lower()
        # A removal request must not end up adding the title.
        if any(word in text for word in ("remove", "delete", "unfav")):
            if favorite_key in user_data["favorites"]:
                user_data["favorites"].discard(favorite_key)
                return f"🗑️ '{first_title}' has been removed from your favorites."
            return f"⚠️ '{first_title}' is not in your favorites."
        user_data["favorites"].add(favorite_key)
        return f"✅ '{first_title}' has been added to your favorites."

    if "favorite" in text and ("show" in text or "kaun" in text):
        if not user_data["favorites"]:
            return "❤️ You have no favorite movies yet."
        return "❤️ Your favorite movies:\n" + "\n".join(sorted(user_data["favorites"]))

    # Lock/unlock management
    if first_title is not None:
        lock_key = first_title.lower()
        # "unlock" contains "lock", so it has to be tested first.
        if "unlock" in text:
            if lock_key in user_data["locked"]:
                user_data["locked"].remove(lock_key)
                return f"🔓 '{first_title}' has been unlocked."
            return f"⚠️ '{first_title}' is not locked."
        if "lock" in text:
            user_data["locked"].add(lock_key)
            return f"🔒 '{first_title}' has been locked."

    if "lock" in text and ("kaun" in text or "show" in text):
        if not user_data["locked"]:
            return "🔓 No movies are currently locked."
        return "🔒 Locked movies:\n" + "\n".join(sorted(user_data["locked"]))

    # Movie similarity
    if first_title is not None:
        match = df[df['title'].str.lower() == first_title.lower()]
        if not match.empty:
            # Reuse the embedding computed at load time for this movie.
            return format_movie_response(_rank_by_similarity(match.iloc[0]['embedding'], top_n))

    # Actor/director
    if entities["people"]:
        person = entities["people"][0].lower()
        # regex=False: the name is user text, not a pattern.
        in_cast = df['cast'].str.lower().str.contains(person, regex=False, na=False)
        is_director = df['director'].str.lower().str.contains(person, regex=False, na=False)
        return format_movie_response(df[in_cast | is_director].head(top_n))

    # Year
    for year in entities["dates"]:
        if year.isdigit() and len(year) == 4:
            result = df[df['release_year'] == year]
            return format_movie_response(result.head(top_n))

    # Genre
    if entities["genres"]:
        genre = entities["genres"][0]
        # NOTE(review): the dataset is believed to spell this genre out as
        # "Science Fiction"; search for both spellings -- confirm with the CSV.
        search_terms = [genre, "science fiction"] if genre == "sci-fi" else [genre]
        genre_pattern = '|'.join(re.escape(term) for term in search_terms)
        result = df[df['genres'].str.lower().str.contains(genre_pattern, na=False)]
        return format_movie_response(result.head(top_n))

    # Emotion-based filtering
    try:
        emotion = _top_emotion_label(emotion_classifier(text))
    except Exception as exc:  # best effort: fall through to semantic search
        logging.getLogger(__name__).warning("Emotion classification failed: %s", exc)
        emotion = ''
    matched_genres = emotion_to_genre.get(emotion, [])
    if matched_genres:
        genre_filter = '|'.join(re.escape(genre) for genre in matched_genres)
        emotion_filtered = df[df['genres'].str.lower().str.contains(genre_filter, na=False)]
        if not emotion_filtered.empty:
            return f"🧠 Based on your mood (*{emotion}*), here are some picks:\n\n" + format_movie_response(emotion_filtered.head(top_n))

    # Semantic similarity fallback
    prompt_embedding = model.encode(text, convert_to_tensor=True)
    return format_movie_response(_rank_by_similarity(prompt_embedding, top_n))

# Gradio interface
# Two-line free-text input box for the user's question.
_prompt_box = gr.Textbox(
    lines=2,
    placeholder="Ask me anything about movies, sports, features, or more...",
)

# Wire the assistant function to a simple text-in / text-out web UI.
interface = gr.Interface(
    fn=smart_ai_assistant,
    inputs=_prompt_box,
    outputs="text",
    title="🎥 Smart AI Assistant for Movies & More",
    description="Chat with your smart assistant: Ask about movies, lock/fav status, actors, sports, or even what the app can do!",
)

# Start serving the app.
interface.launch()


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.1/54.1 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.9/322.9 kB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m55.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Device set to use cpu
  df.fillna('', inplace=True)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a6912216159abcffa1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


