## Section 1: Install and Import Required Libraries

In [None]:
# Install required packages
!pip install -q google-generativeai pandas numpy

import os
import json
import pandas as pd
import numpy as np
from typing import Optional, List, Tuple
import re
from difflib import SequenceMatcher

# Import Gemini API
import google.generativeai as genai

print("‚úì All libraries imported successfully!")

## Section 2: Load and Explore the Movies Dataset

In [None]:
# Read the movie data and lower-case the column headers so later cells can
# refer to columns by lower-case name.
df = pd.read_csv('movies.csv')
df.columns = df.columns.str.lower()

# Quick overview: shape, column names, then a preview of the first rows.
print(f"Dataset Shape: {df.shape}", "\nColumn Names:", df.columns.tolist(), sep="\n")
print("\nFirst few rows:")
df.head(2)

In [None]:
# Summarise the dataset: row count, null count per column, column dtypes.
print(f"Total movies: {len(df)}", "\nMissing values:", df.isnull().sum(), sep="\n")
print("\nData types:", df.dtypes, sep="\n")

In [None]:
# Show the first row whose title contains "Avatar" (case-insensitive),
# restricted to a handful of columns.
print("Sample Movie (Avatar):")
print(
    df.loc[
        df['title'].str.contains('Avatar', case=False, na=False),
        ['title', 'genres', 'vote_average', 'budget', 'revenue'],
    ].head(1)
)

## Section 3: Prepare Data for the Chatbot

In [None]:
# Create searchable index
class MovieIndex:
    """Create an index for fast movie searches.

    NOTE(review): no attributes or methods are defined here, and the visible
    part of this notebook never uses the class. The implementation appears to
    be missing - restore it or remove the class.
    """

## Section 4: Set Up Gemini API Integration

In [None]:
# Configure Gemini API
from getpass import getpass

# Read the key with getpass rather than input(): input() echoes what is typed
# into the cell output, which would leave the secret visible in the notebook.
api_key = getpass("üîë Enter your Google Gemini API key: ").strip()

if api_key:
    genai.configure(api_key=api_key)
    # NOTE(review): confirm 'gemini-pro' is still a model name the API serves.
    model = genai.GenerativeModel('gemini-pro')
    gemini_available = True
    print("‚úì Gemini API configured successfully!")
else:
    # Keep the name defined so later cells can reference it safely; callers
    # are expected to check gemini_available before using the model.
    model = None
    gemini_available = False
    print("‚ö†Ô∏è  Gemini API key not provided. General queries will be limited.")

In [None]:
# Function to call Gemini API
def call_gemini_api(query: str) -> str:
    """Ask the configured Gemini model a movie question.

    Args:
        query: The user's question, embedded verbatim in the prompt.

    Returns:
        The model's reply text; a fixed notice when ``gemini_available`` is
        false; or ``"Error: ..."`` carrying the exception message when the
        request fails.
    """
    if not gemini_available:
        return "Gemini API not configured. Please provide an API key."

    try:
        # The leading spaces on the 2nd and 3rd lines are part of the prompt.
        prompt = (
            "You are a helpful movie expert chatbot.\n"
            f"        Answer this movie question: {query}\n"
            "        Provide a concise and informative response."
        )
        return model.generate_content(prompt).text
    except Exception as err:
        return f"Error: {str(err)}"

print("‚úì Gemini API functions created!")

## Section 5: Build the Chatbot Function

In [None]:
def similarity_score(s1: str, s2: str) -> float:
    """Return the case-insensitive ``SequenceMatcher`` ratio of two strings.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        A float in ``[0.0, 1.0]``; ``1.0`` means the lower-cased strings are
        identical.
    """
    left, right = s1.lower(), s2.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()

def search_movies(query: str, threshold: float = 0.6) -> List[Tuple]:
    """Search the global ``df`` for movies matching *query*.

    Each row is checked field by field (title, genres, keywords, cast,
    overview - whichever of these columns exist). The first field that
    matches decides the row's score: ``1.0`` when the lower-cased query is a
    substring of the field, otherwise the ``similarity_score`` ratio if it
    reaches ``threshold``.

    Args:
        query: Free-text search string; compared case-insensitively.
        threshold: Minimum fuzzy ratio for a field to count as a match.

    Returns:
        Up to five ``(row, score)`` tuples, highest score first. Empty when
        the query is blank or nothing matches.
    """
    query_lower = query.lower()
    # A blank query is a substring of every value and would match every row.
    if not query_lower.strip():
        return []

    # Column presence cannot change between rows, so resolve it once.
    search_fields = [
        field
        for field in ('title', 'genres', 'keywords', 'cast', 'overview')
        if field in df.columns
    ]

    matches = []
    for idx, row in df.iterrows():
        for field in search_fields:
            raw = row[field]
            if pd.isna(raw):
                continue

            value = str(raw).lower()
            if query_lower in value:
                score = 1.0
            else:
                # Compute the fuzzy ratio once and reuse it as the score.
                score = similarity_score(query_lower, value)
                if score < threshold:
                    continue

            matches.append((idx, row, score))
            break  # first matching field decides this row

    # Stable sort: equally scored rows keep their dataset order.
    matches.sort(key=lambda match: match[2], reverse=True)

    seen = set()
    unique_matches = []
    for idx, row, score in matches:
        if idx in seen:
            continue
        seen.add(idx)
        unique_matches.append((row, score))

    return unique_matches[:5]

print("‚úì Search functions created!")

In [None]:
def format_movie_info(movie: pd.Series, detailed: bool = False) -> str:
    """Format one movie row as a multi-line display string.

    Fields that are missing from the row or null are skipped. The title
    (followed by the first four characters of ``release_date`` as the year),
    the rating and the genres are always shown. The brief layout then adds
    the overview, cut to 150 characters; the detailed layout instead adds
    budget and revenue (only when positive), runtime and director.

    Args:
        movie: One row of the movies DataFrame.
        detailed: ``True`` for the detailed layout, ``False`` for the brief one.

    Returns:
        The formatted text, beginning with a newline.
    """
    def present(field: str) -> bool:
        # True when the row has the field and its value is not null.
        return field in movie and pd.notna(movie[field])

    parts = [f"\nüé¨ **{movie.get('title', 'Unknown')}**"]

    if present('release_date'):
        parts.append(f" ({str(movie['release_date'])[:4]})")

    # Rating and genres are common to both layouts.
    if present('vote_average'):
        parts.append(f"\n‚≠ê Rating: {movie['vote_average']}/10")
    if present('genres'):
        parts.append(f"\nüé≠ Genres: {movie['genres']}")

    if not detailed:
        if present('overview'):
            overview = str(movie['overview'])
            # Add the ellipsis only when text was actually cut off.
            if len(overview) > 150:
                overview = overview[:150] + "..."
            parts.append(f"\nüìù {overview}")
        return "".join(parts)

    if present('budget') and movie['budget'] > 0:
        parts.append(f"\nüí∞ Budget: ${int(movie['budget']):,}")
    if present('revenue') and movie['revenue'] > 0:
        parts.append(f"\nüíµ Revenue: ${int(movie['revenue']):,}")
    if present('runtime'):
        parts.append(f"\n‚è±Ô∏è  Runtime: {int(movie['runtime'])} min")
    if present('director'):
        parts.append(f"\nüé• Director: {movie['director']}")

    return "".join(parts)

print("‚úì Formatting functions created!")

In [None]:
def answer_from_dataset(query: str) -> Optional[str]:
    """Try to answer *query* from the global ``df``.

    Keyword checks run in this order and the first hit wins:

    1. Budget ("expensive", "highest budget", "most costly"): the three
       movies with the largest positive budget.
    2. Rating ("highest", "best", "top rated", "best rated"): if the query
       also contains the word "genre" and names a known genre, the single
       best-rated movie of that genre; otherwise the three best-rated movies.
    3. Runtime ("longest"): the three longest movies.
    4. Anything else: a ``search_movies`` lookup.

    Args:
        query: The user's question.

    Returns:
        The formatted answer, or ``None`` when no keyword applies and the
        search finds nothing.
    """
    query_lower = query.lower()

    def mentions(*keywords: str) -> bool:
        # True when any keyword occurs anywhere in the lower-cased query.
        return any(keyword in query_lower for keyword in keywords)

    def brief_listing(header: str, movies: pd.DataFrame) -> str:
        # Header followed by the brief description of every row.
        return header + "".join(
            format_movie_info(movie, False) for _, movie in movies.iterrows()
        )

    # Pattern: Most expensive. Checked before the rating pattern because
    # "highest budget" also contains "highest" and would otherwise never
    # reach this branch.
    if mentions('expensive', 'highest budget', 'most costly'):
        expensive = df[df['budget'] > 0].nlargest(3, 'budget')
        return brief_listing("\nüí∏ **Most Expensive Movies:**", expensive)

    # Pattern: Highest rated
    if mentions('highest', 'best', 'top rated', 'best rated'):
        if 'genre' in query_lower:
            genre_match = re.search(
                r'(action|comedy|drama|thriller|horror|romance|sci-fi|animation|adventure)',
                query_lower,
            )
            if genre_match:
                genre = genre_match.group(1)
                filtered = df[df['genres'].str.contains(genre, case=False, na=False)]
                if not filtered.empty:
                    best = filtered.nlargest(1, 'vote_average').iloc[0]
                    return f"Top {genre.title()} Movie:\n{format_movie_info(best, True)}"

        # No usable genre filter: fall back to the overall top three.
        return brief_listing("\nüèÜ **Top Rated Movies:**", df.nlargest(3, 'vote_average'))

    # Pattern: Longest movies
    if mentions('longest', 'longest runtime', 'longest movie'):
        return brief_listing("\n‚è±Ô∏è  **Longest Movies:**", df.nlargest(3, 'runtime'))

    # Pattern: Movie search
    matches = search_movies(query)
    if matches:
        return "\nüé¨ **Found Movies:**" + "".join(
            format_movie_info(movie, True) for movie, _ in matches
        )

    return None

print("‚úì Dataset answer function created!")

In [None]:
def answer_question(query: str) -> str:
    """Answer a user question, preferring the local dataset over Gemini.

    Args:
        query: The user's question.

    Returns:
        The dataset answer when one is found; otherwise the Gemini reply when
        the API is configured; otherwise a fixed hint message.
    """
    from_dataset = answer_from_dataset(query)
    if from_dataset:
        return from_dataset

    # Nothing in the dataset: use Gemini only if it was configured.
    if not gemini_available:
        return "I couldn't find information about this. Try asking about specific movies, ratings, or budgets."

    return call_gemini_api(query)

print("‚úì Main question answering function created!")

## Section 6: Create Interactive Chat Interface

In [None]:
def display_welcome():
    """Print the welcome banner and the kinds of questions the bot accepts."""
    rule = "=" * 60
    topics = (
        "Specific movies (e.g., 'Tell me about Avatar')",
        "Highest/lowest rated movies",
        "Most expensive movies",
        "Longest movies",
        "General movie questions",
    )

    print("\n" + rule)
    print("üé¨ Welcome to Movie Chatbot!")
    print(rule)
    print("\nYou can ask about:")
    for topic in topics:
        print(f"  ‚Ä¢ {topic}")
    print("\nType 'exit' to quit\n")

def start_chat():
    """Run the interactive question/answer loop until the user leaves.

    Prints the welcome banner, then repeatedly reads a line, skips blank
    input, and prints the reply from ``answer_question``. The loop ends when
    the user types ``exit`` (any letter case), interrupts with Ctrl+C, or the
    input stream is closed. Any other exception is reported and the loop
    continues.
    """
    display_welcome()

    while True:
        try:
            user_input = input("You: ").strip()

            if user_input.lower() == 'exit':
                print("\nüëã Thank you for chatting! Goodbye!")
                break

            if not user_input:
                continue

            response = answer_question(user_input)
            print(f"\nBot: {response}\n")

        except (KeyboardInterrupt, EOFError):
            # EOFError means input() has nothing left to read; retrying would
            # raise it again on every pass, so treat it as leaving the chat.
            print("\n\nüëã Chatbot closed.")
            break
        except Exception as e:
            print(f"Error: {str(e)}\n")

print("‚úì Chat interface created!")

## Section 7: Test the Chatbot with Sample Queries

In [None]:
# Test Query 1: look up a single movie by name
print("\n" + "=" * 60, "Test 1: Search for a specific movie", "=" * 60, sep="\n")
print("Query: Tell me about Avatar", "\nBot Response:", sep="\n")
print(answer_question("Tell me about Avatar"))

In [None]:
# Test Query 2: ask for the highest rated movies
print("\n" + "=" * 60, "Test 2: Find highest rated movies", "=" * 60, sep="\n")
print("Query: What are the highest rated movies?", "\nBot Response:", sep="\n")
print(answer_question("What are the highest rated movies?"))

In [None]:
# Test Query 3: ask for the most expensive movies
print("\n" + "=" * 60, "Test 3: Find most expensive movies", "=" * 60, sep="\n")
print("Query: What are the most expensive movies?", "\nBot Response:", sep="\n")
print(answer_question("What are the most expensive movies?"))

In [None]:
# Test Query 4: ask for the longest movies
print("\n" + "=" * 60, "Test 4: Find longest movies", "=" * 60, sep="\n")
print("Query: Show me the longest movies", "\nBot Response:", sep="\n")
print(answer_question("Show me the longest movies"))

In [None]:
# Test Query 5: a general-knowledge question, run only when Gemini is set up
if not gemini_available:
    print("\n‚ö†Ô∏è  Gemini API not available for general knowledge test")
else:
    print("\n" + "=" * 60, "Test 5: General movie knowledge (Gemini API)", "=" * 60, sep="\n")
    print("Query: How are movies rated?", "\nBot Response:", sep="\n")
    print(answer_question("How are movies rated?"))

## Start Interactive Chat

Uncomment the cell below to start the interactive chat interface:

In [None]:
# Uncomment to start the interactive chat
# start_chat()

# Closing status message with the steps for launching the chat loop.
print(
    "\n‚úÖ Chatbot is ready!",
    "\nTo start chatting:",
    "1. Uncomment the start_chat() call above",
    "2. Run this cell",
    "3. Ask questions about movies!",
    sep="\n",
)