<a href="https://colab.research.google.com/github/MahdiPourkeshavarz/4Sneak-/blob/main/geekyBot_claude.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =============================================================================
# CELL 1: INSTALL DEPENDENCIES & INITIAL SETUP
# =============================================================================
# Run this cell first to install all required packages

!pip install -q google-generativeai pandas

import json
import pandas as pd
from google.colab import files, userdata
import google.generativeai as genai
from typing import List, Dict, Any
import textwrap

# Reached only if the install and imports above completed without raising.
print("‚úÖ All dependencies installed successfully!")
print("üì¶ Packages loaded: google-generativeai, pandas, json")

In [None]:
# =============================================================================
# CELL 2: UPLOAD AND LOAD YOUR JSON DATA
# =============================================================================

def _parse_json_text(content: str) -> list:
    """
    Parse raw file text into a list of items.

    Strategies, tried in order:
    1. Text that starts with '[' is parsed as a JSON array.
    2. Any other text is wrapped in '[' ... ']' and parsed, which covers a
       single object or comma-separated objects missing the outer brackets.
    3. If that fails, each non-empty line is parsed on its own
       (newline-delimited JSON); lines that are not valid JSON are skipped
       and counted.

    Raises:
        ValueError: if no item could be parsed at all.
    """
    stripped = content.strip()

    try:
        if stripped.startswith('['):
            data = json.loads(content)
        else:
            # Might be missing the outer brackets - add them to form an array.
            data = json.loads('[' + content + ']')
    except json.JSONDecodeError as e:
        print(f"‚ö†Ô∏è JSON parsing error: {e}")
        print("Attempting to parse as newline-delimited JSON...")
    else:
        print(f"‚úÖ Loaded {len(data)} items from JSON file")
        return data

    data = []
    skipped = 0
    for line in stripped.split('\n'):
        line = line.strip()
        if not line:
            continue
        try:
            data.append(json.loads(line))
        except json.JSONDecodeError:
            # Best effort: keep going, but remember how much was dropped.
            skipped += 1

    if skipped:
        print(f"‚ö†Ô∏è Skipped {skipped} line(s) that were not valid JSON")
    if not data:
        raise ValueError("Could not parse the JSON file. Please check the format.")

    print(f"‚úÖ Loaded {len(data)} items")
    return data


def load_json_data():
    """
    Upload and load your movies/series JSON file.

    Supports a JSON array, comma-separated objects without outer brackets,
    and one-object-per-line (newline-delimited) files.

    Returns:
        The parsed items as a list.

    Raises:
        ValueError: if nothing was uploaded or the file could not be parsed.
    """
    print("üìÇ Please upload your JSON file containing movies/series data...")
    uploaded = files.upload()

    # A cancelled upload yields no entries; fail with a clear message instead
    # of an IndexError.
    if not uploaded:
        raise ValueError("No file was uploaded. Please run this cell again and choose a JSON file.")

    filename = next(iter(uploaded))
    print(f"‚úÖ File '{filename}' uploaded successfully!")

    # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM, which
    # json.loads would otherwise reject.
    with open(filename, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    return _parse_json_text(content)

# Load the data (prompts for a file upload when this cell runs)
raw_data = load_json_data()

# Quick preview: total count plus a tally of items per 'type' value
print(f"\nüìä Data Preview:")
print(f"   Total items: {len(raw_data)}")
if raw_data:
    # Items without a 'type' key are tallied under 'unknown'
    types = {}
    for item in raw_data:
        t = item.get('type', 'unknown')
        types[t] = types.get(t, 0) + 1
    for t, count in types.items():
        print(f"   - {t}: {count} items")

In [None]:
# =============================================================================
# CELL 3: TRANSFORM DATA INTO AI-FRIENDLY FORMAT
# =============================================================================

def extract_download_links_movie(releases: dict) -> List[Dict]:
    """
    Extract download links from a movie's ``releases`` structure.

    Args:
        releases: Mapping that may hold 'softsub' and/or 'dubbed' keys, each
            with a list of release dicts. Anything that is not a dict
            (e.g. None) is treated as "no releases".

    Returns:
        A flat list of dicts with 'type', 'quality', 'size' and 'url' keys;
        softsub entries come first, then dubbed. Entries that are not dicts
        or have no 'url' key are ignored. Missing 'quality'/'size' values
        default to 'Unknown'.
    """
    links = []
    if not isinstance(releases, dict):
        return links

    for release_type in ('softsub', 'dubbed'):
        entries = releases.get(release_type)
        if not isinstance(entries, list):
            continue
        for entry in entries:
            if isinstance(entry, dict) and 'url' in entry:
                links.append({
                    'type': release_type,
                    'quality': entry.get('quality', 'Unknown'),
                    'size': entry.get('size', 'Unknown'),
                    'url': entry['url'],
                })

    return links

def extract_download_links_series(seasons: dict) -> Dict[str, List[Dict]]:
    """
    Extract download links from a TV series' ``seasons`` structure.

    Args:
        seasons: Mapping that may hold 'softsub' and/or 'dubbed' keys, each
            mapping a season number to a list of quality dicts. Anything
            that is not a dict (e.g. None) is treated as "no seasons".

    Returns:
        A dict keyed by "Season <number>" whose values are lists of dicts
        with 'type', 'quality' and 'url' keys. A season that appears in the
        input but has no usable entries still gets an (empty) list.
    """
    all_seasons = {}
    if not isinstance(seasons, dict):
        return all_seasons

    for sub_type in ('softsub', 'dubbed'):
        per_season = seasons.get(sub_type)
        if not isinstance(per_season, dict):
            continue

        for season_num, qualities in per_season.items():
            # Register the season even when its payload turns out unusable.
            season_links = all_seasons.setdefault(f"Season {season_num}", [])
            if not isinstance(qualities, list):
                continue

            for q in qualities:
                if isinstance(q, dict) and 'url' in q:
                    season_links.append({
                        'type': sub_type,
                        'quality': q.get('quality', 'Unknown'),
                        'url': q['url'],
                    })

    return all_seasons

def _format_vote_count(votes) -> str:
    """Render a vote count: numbers get thousands separators, other values are shown as-is."""
    if votes is None:
        return '0'
    if isinstance(votes, (int, float)):
        return f"{votes:,}"
    # e.g. a pre-formatted string such as "12,345" - the ',' format spec
    # would raise on it, so pass it through unchanged.
    return str(votes)


def _season_sort_key(entry):
    """Sort key for ("Season N", links) pairs: numeric seasons first, in numeric order."""
    label = entry[0]
    suffix = label.rpartition(' ')[2]
    try:
        return (0, int(suffix), label)
    except ValueError:
        return (1, 0, label)


def transform_item_to_text(item: dict, index: int) -> str:
    """
    Transform a single movie/series item into a text block
    that the AI can easily understand and reference.

    Args:
        item: The raw item dict. Missing fields fall back to placeholder
            text such as 'N/A'.
        index: Zero-based position of the item; shown one-based in the header.

    Returns:
        A newline-joined block starting with "=== ITEM #<index + 1> ===",
        followed by the metadata fields and, when present, the download
        options (movies: from 'releases'; tvSeries: from 'seasons', listed
        in numeric season order).
    """
    item_type = item.get('type', 'unknown')
    title = item.get('title', 'Unknown Title')

    # Build the text representation
    lines = []
    lines.append(f"=== ITEM #{index + 1} ===")
    lines.append(f"TITLE: {title}")
    lines.append(f"TYPE: {item_type}")

    if 'year' in item:
        lines.append(f"YEAR: {item['year']}")

    votes_text = _format_vote_count(item.get('imdbVotes', 0))
    lines.append(f"IMDB CODE: {item.get('imdbCode', 'N/A')}")
    lines.append(f"IMDB RATING: {item.get('imdbRate', 'N/A')}/10 ({votes_text} votes)")
    lines.append(f"GENRE: {item.get('genre', 'N/A')}")
    lines.append(f"RUNTIME: {item.get('runtime', 'N/A')}")
    lines.append(f"COUNTRY: {item.get('country', 'N/A')}")
    lines.append(f"RATED: {item.get('rated', 'N/A')}")
    lines.append(f"DIRECTOR: {item.get('director', 'N/A')}")
    lines.append(f"ACTORS: {item.get('actors', 'N/A')}")
    lines.append(f"PLOT: {item.get('plot', 'No plot available')}")

    # Handle download links based on type
    if item_type == 'movie' and 'releases' in item:
        links = extract_download_links_movie(item['releases'])
        if links:
            lines.append("DOWNLOAD OPTIONS:")
            for link in links:
                lines.append(f"  - [{link['type'].upper()}] {link['quality']} ({link['size']}) ‚Üí {link['url']}")
        else:
            lines.append("DOWNLOAD OPTIONS: None available")

    elif item_type == 'tvSeries' and 'seasons' in item:
        total_seasons = item.get('totalSeasons', 'Unknown')
        lines.append(f"TOTAL SEASONS: {total_seasons}")
        seasons_data = extract_download_links_series(item['seasons'])
        if seasons_data:
            lines.append("DOWNLOAD OPTIONS BY SEASON:")
            # A plain string sort would put "Season 10" before "Season 2".
            for season, links in sorted(seasons_data.items(), key=_season_sort_key):
                lines.append(f"  {season}:")
                for link in links:
                    lines.append(f"    - [{link['type'].upper()}] {link['quality']} ‚Üí {link['url']}")
        else:
            lines.append("DOWNLOAD OPTIONS: None available")

    return '\n'.join(lines)


def build_knowledge_base(data: List[dict]) -> str:
    """
    Render every item as text and join the results into one string.

    Each item is rendered with transform_item_to_text (using its position in
    ``data`` as the index) and the blocks are separated by a blank line.
    Progress and size statistics are printed along the way; the token figure
    is a rough estimate of one token per four characters.
    """
    print("üîÑ Transforming data into AI-friendly format...")

    rendered = [
        transform_item_to_text(entry, position)
        for position, entry in enumerate(data)
    ]
    knowledge_base = '\n\n'.join(rendered)
    char_count = len(knowledge_base)

    print(f"‚úÖ Knowledge base created!")
    print(f"   Total characters: {char_count:,}")
    print(f"   Estimated tokens: ~{char_count // 4:,}")

    return knowledge_base

# Transform the data
knowledge_base = build_knowledge_base(raw_data)

# Preview first item (guarded: an empty dataset, e.g. a file containing "[]",
# has no first item to show)
print("\nüìù Sample of transformed data (first item):")
print("-" * 50)
if raw_data:
    first_item = transform_item_to_text(raw_data[0], 0)
    print(first_item[:1500] + "..." if len(first_item) > 1500 else first_item)
else:
    print("(no items loaded - nothing to preview)")

In [None]:
# =============================================================================
# CELL 4: CONFIGURE GOOGLE GEMINI API
# =============================================================================

def setup_gemini():
    """
    Configure the Gemini API and return a ready-to-use model.

    The API key is read from the Colab secret 'GEMINI_API_KEY' when it is
    available and non-empty; otherwise the user is prompted for it. The
    prompt uses getpass so the key is not echoed into the notebook output.

    Free-tier limits noted for Gemini 1.5 Flash when this notebook was
    written (may have changed - verify against current Google documentation):
    - 15 RPM (requests per minute)
    - 1 million tokens per minute
    - 1500 requests per day

    Returns:
        The configured ``genai.GenerativeModel``.

    Raises:
        ValueError: if no API key was provided.
    """
    # Local import keeps this cell self-contained; getpass is standard library.
    from getpass import getpass

    # Use Gemini 1.5 Flash - it's free and fast!
    # Note: "gemini-2.0-flash" or "gemini-1.5-flash" depending on availability
    model_name = "gemini-1.5-flash"  # Free tier model

    print("üîë Setting up Gemini API...")
    print("\nüìå To get your FREE API key:")
    print("   1. Go to: https://makersuite.google.com/app/apikey")
    print("   2. Click 'Create API Key'")
    print("   3. Copy the key and paste it below\n")

    # Try to get from Colab secrets first
    try:
        api_key = userdata.get('GEMINI_API_KEY')
    except Exception:
        # Secret missing or not accessible from this notebook - prompt instead.
        # (Exception, not a bare except, so Ctrl-C still interrupts.)
        api_key = None

    api_key = (api_key or '').strip()
    if api_key:
        print("‚úÖ Found API key in Colab secrets!")
    else:
        api_key = getpass("Enter your Gemini API Key: ").strip()

    if not api_key:
        raise ValueError("API key cannot be empty!")

    # Configure the API
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel(
        model_name=model_name,
        generation_config={
            "temperature": 0.7,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 2048,
        },
        system_instruction="""You are a friendly and knowledgeable Movie & TV Series Recommendation Assistant.

Your job is to help users find movies and TV series based on their preferences and provide download links.

IMPORTANT RULES:
1. ONLY recommend items from the provided database - never make up movies or links
2. Always include the download link when recommending something
3. Be conversational and friendly
4. If multiple options match, suggest 2-3 best matches
5. Include relevant details like rating, genre, plot summary, and actors
6. If you can't find a match, say so honestly and ask for more preferences
7. When listing download options, clearly state the quality and type (softsub/dubbed)
8. For TV series, mention how many seasons are available

FORMAT YOUR RECOMMENDATIONS LIKE THIS:
üé¨ **Title** (Year) - Rating ‚≠ê
üìù Plot: [brief description]
üé≠ Genre: [genres] | ‚è±Ô∏è Runtime: [time]
üë• Cast: [main actors]

üì• Download Links:
[list available qualities with links]
"""
    )

    print("‚úÖ Gemini model configured successfully!")
    print(f"   Model: {model_name} (FREE tier)")

    return model

# Setup the model (uses the Colab secret if present, otherwise prompts for the key)
gemini_model = setup_gemini()

In [None]:
# =============================================================================
# CELL 5: CREATE THE INTELLIGENT CHAT SYSTEM
# =============================================================================

class MovieAssistant:
    """
    Recommendation assistant combining a keyword pre-filter with a Gemini model.

    For every user message the raw items are scored against the query, the
    best matches are rendered as text and sent to the model together with a
    short database summary. Each question/answer pair is appended to
    ``chat_history``; the history is not sent back to the model.
    """

    # Word a user may type -> genre name looked for in an item's 'genre' text.
    # Synonyms ('funny', 'scary', 'romantic') map onto the genre they mean.
    _GENRE_ALIASES = {
        'comedy': 'comedy', 'funny': 'comedy',
        'horror': 'horror', 'scary': 'horror',
        'action': 'action',
        'romance': 'romance', 'romantic': 'romance',
        'drama': 'drama', 'thriller': 'thriller', 'sci-fi': 'sci-fi',
        'fantasy': 'fantasy', 'animation': 'animation',
        'documentary': 'documentary', 'mystery': 'mystery',
        'adventure': 'adventure', 'crime': 'crime', 'family': 'family',
        'music': 'music', 'war': 'war', 'western': 'western',
    }

    # Word a user may type -> fragment looked for in an item's 'country' text.
    _COUNTRY_ALIASES = {
        'uk': 'united kingdom', 'british': 'united kingdom',
        'american': 'united states', 'usa': 'united states',
        'korean': 'korea', 'japanese': 'japan', 'french': 'france',
        'indian': 'india', 'spanish': 'spain', 'german': 'germany',
    }

    def __init__(self, model, knowledge_base: str, raw_data: List[dict]):
        """
        Args:
            model: Object exposing ``generate_content(prompt)`` whose result
                has a ``text`` attribute.
            knowledge_base: Text rendering of all items; its first 10,000
                characters are the fallback context when nothing matches.
            raw_data: The item dicts that are searched and summarised.
        """
        self.model = model
        self.knowledge_base = knowledge_base
        self.raw_data = raw_data
        self.chat_history = []

        # Create a summary for quick reference
        self.data_summary = self._create_summary()

    @staticmethod
    def _field_text(item: dict, key: str) -> str:
        """Return item[key] as lowercase text; '' when missing or not text-like."""
        value = item.get(key)
        if isinstance(value, str):
            return value.lower()
        if isinstance(value, (list, tuple)):
            return ', '.join(str(part) for part in value).lower()
        return ''

    @staticmethod
    def _split_names(value) -> List[str]:
        """Split a comma-separated string (or a list) into stripped, non-empty names."""
        if isinstance(value, str):
            parts = value.split(',')
        elif isinstance(value, (list, tuple)):
            parts = [str(part) for part in value]
        else:
            return []
        return [part.strip() for part in parts if part.strip()]

    def _create_summary(self) -> str:
        """Create a quick summary of available content (counts, genres, countries, year range)."""
        genres = set()
        countries = set()
        years = set()

        for item in self.raw_data:
            genres.update(self._split_names(item.get('genre')))
            countries.update(self._split_names(item.get('country')))
            year = item.get('year')
            if year is not None:
                years.add(year)

        movies = sum(1 for i in self.raw_data if i.get('type') == 'movie')
        series = sum(1 for i in self.raw_data if i.get('type') == 'tvSeries')

        if years:
            try:
                first_year, last_year = min(years), max(years)
            except TypeError:
                # Mixed int/str years cannot be compared directly.
                first_year, last_year = min(years, key=str), max(years, key=str)
        else:
            first_year = last_year = 'N/A'

        return f"""
DATABASE SUMMARY:
- Total Movies: {movies}
- Total TV Series: {series}
- Available Genres: {', '.join(sorted(genres))}
- Countries: {', '.join(sorted(countries))}
- Years: {first_year} - {last_year}
"""

    def _score_item(self, item: dict, query_lower: str, keywords: List[str]) -> int:
        """
        Score how well one item matches the query (higher is better).

        Weights: title 10, each requested genre 8, actors 7, each matching
        country word 5, plot 3, type preference 3, rating boost 1-2.
        """
        score = 0
        # Short words ("a", "me", "the") would match almost any text.
        significant = [kw for kw in keywords if len(kw) > 3]

        # Check title: a significant keyword inside it, or the whole title
        # appearing as words in the query (keeps short titles findable).
        title = self._field_text(item, 'title')
        if title and (
            any(kw in title for kw in significant)
            or f" {title} " in f" {query_lower} "
        ):
            score += 10

        # Check genre: each distinct requested genre counts once.
        genre = self._field_text(item, 'genre')
        wanted_genres = {
            canonical
            for word, canonical in self._GENRE_ALIASES.items()
            if word in query_lower
        }
        score += 8 * sum(1 for wanted in wanted_genres if wanted in genre)

        # Check country: the typed word itself or the name it stands for.
        country = self._field_text(item, 'country')
        for word, fragment in self._COUNTRY_ALIASES.items():
            if word in query_lower and (word in country or fragment in country):
                score += 5

        # Check actors
        actors = self._field_text(item, 'actors')
        if any(kw in actors for kw in significant):
            score += 7

        # Check plot
        plot = self._field_text(item, 'plot')
        if any(kw in plot for kw in significant):
            score += 3

        # Boost by rating (values such as "8.5" or "N/A" are tolerated)
        try:
            rating = float(item.get('imdbRate') or 0)
        except (TypeError, ValueError):
            rating = 0.0
        if rating >= 8:
            score += 2
        elif rating >= 7:
            score += 1

        # Check for type preference
        if 'series' in query_lower or 'show' in query_lower or 'tv' in query_lower:
            if item.get('type') == 'tvSeries':
                score += 3
        if 'movie' in query_lower or 'film' in query_lower:
            if item.get('type') == 'movie':
                score += 3

        return score

    def _smart_search(self, query: str) -> str:
        """
        Perform a smart search to find relevant items.
        This reduces the context size for the AI.

        Returns the text rendering of the 15 highest-scoring items, or the
        first 10,000 characters of the knowledge base when nothing scores
        above zero.
        """
        query_lower = query.lower()
        keywords = query_lower.split()

        scored_items = []
        for i, item in enumerate(self.raw_data):
            score = self._score_item(item, query_lower, keywords)
            if score > 0:
                scored_items.append((score, i, item))

        # Sort by score and get top results (stable: ties keep data order)
        scored_items.sort(key=lambda x: x[0], reverse=True)
        top_items = scored_items[:15]  # Get top 15 matches

        if not top_items:
            # If no matches, return a sample for the AI to work with
            return self.knowledge_base[:10000]  # First 10k chars

        # Build context from matched items
        context_parts = [
            transform_item_to_text(item, idx) for _score, idx, item in top_items
        ]
        return '\n\n'.join(context_parts)

    def chat(self, user_message: str) -> str:
        """
        Process user message and generate response.

        Returns the model's reply, or an error message string if generating
        the reply raised; the exception is not propagated so an interactive
        session keeps running.
        """
        # Get relevant context using smart search
        relevant_context = self._smart_search(user_message)

        # Build the prompt
        prompt = f"""
{self.data_summary}

RELEVANT DATABASE ENTRIES:
{relevant_context}

---
USER QUESTION: {user_message}

Based on the database above, please help the user. Remember:
- Only suggest items from the database
- Include download links
- Be friendly and helpful
- If nothing matches perfectly, suggest the closest options
"""

        try:
            response = self.model.generate_content(prompt)
            assistant_response = response.text

            # Store in history
            self.chat_history.append({
                'user': user_message,
                'assistant': assistant_response
            })

            return assistant_response

        except Exception as e:
            return f"‚ùå Error generating response: {str(e)}\nPlease try again or rephrase your question."

# Create the assistant from the model, knowledge base and raw items built in
# the earlier cells
assistant = MovieAssistant(gemini_model, knowledge_base, raw_data)

print("‚úÖ Movie Assistant is ready!")
# Show the database summary computed in the assistant's constructor
print(assistant.data_summary)

In [None]:
# =============================================================================
# CELL 6: INTERACTIVE CHAT INTERFACE
# =============================================================================

def start_chat():
    """
    Start an interactive chat session with the Movie Assistant.

    Prints a banner with example questions, then loops: reads a line,
    forwards it to the module-level ``assistant`` and prints the reply.
    The loop ends on 'quit', 'exit', 'bye' or 'q' (case-insensitive), or
    when input is interrupted / closed. Blank lines are ignored.
    """
    exit_words = ('quit', 'exit', 'bye', 'q')
    farewell = "\nüëã Goodbye! Enjoy your movies!"

    print("=" * 60)
    print("üé¨ MOVIE & TV SERIES RECOMMENDATION ASSISTANT üçø")
    print("=" * 60)
    print("\nHello! I'm your movie and TV series assistant.")
    print("I have access to a database of content with download links!")
    print("\nYou can ask me things like:")
    print("  ‚Ä¢ 'Recommend a horror movie'")
    print("  ‚Ä¢ 'I want a funny British comedy'")
    print("  ‚Ä¢ 'What TV series do you have with high ratings?'")
    print("  ‚Ä¢ 'Find me something with drama and romance'")
    print("  ‚Ä¢ 'What movies do you have from 2007?'")
    print("\nType 'quit' or 'exit' to end the conversation.")
    print("=" * 60)

    while True:
        print()
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the cell or a closed stdin ends the session the
            # same way as typing 'quit', instead of showing a traceback.
            print(farewell)
            break

        if not user_input:
            continue

        if user_input.lower() in exit_words:
            print(farewell)
            break

        print("\nü§î Thinking...")
        response = assistant.chat(user_input)
        print(f"\nü§ñ Assistant:\n{response}")

# Start the interactive chat (this cell keeps running until an exit word is typed)
start_chat()

In [None]:
# =============================================================================
# CELL 7: QUICK TEST - RUN SAMPLE QUERIES
# =============================================================================

# Test queries to verify everything works
test_queries = [
    "What do you have in your database?",
    "Recommend me a drama series with high rating",
    "I want to watch something with fantasy and horror",
    "What movies do you have from the late 90s?",
]

print("üß™ Running test queries...\n")
print("=" * 60)

# Each query goes through assistant.chat, so each iteration makes one model
# request and appends one entry to assistant.chat_history.
for query in test_queries:
    print(f"\nüìù Query: {query}")
    print("-" * 40)
    response = assistant.chat(query)
    print(f"ü§ñ Response:\n{response}")
    print("=" * 60)