In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from datetime import datetime
from collections import Counter
from transformers import BertTokenizer, BertModel
import random
import json

# Set device - use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --------------------------------
# Minimal Databases
# --------------------------------
# User database - 5 rows
USERS_DB = [
    {"user_id": 1, "username": "TechEnthusiast", "age": 28, "gender": "Male", "income_bracket": "high"},
    {"user_id": 2, "username": "FamilyFirst", "age": 35, "gender": "Female", "income_bracket": "medium"},
    {"user_id": 3, "username": "AdventureSeeker", "age": 24, "gender": "Male", "income_bracket": "medium"},
    {"user_id": 4, "username": "LuxuryLover", "age": 31, "gender": "Female", "income_bracket": "high"},
    {"user_id": 5, "username": "BudgetShopper", "age": 42, "gender": "Non-binary", "income_bracket": "medium"}
]

# Watch history database - 5 entries per user
WATCH_HISTORY_DB = [
    # TechEnthusiast
    {"user_id": 1, "content_id": 101, "rating": 4.8},  # Inception
    {"user_id": 1, "content_id": 102, "rating": 5.0},  # The Matrix
    {"user_id": 1, "content_id": 104, "rating": 4.9},  # Interstellar
    {"user_id": 1, "content_id": 105, "rating": 4.7},  # Blade Runner
    {"user_id": 1, "content_id": 108, "rating": 4.5},  # Dune

    # FamilyFirst
    {"user_id": 2, "content_id": 125, "rating": 5.0},  # Finding Nemo
    {"user_id": 2, "content_id": 126, "rating": 4.9},  # Toy Story
    {"user_id": 2, "content_id": 127, "rating": 4.8},  # Inside Out
    {"user_id": 2, "content_id": 128, "rating": 4.7},  # Coco
    {"user_id": 2, "content_id": 129, "rating": 4.5},  # Up

    # AdventureSeeker
    {"user_id": 3, "content_id": 131, "rating": 5.0},  # Into the Wild
    {"user_id": 3, "content_id": 132, "rating": 4.9},  # The Secret Life of Walter Mitty
    {"user_id": 3, "content_id": 133, "rating": 4.8},  # 127 Hours
    {"user_id": 3, "content_id": 134, "rating": 4.7},  # Wild
    {"user_id": 3, "content_id": 110, "rating": 4.6},  # Mad Max: Fury Road

    # LuxuryLover
    {"user_id": 4, "content_id": 136, "rating": 5.0},  # The Devil Wears Prada
    {"user_id": 4, "content_id": 137, "rating": 4.9},  # Great Gatsby
    {"user_id": 4, "content_id": 138, "rating": 4.8},  # Crazy Rich Asians
    {"user_id": 4, "content_id": 139, "rating": 4.7},  # Breakfast at Tiffany's
    {"user_id": 4, "content_id": 118, "rating": 4.5},  # La La Land

    # BudgetShopper
    {"user_id": 5, "content_id": 141, "rating": 5.0},  # The Pursuit of Happyness
    {"user_id": 5, "content_id": 142, "rating": 4.9},  # Little Miss Sunshine
    {"user_id": 5, "content_id": 143, "rating": 4.8},  # The Florida Project
    {"user_id": 5, "content_id": 144, "rating": 4.7},  # Nomadland
    {"user_id": 5, "content_id": 145, "rating": 4.5}   # Lady Bird
]

# User comments database - 1 comment per user
USER_COMMENTS_DB = [
    {"user_id": 1, "comment": "I love sci-fi movies with complex technology concepts and philosophical undertones. The special effects and innovative storytelling techniques really captivate me."},
    {"user_id": 2, "comment": "Family-friendly content that teaches values and has wholesome messages is always our go-to. We enjoy watching animated films together that are both entertaining and educational."},
    {"user_id": 3, "comment": "Outdoor adventure films inspire me to travel and explore nature. I'm drawn to stories about personal journeys, physical challenges, and breathtaking landscapes."},
    {"user_id": 4, "comment": "Luxury fashion and high-society dramas are my guilty pleasure. I appreciate the beautiful aesthetics, elegant settings, and sophisticated storylines with a touch of glamour."},
    {"user_id": 5, "comment": "I prefer realistic movies that deal with everyday economic struggles and finding value in simple things. Stories about financial resilience and practical solutions resonate with me."}
]

# Content database - 5 movies from each category
CONTENT_DB = [
    # Tech/Sci-Fi
    {
        "content_id": 101,
        "title": "Inception",
        "genres": "sci-fi,thriller,action",
        "mood": "thought-provoking,tense",
        "themes": "reality,technology",
        "ad_suitability": "technology,luxury"
    },
    {
        "content_id": 102,
        "title": "The Matrix",
        "genres": "sci-fi,action",
        "mood": "rebellious,enlightening",
        "themes": "reality,technology",
        "ad_suitability": "technology,gaming"
    },
    {
        "content_id": 104,
        "title": "Interstellar",
        "genres": "sci-fi,drama,adventure",
        "mood": "wonder,emotional",
        "themes": "love,survival",
        "ad_suitability": "technology,education"
    },
    {
        "content_id": 105,
        "title": "Blade Runner",
        "genres": "sci-fi,thriller,noir",
        "mood": "bleak,atmospheric",
        "themes": "humanity,technology",
        "ad_suitability": "technology,fashion"
    },
    {
        "content_id": 108,
        "title": "Dune",
        "genres": "sci-fi,adventure,epic",
        "mood": "awe-inspiring,immersive",
        "themes": "power,ecology",
        "ad_suitability": "technology,sustainability"
    },

    # Family
    {
        "content_id": 125,
        "title": "Finding Nemo",
        "genres": "animation,adventure,family",
        "mood": "touching,uplifting",
        "themes": "family,friendship",
        "ad_suitability": "family,education"
    },
    {
        "content_id": 126,
        "title": "Toy Story",
        "genres": "animation,adventure,family",
        "mood": "playful,heartwarming",
        "themes": "friendship,growing up",
        "ad_suitability": "toys,family"
    },
    {
        "content_id": 127,
        "title": "Inside Out",
        "genres": "animation,family",
        "mood": "moving,thoughtful",
        "themes": "emotions,childhood",
        "ad_suitability": "family,education"
    },
    {
        "content_id": 128,
        "title": "Coco",
        "genres": "animation,family",
        "mood": "touching,colorful",
        "themes": "family,cultural heritage",
        "ad_suitability": "family,music"
    },
    {
        "content_id": 129,
        "title": "Up",
        "genres": "animation,adventure,family",
        "mood": "bittersweet,touching",
        "themes": "friendship,adventure",
        "ad_suitability": "travel,family"
    },

    # Adventure/Outdoor
    {
        "content_id": 131,
        "title": "Into the Wild",
        "genres": "adventure,biography,drama",
        "mood": "introspective,free-spirited",
        "themes": "nature,self-discovery",
        "ad_suitability": "outdoor,travel"
    },
    {
        "content_id": 132,
        "title": "The Secret Life of Walter Mitty",
        "genres": "adventure,comedy,drama",
        "mood": "inspiring,heartwarming",
        "themes": "adventure,imagination",
        "ad_suitability": "travel,photography"
    },
    {
        "content_id": 133,
        "title": "127 Hours",
        "genres": "adventure,biography,drama",
        "mood": "intense,triumphant",
        "themes": "survival,isolation",
        "ad_suitability": "outdoor,fitness"
    },
    {
        "content_id": 134,
        "title": "Wild",
        "genres": "adventure,biography,drama",
        "mood": "introspective,determined",
        "themes": "healing,nature",
        "ad_suitability": "outdoor,hiking gear"
    },
    {
        "content_id": 110,
        "title": "Mad Max: Fury Road",
        "genres": "action,adventure,sci-fi",
        "mood": "intense,chaotic",
        "themes": "survival,freedom",
        "ad_suitability": "automotive,outdoor"
    },

    # Luxury/Fashion
    {
        "content_id": 136,
        "title": "The Devil Wears Prada",
        "genres": "comedy,drama",
        "mood": "aspirational,fashionable",
        "themes": "ambition,fashion",
        "ad_suitability": "fashion,luxury"
    },
    {
        "content_id": 137,
        "title": "The Great Gatsby",
        "genres": "drama,romance",
        "mood": "decadent,glamorous",
        "themes": "wealth,love",
        "ad_suitability": "luxury,fashion"
    },
    {
        "content_id": 138,
        "title": "Crazy Rich Asians",
        "genres": "comedy,drama,romance",
        "mood": "aspirational,dazzling",
        "themes": "wealth,cultural identity",
        "ad_suitability": "luxury,travel"
    },
    {
        "content_id": 139,
        "title": "Breakfast at Tiffany's",
        "genres": "comedy,drama,romance",
        "mood": "elegant,charming",
        "themes": "identity,belonging",
        "ad_suitability": "jewelry,fashion"
    },
    {
        "content_id": 118,
        "title": "La La Land",
        "genres": "comedy,drama,music,romance",
        "mood": "romantic,hopeful",
        "themes": "dreams,love",
        "ad_suitability": "music,fashion"
    },

    # Budget/Practical
    {
        "content_id": 141,
        "title": "The Pursuit of Happyness",
        "genres": "biography,drama",
        "mood": "hopeful,determined",
        "themes": "perseverance,poverty",
        "ad_suitability": "finance,education"
    },
    {
        "content_id": 142,
        "title": "Little Miss Sunshine",
        "genres": "comedy,drama",
        "mood": "charming,humorous",
        "themes": "family,acceptance",
        "ad_suitability": "automotive,family"
    },
    {
        "content_id": 143,
        "title": "The Florida Project",
        "genres": "drama",
        "mood": "poignant,immersive",
        "themes": "childhood,poverty",
        "ad_suitability": "budget services,family"
    },
    {
        "content_id": 144,
        "title": "Nomadland",
        "genres": "drama",
        "mood": "reflective,peaceful",
        "themes": "independence,community",
        "ad_suitability": "budget travel,outdoor"
    },
    {
        "content_id": 145,
        "title": "Lady Bird",
        "genres": "comedy,drama",
        "mood": "intimate,relatable",
        "themes": "coming of age,identity",
        "ad_suitability": "education,budget fashion"
    }
]

# Ad category database - 5 categories
AD_CATEGORIES_DB = [
    {
        "category_id": 1,
        "name": "Technology",
        "description": "Tech products and services",
        "target_demographics": "18-45, tech-savvy, higher income",
        "suitable_contexts": "sci-fi,thriller,action",
        "sample_products": "smartphones, laptops, smart home devices, gaming gear, tech services"
    },
    {
        "category_id": 2,
        "name": "Family",
        "description": "Products for family and children",
        "target_demographics": "25-45, parents, family-oriented",
        "suitable_contexts": "animation,family,comedy",
        "sample_products": "toys, educational apps, family activities, food, children's products"
    },
    {
        "category_id": 3,
        "name": "Outdoor",
        "description": "Outdoor and adventure products",
        "target_demographics": "18-40, active lifestyle, adventure seekers",
        "suitable_contexts": "adventure,nature,documentary",
        "sample_products": "hiking gear, camping equipment, travel packages, sports equipment"
    },
    {
        "category_id": 4,
        "name": "Luxury",
        "description": "High-end luxury products",
        "target_demographics": "25-60, higher income, status-conscious",
        "suitable_contexts": "drama,fashion,romance",
        "sample_products": "designer fashion, jewelry, luxury cars, premium cosmetics"
    },
    {
        "category_id": 5,
        "name": "Finance",
        "description": "Financial services and products",
        "target_demographics": "25-65, budget-conscious, practical",
        "suitable_contexts": "drama,documentary,biographical",
        "sample_products": "banking apps, investment tools, insurance, financial planning"
    }
]

# --------------------------------
# Data Access Functions
# --------------------------------
def get_user_data(user_id):
    """Get all data for a specific user from our databases"""
    # Get user info
    user = next((u for u in USERS_DB if u["user_id"] == user_id), None)
    if not user:
        return None

    # Get watch history
    watch_history = [w for w in WATCH_HISTORY_DB if w["user_id"] == user_id]

    # Get comments
    comments = [c["comment"] for c in USER_COMMENTS_DB if c["user_id"] == user_id]

    # Get content details for watch history
    content_details = []
    for history in watch_history:
        content = next((c for c in CONTENT_DB if c["content_id"] == history["content_id"]), None)
        if content:
            content_with_rating = {**content, "user_rating": history["rating"]}
            content_details.append(content_with_rating)

    return {
        "user_info": user,
        "watch_history": content_details,
        "comments": comments
    }

def get_content_data(content_id):
    """Get details for a specific content item"""
    return next((c for c in CONTENT_DB if c["content_id"] == content_id), None)

def get_ad_categories():
    """Get all ad categories"""
    return AD_CATEGORIES_DB

def get_all_users():
    """Get basic info for all users"""
    return [(u["user_id"], u["username"]) for u in USERS_DB]

def get_all_content():
    """Get basic info for all content"""
    return [(c["content_id"], c["title"], c["genres"]) for c in CONTENT_DB]

# --------------------------------
# BERT-based User Profiler
# --------------------------------
class BERTUserProfiler:
    def __init__(self):
        print("Initializing BERT-based User Profiler...")

        # Define categories of interest
        self.interest_categories = [
            "technology", "innovation", "gadgets", "family", "children", "education",
            "adventure", "outdoor", "travel", "fitness", "luxury", "fashion",
            "budget", "finance", "practical", "entertainment", "gaming"
        ]

        # Initialize BERT model
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = BertModel.from_pretrained('bert-base-uncased').to(device)
        print("BERT model loaded successfully")

    def profile_user(self, user_data):
        """Create a user profile based on watch history and comments"""
        # Initialize counters
        genre_preferences = Counter()
        interests = Counter()
        ad_category_affinity = Counter()

        # Process watch history
        for content in user_data.get("watch_history", []):
            # Weight based on rating
            weight = max(0.5, min(1.5, content.get("user_rating", 3.0) / 3.3))

            # Process genres
            if "genres" in content:
                for genre in content["genres"].split(","):
                    genre_preferences[genre.strip()] += weight

            # Process ad suitability
            if "ad_suitability" in content:
                for category in content["ad_suitability"].split(","):
                    ad_category_affinity[category.strip()] += weight

        # Process comments to extract interests using BERT
        all_comments = " ".join(user_data.get("comments", []))
        if all_comments:
            # Simple keyword extraction first
            for interest in self.interest_categories:
                if interest in all_comments.lower():
                    interests[interest] += 1

            # Use BERT for semantic understanding
            with torch.no_grad():
                inputs = self.tokenizer(all_comments, return_tensors="pt", truncation=True, max_length=512).to(device)
                outputs = self.model(**inputs)

                # Use embedding to infer interests (simplified for speed)
                # In a full system, you'd use a classifier trained on the BERT embeddings
                embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()[0]

                # Map embedding to interests (simplified example)
                tech_keywords = ["technology", "innovation", "science", "future", "digital"]
                family_keywords = ["family", "children", "parents", "kids", "home"]
                outdoor_keywords = ["outdoor", "nature", "adventure", "travel", "explore"]
                luxury_keywords = ["luxury", "premium", "exclusive", "elegant", "high-end"]
                finance_keywords = ["budget", "finance", "saving", "investment", "practical"]

                keyword_groups = [tech_keywords, family_keywords, outdoor_keywords, luxury_keywords, finance_keywords]
                group_names = ["technology", "family", "outdoor", "luxury", "finance"]

                # Check each group against the comment
                for group, name in zip(keyword_groups, group_names):
                    for keyword in group:
                        if keyword in all_comments.lower():
                            interests[name] += 2  # Higher weight for direct mentions

        # Create user profile
        profile = {
            "user_info": user_data["user_info"],
            "top_genres": [genre for genre, _ in genre_preferences.most_common(3)],
            "top_interests": [interest for interest, _ in interests.most_common(5)],
            "ad_category_affinity": [category for category, _ in ad_category_affinity.most_common(3)]
        }

        # Add demographic segment
        if "age" in user_data["user_info"] and "income_bracket" in user_data["user_info"]:
            age = user_data["user_info"]["age"]
            income = user_data["user_info"]["income_bracket"]
            gender = user_data["user_info"].get("gender", "unknown")

            # Age group
            if age < 25:
                age_group = "young adult"
            elif age < 35:
                age_group = "early career"
            elif age < 50:
                age_group = "established"
            else:
                age_group = "mature"

            profile["demographic_segment"] = f"{age_group} {income}-income {gender}"

        # Free GPU memory
        torch.cuda.empty_cache()

        return profile


Using device: cuda


In [2]:

# --------------------------------
# Movie Context Analyzer
# --------------------------------
class MovieContextAnalyzer:
    def analyze_current_movie(self, content_data):
        """Analyze the currently watched movie to determine ad context"""
        if not content_data:
            return None

        context = {
            "title": content_data.get("title", "Unknown"),
            "genres": [g.strip() for g in content_data.get("genres", "").split(",")],
            "mood": [m.strip() for m in content_data.get("mood", "").split(",")],
            "themes": [t.strip() for t in content_data.get("themes", "").split(",")],
            "suitable_ad_categories": [a.strip() for a in content_data.get("ad_suitability", "").split(",")]
        }

        return context

# --------------------------------
# Ad Category Matcher (FFNN)
# --------------------------------
class AdCategoryMatcher(nn.Module):
    def __init__(self, categories):
        super(AdCategoryMatcher, self).__init__()
        print("Initializing FFNN Ad Category Matcher...")
        self.categories = categories

        # Simple 3-layer network
        self.network = nn.Sequential(
            nn.Linear(40, 64),  # Fixed input size for combined features
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, len(categories)),
            nn.Softmax(dim=1)
        ).to(device)

        # Train the model with synthetic data
        self._train_model()
        print("FFNN model trained")

    def forward(self, x):
        return self.network(x)

    def _train_model(self):
        """Train the model with synthetic data"""
        # Create synthetic training data
        X_train, y_train = self._create_synthetic_data()

        # Training parameters
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        epochs = 100
        batch_size = 16

        # Convert to tensors
        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

        # Training loop
        for epoch in range(epochs):
            # Random batch indices
            indices = torch.randperm(X_train_tensor.size(0))

            for i in range(0, len(indices), batch_size):
                # Get batch
                batch_indices = indices[i:i+batch_size]
                X_batch = X_train_tensor[batch_indices]
                y_batch = y_train_tensor[batch_indices]

                # Forward pass
                outputs = self(X_batch)
                loss = criterion(outputs, y_batch)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    def _create_synthetic_data(self):
        """Create synthetic training data"""
        # Features that match categories
        tech_features = ["technology", "innovation", "sci-fi", "futuristic", "technical", "digital"]
        family_features = ["family", "children", "animation", "educational", "wholesome", "playful"]
        outdoor_features = ["adventure", "travel", "nature", "outdoors", "exploration", "active"]
        luxury_features = ["luxury", "fashion", "elegant", "premium", "stylish", "exclusive"]
        finance_features = ["budget", "finance", "practical", "value", "savings", "investment"]

        # Create data with positive examples for each category
        X_data = []
        y_data = []

        # For each category, create examples
        for category_idx, category_features in enumerate([tech_features, family_features, outdoor_features, luxury_features, finance_features]):
            # Create multiple variations per category
            for _ in range(50):  # 50 examples per category
                # User features (first 20)
                user_features = [0] * 20
                # Select 3-5 relevant features for this category
                num_features = random.randint(3, 5)
                for _ in range(num_features):
                    feature_idx = random.randint(0, 19)
                    user_features[feature_idx] = 1

                # Content features (last 20)
                content_features = [0] * 20
                # Select 3-5 relevant features for this category
                num_features = random.randint(3, 5)
                for _ in range(num_features):
                    feature_idx = random.randint(0, 19)
                    content_features[feature_idx] = 1

                # Add special "signature" features for this category
                # These are features that strongly indicate this category
                for _ in range(2):
                    # Add signature to user profile
                    sig_idx = random.randint(0, 19)
                    if category_idx == 0:  # Tech
                        if random.random() < 0.8:  # 80% chance
                            user_features[0] = 1  # "technology" feature
                    elif category_idx == 1:  # Family
                        if random.random() < 0.8:
                            user_features[5] = 1  # "family" feature
                    elif category_idx == 2:  # Outdoor
                        if random.random() < 0.8:
                            user_features[10] = 1  # "adventure" feature
                    elif category_idx == 3:  # Luxury
                        if random.random() < 0.8:
                            user_features[15] = 1  # "luxury" feature
                    elif category_idx == 4:  # Finance
                        if random.random() < 0.8:
                            user_features[18] = 1  # "budget" feature

                # Combine features
                combined_features = user_features + content_features
                X_data.append(combined_features)
                y_data.append(category_idx)

        return X_data, y_data

    def match_ad_category(self, user_profile, movie_context):
        """Match user profile and movie context to best ad category"""
        # Extract user features
        user_features = self._extract_user_features(user_profile)

        # Extract movie features
        movie_features = self._extract_movie_features(movie_context)

        # Combine features
        combined_features = user_features + movie_features

        # Convert to tensor
        features_tensor = torch.tensor([combined_features], dtype=torch.float32).to(device)

        # Get prediction
        with torch.no_grad():
            outputs = self(features_tensor)
            probabilities = outputs[0].cpu().numpy()

        # Get category rankings
        category_ranks = [(self.categories[i]["name"], float(prob)) for i, prob in enumerate(probabilities)]
        category_ranks.sort(key=lambda x: x[1], reverse=True)

        # Create result with top category and all rankings
        result = {
            "top_category": category_ranks[0][0],
            "confidence": category_ranks[0][1],
            "category_rankings": category_ranks
        }

        return result

    def _extract_user_features(self, user_profile):
        """Extract 20 binary features from user profile"""
        # Initialize features
        features = [0] * 20

        # User demographic features (features 0-4)
        demographic = user_profile.get("demographic_segment", "").lower()
        if "young" in demographic:
            features[0] = 1
        if "high-income" in demographic:
            features[1] = 1
        if "male" in demographic:
            features[2] = 1
        if "female" in demographic:
            features[3] = 1
        if "early career" in demographic:
            features[4] = 1

        # Interest features (features 5-14)
        interests = user_profile.get("top_interests", [])
        interest_map = {
            "technology": 5,
            "innovation": 6,
            "family": 7,
            "education": 8,
            "adventure": 9,
            "outdoor": 10,
            "travel": 11,
            "luxury": 12,
            "fashion": 13,
            "finance": 14
        }

        for interest in interests:
            if interest in interest_map:
                features[interest_map[interest]] = 1

        # Genre preferences (features 15-19)
        genres = user_profile.get("top_genres", [])
        genre_map = {
            "sci-fi": 15,
            "animation": 16,
            "adventure": 17,
            "drama": 18,
            "comedy": 19
        }

        for genre in genres:
            if genre in genre_map:
                features[genre_map[genre]] = 1

        return features

    def _extract_movie_features(self, movie_context):
        """Extract 20 binary features from movie context"""
        # Initialize features
        features = [0] * 20

        # Genre features (features 0-7)
        genres = movie_context.get("genres", [])
        genre_map = {
            "sci-fi": 0,
            "action": 1,
            "drama": 2,
            "comedy": 3,
            "animation": 4,
            "adventure": 5,
            "romance": 6,
            "biography": 7
        }

        for genre in genres:
            if genre in genre_map:
                features[genre_map[genre]] = 1

        # Mood features (features 8-13)
        moods = movie_context.get("mood", [])
        mood_map = {
            "thought-provoking": 8,
            "inspiring": 9,
            "emotional": 10,
            "intense": 11,
            "uplifting": 12,
            "nostalgic": 13
        }

        for mood in moods:
            if mood in mood_map:
                features[mood_map[mood]] = 1

        # Theme features (features 14-19)
        themes = movie_context.get("themes", [])
        theme_map = {
            "technology": 14,
            "family": 15,
            "adventure": 16,
            "luxury": 17,
            "fashion": 18,
            "finance": 19
        }

        for theme in themes:
            if theme in theme_map:
                features[theme_map[theme]] = 1

        # Ad suitability direct mapping (bonus features)
        ad_categories = movie_context.get("suitable_ad_categories", [])
        for category in ad_categories:
            if category == "technology" and features[14] == 0:
                features[14] = 1
            elif category == "family" and features[15] == 0:
                features[15] = 1
            elif category in ["outdoor", "travel"] and features[16] == 0:
                features[16] = 1
            elif category == "luxury" and features[17] == 0:
                features[17] = 1
            elif category in ["finance", "budget"] and features[19] == 0:
                features[19] = 1

        return features


In [3]:

# --------------------------------
# Main Pipeline
# --------------------------------
class AdCategoryMatchingPipeline:
    def __init__(self):
        print("Initializing Ad Category Matching Pipeline...")

        # Initialize components
        self.user_profiler = BERTUserProfiler()
        self.movie_analyzer = MovieContextAnalyzer()
        self.ad_matcher = AdCategoryMatcher(get_ad_categories())

    def match_ad_category(self, user_id, content_id):
        """Complete process: profile user, analyze movie, match ad category"""
        results = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "user_id": user_id,
            "content_id": content_id
        }

        # 1. Get user data
        user_data = get_user_data(user_id)
        if not user_data:
            return {"status": "error", "message": f"User with ID {user_id} not found"}

        # 2. Profile user
        user_profile = self.user_profiler.profile_user(user_data)
        results["user_profile"] = {
            "username": user_data["user_info"].get("username", f"User {user_id}"),
            "demographic": user_profile.get("demographic_segment", "Unknown"),
            "top_interests": user_profile["top_interests"],
            "top_genres": user_profile["top_genres"]
        }

        # 3. Analyze current movie
        content_data = get_content_data(content_id)
        if not content_data:
            return {"status": "error", "message": f"Content with ID {content_id} not found"}

        movie_context = self.movie_analyzer.analyze_current_movie(content_data)
        results["movie_context"] = {
            "title": movie_context["title"],
            "genres": movie_context["genres"],
            "mood": movie_context["mood"],
            "themes": movie_context["themes"]
        }

        # 4. Match ad category
        category_match = self.ad_matcher.match_ad_category(user_profile, movie_context)
        results["ad_category_match"] = {
            "top_category": category_match["top_category"],
            "confidence": round(category_match["confidence"], 2),
            "all_categories": [(cat, round(score, 2)) for cat, score in category_match["category_rankings"]]
        }

        # 5. Get recommended products for this category
        top_category = category_match["top_category"]
        category_data = next((c for c in AD_CATEGORIES_DB if c["name"] == top_category), None)
        if category_data:
            results["recommended_products"] = category_data["sample_products"].split(", ")

        return results


In [5]:

# --------------------------------
# Main Execution
# --------------------------------
def main():
    # Initialize the pipeline
    print("\nInitializing Ad Category Matcher...")
    pipeline = AdCategoryMatchingPipeline()

    # Randomly select a user
    users = get_all_users()
    user_idx = random.randint(0, len(users)-1)
    user_id = users[user_idx][0]
    print(f"\nRandomly selected user: {users[user_idx][1]} (ID: {user_id})")

    # Randomly select content
    content_list = get_all_content()
    content_idx = random.randint(0, len(content_list)-1)
    content_id = content_list[content_idx][0]
    print(f"Randomly selected movie: {content_list[content_idx][1]} (ID: {content_id})")

    # Process and match ad category
    print("\nProcessing data and matching ad category...")
    result = pipeline.match_ad_category(user_id, content_id)

    # Display results
    print("\n" + "="*50)
    print(f"AD CATEGORY MATCH RESULTS")
    print(f"User: {result['user_profile']['username']}")
    print(f"Movie: {result['movie_context']['title']}")
    print("="*50)

    print("\nUSER PROFILE:")
    print(f"Demographic: {result['user_profile']['demographic']}")
    print(f"Interests: {', '.join(result['user_profile']['top_interests'])}")
    print(f"Preferred Genres: {', '.join(result['user_profile']['top_genres'])}")

    print("\nMOVIE CONTEXT:")
    print(f"Genres: {', '.join(result['movie_context']['genres'])}")
    print(f"Mood: {', '.join(result['movie_context']['mood'])}")
    print(f"Themes: {', '.join(result['movie_context']['themes'])}")

    print("\nBEST AD CATEGORY MATCH:")
    print(f"► {result['ad_category_match']['top_category']} (Confidence: {result['ad_category_match']['confidence']:.2f})")

    print("\nALL CATEGORY RANKINGS:")
    for category, score in result['ad_category_match']['all_categories']:
        print(f"  {category}: {score:.2f}")

    print("\nRECOMMENDED PRODUCTS:")
    for product in result['recommended_products']:
        print(f"  • {product}")

    print("\n" + "="*50)

    # Save result to file
    try:
        with open(f"ad_match_{result['user_profile']['username']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json", 'w') as f:
            json.dump(result, f, indent=2)
        print(f"\nResults saved to file")
    except Exception as e:
        print(f"Note: Could not save to file - {str(e)}")

if __name__ == "__main__":
    main()


Initializing Ad Category Matcher...
Initializing Ad Category Matching Pipeline...
Initializing BERT-based User Profiler...
BERT model loaded successfully
Initializing FFNN Ad Category Matcher...
FFNN model trained

Randomly selected user: TechEnthusiast (ID: 1)
Randomly selected movie: Up (ID: 129)

Processing data and matching ad category...

AD CATEGORY MATCH RESULTS
User: TechEnthusiast
Movie: Up

USER PROFILE:
Demographic: early career high-income Male
Interests: technology
Preferred Genres: sci-fi, action, thriller

MOVIE CONTEXT:
Genres: animation, adventure, family
Mood: bittersweet, touching
Themes: friendship, adventure

BEST AD CATEGORY MATCH:
► Family (Confidence: 0.99)

ALL CATEGORY RANKINGS:
  Family: 0.99
  Luxury: 0.01
  Finance: 0.00
  Technology: 0.00
  Outdoor: 0.00

RECOMMENDED PRODUCTS:
  • toys
  • educational apps
  • family activities
  • food
  • children's products


Results saved to file
