In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import pandas as pd
import numpy as np
import os
import os.path as osp
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
import seaborn as sns
from torchvision import models, transforms
import time
import json
import csv
from ast import literal_eval
import unicodedata

# Import artemis modules (same as your notebook)
from artemis.emotions import ARTEMIS_EMOTIONS, IDX_TO_EMOTION
from artemis.neural_models.resnet_encoder import ResnetEncoder
from artemis.neural_models.mlp import MLP
from artemis.neural_models.image_emotion_clf import ImageEmotionClassifier
from artemis.in_out.neural_net_oriented import torch_load_model

# Try to import FAISS for speed optimization
# FAISS is optional: FAISS_AVAILABLE records whether the import succeeded so
# the matcher class below can pick a similarity backend at runtime.
try:
    import faiss
    FAISS_AVAILABLE = True
    print("✓ FAISS available for ultra-fast similarity search")
except ImportError:
    FAISS_AVAILABLE = False
    print("⚠ FAISS not installed. Install with: pip install faiss-gpu (or faiss-cpu)")
    print("  This will provide 10-100x speedup for large datasets")

# Configuration
# Single torch device used by the matcher: CUDA when available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Updated Paths
# All paths are relative, so they resolve against the current working directory.
# Checkpoint passed to torch_load_model() by the matcher.
RESNET_MODEL_PATH = r'data/artemis/predictions/best_model_good_data.pt'
# Parquet tables read with pd.read_parquet() when the matcher loads book data.
BOOK_RESNET_PARQUET = r'goodreads_emotion_results/goodreads_emotion_predictions_english.parquet'
BOOK_BERT_PARQUET = r'goodreads_bert_emotion_results/goodreads_bert_emotion_predictions_20250604_160958.parquet'
# Source of the book_id / image_url_large / description columns merged into the tables above.
PREPROCESSED_BOOK_PARQUET = r'preprocessed_books_2025_04_20.parquet'


✓ FAISS available for ultra-fast similarity search
Using device: cuda


In [3]:

class CompleteImageBookMatcher:
    """
    Complete system for matching image emotions with book emotion distributions
    Merges image_url_large from original preprocessed books data
    """
    
    def __init__(self, use_gpu_similarity=True, precompute_indices=True):
        """Load the model and book tables, then prepare the similarity backends.

        Args:
            use_gpu_similarity: Keep the emotion matrices on the torch device
                and use them for similarity search. Only honoured when CUDA
                is available.
            precompute_indices: Build the FAISS indices during construction.
                When False, FAISS is disabled for this instance because the
                FAISS search path requires those indices to exist.
        """
        self.emotion_labels = ARTEMIS_EMOTIONS
        self.num_emotions = len(self.emotion_labels)
        self.device = device
        self.use_gpu_similarity = use_gpu_similarity and torch.cuda.is_available()
        # FAISS may only be selected as a backend if its indices will actually
        # be built below; otherwise the first search would hit a missing
        # `resnet_index` attribute.
        self.use_faiss = bool(FAISS_AVAILABLE and precompute_indices)

        # Emotion column mapping based on the parquet structure
        self.emotion_columns = [
            'prob_amusement', 'prob_anger', 'prob_awe', 'prob_contentment',
            'prob_disgust', 'prob_excitement', 'prob_fear', 'prob_sadness',
            'prob_something_else'
        ]

        print("="*80)
        print("INITIALIZING COMPLETE IMAGE-BOOK EMOTION MATCHING SYSTEM")
        print("="*80)

        # Load the ResNet emotion classifier (falls back to a dummy model on failure)
        print("Loading ResNet model...")
        self.resnet_model = self._load_resnet_model()

        # Load and preprocess book data with image URLs
        print("Loading and preprocessing book emotion distributions with image URLs...")
        self.book_resnet_df, self.book_bert_df = self._load_book_data_with_images()

        # Precompute emotion matrices for fast similarity
        print("Precomputing emotion matrices for fast similarity...")
        self._precompute_emotion_matrices()

        # Setup FAISS indices for similarity search; the builder resets
        # `use_faiss` itself if index construction fails.
        if self.use_faiss:
            print("Building FAISS indices for similarity search...")
            self._build_faiss_indices()

        # Image preprocessing: 256x256 resize followed by ImageNet mean/std normalisation
        self.image_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])
        ])

        print(f"✓ System ready! Loaded {len(self.book_resnet_df)} books for matching")
        print("="*80)
    
    def _load_resnet_model(self):
        """Return the emotion classifier, or a dummy ResNet if loading fails.

        The checkpoint at RESNET_MODEL_PATH is loaded, moved to the instance
        device, put in eval mode and smoke-tested with one random 256x256
        batch. Any exception along the way is reported and the dummy model
        from `_create_dummy_resnet` is returned instead.
        """
        try:
            net = torch_load_model(RESNET_MODEL_PATH)
            net.to(self.device)
            net.eval()

            # Smoke test: a single forward pass proves the checkpoint is usable.
            with torch.no_grad():
                probe = torch.randn(1, 3, 256, 256).to(self.device)
                net(probe)

            print("✓ ResNet model loaded and tested successfully")
            return net

        except Exception as e:
            print(f"Error loading ResNet model: {e}")
            print("Creating dummy ResNet for testing...")
            return self._create_dummy_resnet()
    
    def _create_dummy_resnet(self):
        """Build a stand-in classifier used when the real checkpoint cannot be loaded.

        Returns a torchvision ResNet-34 with ImageNet weights whose final
        layer is replaced by a fresh linear head with one output per emotion,
        moved to the instance device and switched to eval mode.
        """
        print("⚠ Using dummy ResNet model for testing")
        backbone = models.resnet34(weights='IMAGENET1K_V1')
        emotion_head = nn.Linear(backbone.fc.in_features, self.num_emotions)
        backbone.fc = emotion_head
        # Module.to() and Module.eval() both return the module itself.
        return backbone.to(self.device).eval()
    
    def _load_book_data_with_images(self):
        """Load both emotion-prediction tables and attach cover URLs and descriptions.

        Reads the preprocessed-books parquet (book_id, image_url_large,
        description) and the ResNet and BERT prediction parquets, left-merges
        the metadata onto each prediction table, keeps only books present in
        both, and sorts both by book_id with a fresh 0..N-1 index.

        Every table is de-duplicated on book_id first. The rest of the class
        addresses the two returned frames by the same positional index, which
        is only valid when each book occupies exactly one row in both.

        Returns:
            (book_resnet_df, book_bert_df). On any exception the error and
            traceback are printed and dummy data from
            `_create_dummy_book_data` is returned instead.
        """
        try:
            print("Loading preprocessed books data for image URLs...")
            # Load only the columns needed for the merge
            metadata_columns = ['book_id', 'image_url_large', 'description']
            preprocessed_df = pd.read_parquet(PREPROCESSED_BOOK_PARQUET,
                                              columns=metadata_columns)
            print(f"✓ Loaded {len(preprocessed_df)} books from preprocessed data")
            print(f"✓ Found {int(preprocessed_df['image_url_large'].notna().sum())} books with image URLs")
            # One metadata row per book; otherwise the left merges below
            # would multiply prediction rows.
            preprocessed_df = preprocessed_df.drop_duplicates(subset='book_id')

            # Load emotion prediction data
            print("Loading ResNet emotion predictions...")
            book_resnet_df = pd.read_parquet(BOOK_RESNET_PARQUET)

            print("Loading BERT emotion predictions...")
            book_bert_df = pd.read_parquet(BOOK_BERT_PARQUET)

            print(f"✓ Loaded ResNet emotions for {len(book_resnet_df)} books")
            print(f"✓ Loaded BERT emotions for {len(book_bert_df)} books")

            # One prediction row per book (first occurrence wins) so the two
            # frames end up with identical length and order.
            book_resnet_df = book_resnet_df.drop_duplicates(subset='book_id')
            book_bert_df = book_bert_df.drop_duplicates(subset='book_id')

            # Merge with image URLs
            print("Merging ResNet data with image URLs...")
            book_resnet_df = book_resnet_df.merge(
                preprocessed_df[metadata_columns],
                on='book_id',
                how='left'
            )

            print("Merging BERT data with image URLs...")
            book_bert_df = book_bert_df.merge(
                preprocessed_df[metadata_columns],
                on='book_id',
                how='left'
            )

            # Keep only books that exist in both datasets for multimodal comparison
            common_book_ids = set(book_resnet_df['book_id']) & set(book_bert_df['book_id'])

            book_resnet_df = book_resnet_df[book_resnet_df['book_id'].isin(common_book_ids)]
            book_bert_df = book_bert_df[book_bert_df['book_id'].isin(common_book_ids)]

            # Same sort key + unique ids => row i refers to the same book in both frames
            book_resnet_df = book_resnet_df.sort_values('book_id').reset_index(drop=True)
            book_bert_df = book_bert_df.sort_values('book_id').reset_index(drop=True)

            # Count books with image URLs
            resnet_with_images = int(book_resnet_df['image_url_large'].notna().sum())
            bert_with_images = int(book_bert_df['image_url_large'].notna().sum())

            print(f"✓ Final dataset: {len(book_resnet_df)} books with both ResNet and BERT predictions")
            print(f"✓ ResNet books with image URLs: {resnet_with_images}")
            print(f"✓ BERT books with image URLs: {bert_with_images}")

            return book_resnet_df, book_bert_df

        except Exception as e:
            # Deliberate best-effort: report the failure and continue with synthetic data.
            print(f"Error loading book data: {e}")
            import traceback
            traceback.print_exc()
            return self._create_dummy_book_data()
    
    def _create_dummy_book_data(self):
        """Create dummy book data for testing"""
        print("⚠ Creating dummy book data for testing")
        np.random.seed(42)
        
        n_books = 1000  # Smaller for testing
        book_ids = [f"book_{i}" for i in range(n_books)]
        titles = [f"Test Book Title {i}" for i in range(n_books)]
        authors = [[{'author_id': str(i), 'role': f'Test Author {i}'}] for i in range(n_books)]
        
        # Create dummy emotion probabilities
        emotion_probs = np.random.dirichlet(np.ones(self.num_emotions), n_books)
        
        # Create ResNet dataframe
        resnet_data = {
            'book_id': book_ids,
            'title': titles,
            'authors': authors,
            'average_rating': np.random.uniform(3.0, 5.0, n_books),
            'predicted_emotion': [self.emotion_labels[np.argmax(probs)] for probs in emotion_probs],
            'confidence': [np.max(probs) for probs in emotion_probs],
            'image_url_large': [f'https://example.com/cover_{i}.jpg' for i in range(n_books)]
        }
        
        # Add emotion probability columns
        for i, col in enumerate(self.emotion_columns):
            resnet_data[col] = emotion_probs[:, i]
        
        resnet_df = pd.DataFrame(resnet_data)
        
        # Create BERT dataframe (different emotion distributions)
        bert_emotion_probs = np.random.dirichlet(np.ones(self.num_emotions), n_books)
        bert_data = resnet_data.copy()
        bert_data['predicted_emotion'] = [self.emotion_labels[np.argmax(probs)] for probs in bert_emotion_probs]
        bert_data['confidence'] = [np.max(probs) for probs in bert_emotion_probs]
        
        for i, col in enumerate(self.emotion_columns):
            bert_data[col] = bert_emotion_probs[:, i]
        
        bert_df = pd.DataFrame(bert_data)
        
        return resnet_df, bert_df
    
    def _precompute_emotion_matrices(self):
        """Precompute emotion matrices for vectorized operations"""
        # Extract emotion matrices as numpy arrays for fast computation
        self.resnet_emotions_matrix = self.book_resnet_df[self.emotion_columns].values.astype(np.float32)
        self.bert_emotions_matrix = self.book_bert_df[self.emotion_columns].values.astype(np.float32)
        
        # Extract confidence arrays
        self.resnet_confidences = self.book_resnet_df['confidence'].values.astype(np.float32)
        self.bert_confidences = self.book_bert_df['confidence'].values.astype(np.float32)
        
        # Precompute combined emotions for all books
        print("Precomputing multimodal combinations...")
        self._precompute_multimodal_emotions()
        
        # Convert to GPU tensors if using GPU acceleration
        if self.use_gpu_similarity:
            print("Moving emotion matrices to GPU for fast similarity...")
            self.resnet_emotions_gpu = torch.from_numpy(self.resnet_emotions_matrix).to(self.device)
            self.bert_emotions_gpu = torch.from_numpy(self.bert_emotions_matrix).to(self.device)
            self.multimodal_emotions_gpu = torch.from_numpy(self.multimodal_emotions_matrix).to(self.device)
    
    def _precompute_multimodal_emotions(self):
        """Precompute confidence-weighted combinations for all books"""
        # Vectorized confidence weighting
        total_confidences = self.resnet_confidences + self.bert_confidences
        resnet_weights = self.resnet_confidences / total_confidences
        bert_weights = self.bert_confidences / total_confidences
        
        # Vectorized combination
        self.multimodal_emotions_matrix = (
            resnet_weights[:, np.newaxis] * self.resnet_emotions_matrix +
            bert_weights[:, np.newaxis] * self.bert_emotions_matrix
        )
        
        # Normalize to ensure proper probability distributions
        row_sums = self.multimodal_emotions_matrix.sum(axis=1, keepdims=True)
        self.multimodal_emotions_matrix = self.multimodal_emotions_matrix / row_sums
        
        # Store weights for later use
        self.resnet_weights = resnet_weights
        self.bert_weights = bert_weights
    
    def _build_faiss_indices(self):
        """Build inner-product FAISS indices over the three emotion matrices.

        Rows are L2-normalised before insertion so that inner product equals
        cosine similarity. All-zero rows are inserted unchanged instead of
        being divided by zero. When FAISS reports a GPU, the indices are
        moved to GPU 0 using one shared resources object, which is kept on
        the instance.

        Sets `resnet_index`, `bert_index`, `multimodal_index` and
        `use_faiss`. The indices are only assigned once all three were built;
        on any failure the error is printed and `use_faiss` is set to False.
        """
        if not FAISS_AVAILABLE:
            self.use_faiss = False
            return

        def build_index(matrix, dimension):
            # Inner product on unit vectors == cosine similarity
            norms = np.linalg.norm(matrix, axis=1, keepdims=True)
            norms[norms == 0] = 1.0  # keep zero rows as zeros rather than NaN
            index = faiss.IndexFlatIP(dimension)
            index.add(np.ascontiguousarray(matrix / norms, dtype='float32'))
            return index

        try:
            dimension = self.num_emotions
            indices = {
                'resnet': build_index(self.resnet_emotions_matrix, dimension),
                'bert': build_index(self.bert_emotions_matrix, dimension),
                'multimodal': build_index(self.multimodal_emotions_matrix, dimension),
            }

            # Move to GPU if available
            if faiss.get_num_gpus() > 0:
                # One resources object for all three indices instead of one each.
                gpu_resources = faiss.StandardGpuResources()
                indices = {
                    name: faiss.index_cpu_to_gpu(gpu_resources, 0, index)
                    for name, index in indices.items()
                }
                self._faiss_gpu_resources = gpu_resources
                print("✓ FAISS indices moved to GPU")

            self.resnet_index = indices['resnet']
            self.bert_index = indices['bert']
            self.multimodal_index = indices['multimodal']

            self.use_faiss = True
            print("✓ FAISS indices built successfully")

        except Exception as e:
            print(f"Error building FAISS indices: {e}")
            self.use_faiss = False
    
    def predict_image_emotions(self, image_path):
        """Predict emotions for an input image using ResNet (same as your notebook)"""
        try:
            # Load and preprocess image (same as notebook preprocessing)
            image = Image.open(image_path).convert('RGB')
            image_tensor = self.image_transform(image).unsqueeze(0).to(self.device)
            
            # Fast inference with no gradient computation
            with torch.no_grad():
                # Model outputs log probabilities (LogSoftmax), so we need to exp them
                log_probs = self.resnet_model(image_tensor)
                probs = torch.exp(log_probs).cpu().numpy()[0]  # Convert to probabilities
                confidence = float(torch.max(torch.exp(log_probs)).cpu())
            
            return {
                'emotion_distribution': probs,
                'confidence': confidence,
                'dominant_emotion': self.emotion_labels[np.argmax(probs)],
                'emotion_scores': dict(zip(self.emotion_labels, probs))
            }
            
        except Exception as e:
            print(f"Error predicting image emotions: {e}")
            import traceback
            traceback.print_exc()
            
            uniform_probs = np.ones(self.num_emotions) / self.num_emotions
            return {
                'emotion_distribution': uniform_probs,
                'confidence': 1.0 / self.num_emotions,
                'dominant_emotion': self.emotion_labels[0],
                'emotion_scores': dict(zip(self.emotion_labels, uniform_probs))
            }
    
    def fast_similarity_search(self, image_emotions, top_k=1000):
        """Ultra-fast similarity search using FAISS or GPU acceleration"""
        if self.use_faiss:
            return self._faiss_similarity_search(image_emotions, top_k)
        elif self.use_gpu_similarity:
            return self._gpu_similarity_search(image_emotions, top_k)
        else:
            return self._cpu_similarity_search(image_emotions, top_k)
    
    def _faiss_similarity_search(self, image_emotions, top_k):
        """FAISS-based similarity search (fastest)"""
        # Normalize query
        query = image_emotions / np.linalg.norm(image_emotions)
        query = query.reshape(1, -1).astype('float32')
        
        # Search all three indices
        resnet_scores, resnet_indices = self.resnet_index.search(query, top_k)
        bert_scores, bert_indices = self.bert_index.search(query, top_k)
        multimodal_scores, multimodal_indices = self.multimodal_index.search(query, top_k)
        
        return {
            'resnet': {'scores': resnet_scores[0], 'indices': resnet_indices[0]},
            'bert': {'scores': bert_scores[0], 'indices': bert_indices[0]},
            'multimodal': {'scores': multimodal_scores[0], 'indices': multimodal_indices[0]}
        }
    
    def _gpu_similarity_search(self, image_emotions, top_k):
        """GPU-accelerated similarity search"""
        # Convert to GPU tensor
        image_tensor = torch.from_numpy(image_emotions).to(self.device).float()
        image_tensor = image_tensor / torch.norm(image_tensor)  # Normalize
        
        # Compute similarities using matrix multiplication
        resnet_similarities = torch.mm(image_tensor.unsqueeze(0), self.resnet_emotions_gpu.t()).squeeze()
        bert_similarities = torch.mm(image_tensor.unsqueeze(0), self.bert_emotions_gpu.t()).squeeze()
        multimodal_similarities = torch.mm(image_tensor.unsqueeze(0), self.multimodal_emotions_gpu.t()).squeeze()
        
        # Get top-k
        resnet_top_k = torch.topk(resnet_similarities, min(top_k, len(resnet_similarities)))
        bert_top_k = torch.topk(bert_similarities, min(top_k, len(bert_similarities)))
        multimodal_top_k = torch.topk(multimodal_similarities, min(top_k, len(multimodal_similarities)))
        
        return {
            'resnet': {
                'scores': resnet_top_k.values.cpu().numpy(),
                'indices': resnet_top_k.indices.cpu().numpy()
            },
            'bert': {
                'scores': bert_top_k.values.cpu().numpy(), 
                'indices': bert_top_k.indices.cpu().numpy()
            },
            'multimodal': {
                'scores': multimodal_top_k.values.cpu().numpy(),
                'indices': multimodal_top_k.indices.cpu().numpy()
            }
        }
    
    def _cpu_similarity_search(self, image_emotions, top_k):
        """CPU fallback: cosine similarity via scikit-learn plus partial sort.

        `top_k` is capped at the number of books (np.argpartition rejects an
        out-of-range kth), and a non-positive `top_k` yields empty results.

        Returns:
            dict keyed by 'resnet' / 'bert' / 'multimodal', each holding
            'scores' and 'indices' arrays sorted by descending similarity.
        """
        def rank(book_matrix):
            similarities = cosine_similarity([image_emotions], book_matrix)[0]
            k = min(int(top_k), similarities.shape[0])
            if k <= 0:
                chosen = np.empty(0, dtype=np.intp)
            else:
                # Partial selection of the k best, then order just those k
                chosen = np.argpartition(similarities, -k)[-k:]
                chosen = chosen[np.argsort(similarities[chosen])[::-1]]
            return {'scores': similarities[chosen], 'indices': chosen}

        return {
            'resnet': rank(self.resnet_emotions_matrix),
            'bert': rank(self.bert_emotions_matrix),
            'multimodal': rank(self.multimodal_emotions_matrix)
        }
    
    def find_similar_books_complete(self, image_path, top_k=1000, final_n=3):
        """Complete book similarity search with all optimizations"""
        start_time = time.time()
        
        # Predict image emotions
        image_prediction = self.predict_image_emotions(image_path)
        image_emotions = image_prediction['emotion_distribution']
        
        print(f"Image prediction took {time.time() - start_time:.3f}s")
        print(f"Image dominant emotion: {image_prediction['dominant_emotion']}")
        print(f"Image confidence: {image_prediction['confidence']:.3f}")
        
        # Fast similarity search - get ALL books, not just top_k
        search_start = time.time()
        total_books = len(self.book_resnet_df)
        search_results = self.fast_similarity_search(image_emotions, total_books)  # Get all books
        print(f"Similarity search took {time.time() - search_start:.3f}s")
        
        # Extract top, middle, bottom recommendations for each approach
        def extract_recommendations(scores, indices, approach_name):
            total_results = len(scores)
            
            # Top N most similar (highest scores)
            top_indices = indices[:final_n]
            top_scores = scores[:final_n]
            
            # Middle N (around the median)
            middle_start = max(0, (total_results // 2) - (final_n // 2))
            middle_end = min(total_results, middle_start + final_n)
            middle_indices = indices[middle_start:middle_end]
            middle_scores = scores[middle_start:middle_end]
            
            # Bottom N least similar (lowest scores)
            bottom_indices = indices[-final_n:]
            bottom_scores = scores[-final_n:]
            
            # Create recommendation objects
            def create_recommendations(rec_indices, rec_scores, category):
                recommendations = []
                for i, (idx, score) in enumerate(zip(rec_indices, rec_scores)):
                    book_row = self.book_resnet_df.iloc[idx]  # Use ResNet df for metadata
                    bert_row = self.book_bert_df.iloc[idx]    # Get BERT data for emotions
                    
                    recommendations.append({
                        'rank': i + 1,
                        'book_id': book_row['book_id'],
                        'title': book_row['title'],
                        'authors': book_row['authors'],
                        'average_rating': book_row.get('average_rating', 'N/A'),
                        'similarity_score': float(score),
                        'approach': approach_name,
                        'category': category,
                        'global_rank': int(np.where(indices == idx)[0][0] + 1),  # Rank among all books
                        'resnet_predicted_emotion': book_row['predicted_emotion'],
                        'bert_predicted_emotion': bert_row['predicted_emotion'],
                        'image_url_large': book_row.get('image_url_large', 'No cover URL'),
                        'description': book_row.get('description', None)
                    })
                return recommendations
            
            top_recommendations = create_recommendations(top_indices, top_scores, 'top')
            middle_recommendations = create_recommendations(middle_indices, middle_scores, 'middle')
            bottom_recommendations = create_recommendations(bottom_indices, bottom_scores, 'bottom')
            
            return top_recommendations, middle_recommendations, bottom_recommendations
        
        # Extract recommendations for each approach
        resnet_top, resnet_middle, resnet_bottom = extract_recommendations(
            search_results['resnet']['scores'], search_results['resnet']['indices'], 'ResNet'
        )
        
        bert_top, bert_middle, bert_bottom = extract_recommendations(
            search_results['bert']['scores'], search_results['bert']['indices'], 'BERT'
        )
        
        multimodal_top, multimodal_middle, multimodal_bottom = extract_recommendations(
            search_results['multimodal']['scores'], search_results['multimodal']['indices'], 'Multimodal'
        )
        
        total_time = time.time() - start_time
        print(f"Total processing time: {total_time:.3f}s for {len(self.book_resnet_df)} books")
        
        # Print summary of what we found
        print(f"\n📊 SIMILARITY SUMMARY:")
        print(f"   ResNet: Best={resnet_top[0]['similarity_score']:.4f}, "
            f"Middle={resnet_middle[0]['similarity_score']:.4f}, "
            f"Worst={resnet_bottom[0]['similarity_score']:.4f}")
        print(f"   BERT: Best={bert_top[0]['similarity_score']:.4f}, "
            f"Middle={bert_middle[0]['similarity_score']:.4f}, "
            f"Worst={bert_bottom[0]['similarity_score']:.4f}")
        print(f"   Multimodal: Best={multimodal_top[0]['similarity_score']:.4f}, "
            f"Middle={multimodal_middle[0]['similarity_score']:.4f}, "
            f"Worst={multimodal_bottom[0]['similarity_score']:.4f}")
        
        return {
            'image_prediction': image_prediction,
            'processing_time': total_time,
            'total_books_analyzed': total_books,
            'resnet_recommendations': {
                'top': resnet_top,
                'middle': resnet_middle,
                'bottom': resnet_bottom
            },
            'bert_recommendations': {
                'top': bert_top,
                'middle': bert_middle,
                'bottom': bert_bottom
            },
            'multimodal_recommendations': {
                'top': multimodal_top,
                'middle': multimodal_middle,
                'bottom': multimodal_bottom
            }
        }
    
    def display_emotion_distributions(self, results, show_plots=True):
        """Display and compare emotion distributions between image and recommendations"""
        
        print("\n" + "🎭" * 80)
        print("EMOTION DISTRIBUTION ANALYSIS")
        print("🎭" * 80)
        
        # Get image emotion distribution
        image_emotions = results['image_prediction']['emotion_distribution']
        image_scores = results['image_prediction']['emotion_scores']
        
        print(f"\n📸 INPUT IMAGE EMOTION DISTRIBUTION:")
        print(f"   Dominant: {results['image_prediction']['dominant_emotion']} (confidence: {results['image_prediction']['confidence']:.3f})")
        print("   Full Distribution:")
        
        # Display image emotions in a nice format
        for emotion, score in image_scores.items():
            bar_length = int(score * 50)  # Scale to 50 characters
            bar = "█" * bar_length + "░" * (50 - bar_length)
            print(f"   {emotion:15s} │{bar}│ {score:.3f}")
        
        print("\n" + "─" * 80)
        
        # Compare with top recommendations from each approach
        approaches = [
            ('🧠 RESNET RECOMMENDATIONS', results['resnet_recommendations']),
            ('📝 BERT RECOMMENDATIONS', results['bert_recommendations']), 
            ('🔗 MULTIMODAL RECOMMENDATIONS', results['multimodal_recommendations'])
        ]
        
        for approach_name, recommendations in approaches:
            print(f"\n{approach_name}")
            print("─" * 80)
            
            # Show top 3 books from this approach
            for i, book in enumerate(recommendations['top'], 1):
                print(f"\n📚 {i}. {book['title'][:50]}{'...' if len(book['title']) > 50 else ''}")
                print(f"   📊 Similarity Score: {book['similarity_score']:.4f}")
                print(f"   🎯 Predicted Emotion: {book.get('resnet_predicted_emotion', 'N/A')}")
                
                # Get book's emotion distribution
                book_idx = self._get_book_index(book['book_id'])
                if book_idx is not None:
                    if 'resnet' in approach_name.lower():
                        book_emotions = self.resnet_emotions_matrix[book_idx]
                    elif 'bert' in approach_name.lower():
                        book_emotions = self.bert_emotions_matrix[book_idx]
                    else:  # multimodal
                        book_emotions = self.multimodal_emotions_matrix[book_idx]
                    
                    print(f"   📈 Book Emotion Distribution:")
                    
                    # Calculate similarity metrics
                    cosine_sim = np.dot(image_emotions, book_emotions) / (
                        np.linalg.norm(image_emotions) * np.linalg.norm(book_emotions))
                    kl_div = self._calculate_kl_divergence(image_emotions, book_emotions)
                    
                    print(f"   🔍 Cosine Similarity: {cosine_sim:.4f}")
                    print(f"   📏 KL Divergence: {kl_div:.4f}")
                    
                    # Show emotion-by-emotion comparison
                    for j, emotion in enumerate(self.emotion_labels):
                        img_score = image_emotions[j]
                        book_score = book_emotions[j]
                        diff = book_score - img_score
                        
                        # Visual bars
                        img_bar_length = int(img_score * 30)
                        book_bar_length = int(book_score * 30)
                        img_bar = "█" * img_bar_length + "░" * (30 - img_bar_length)
                        book_bar = "█" * book_bar_length + "░" * (30 - book_bar_length)
                        
                        # Color code the difference
                        if abs(diff) < 0.05:
                            diff_icon = "≈"
                        elif diff > 0:
                            diff_icon = "↗"
                        else:
                            diff_icon = "↘"
                        
                        print(f"   {emotion:13s} │Img:{img_bar}│{img_score:.3f} │Book:{book_bar}│{book_score:.3f} {diff_icon}")
                    
                    print("   " + "─" * 70)
        
        # Create visualization plots if requested
        if show_plots:
            self._create_emotion_distribution_plots(results)
    
    def _get_book_index(self, book_id):
        """Get the index of a book in the dataframe by book_id"""
        try:
            # Find index in resnet dataframe (they should be aligned)
            mask = self.book_resnet_df['book_id'] == book_id
            indices = np.where(mask)[0]
            if len(indices) > 0:
                return indices[0]
            return None
        except:
            return None
    
    def _calculate_kl_divergence(self, p, q, epsilon=1e-8):
        """Calculate KL divergence between two probability distributions"""
        # Add small epsilon to avoid log(0)
        p = np.clip(p, epsilon, 1.0)
        q = np.clip(q, epsilon, 1.0)
        
        # Normalize to ensure they sum to 1
        p = p / np.sum(p)
        q = q / np.sum(q)
        
        return np.sum(p * np.log(p / q))
    
    def _create_emotion_distribution_plots(self, results):
        """Create visual plots comparing emotion distributions"""
        try:
            import matplotlib.pyplot as plt
            import seaborn as sns
            
            # Set up the plotting style
            plt.style.use('default')
            sns.set_palette("husl")
            
            # Create subplots: 2 rows x 2 cols
            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
            fig.suptitle('Emotion Distribution Comparison', fontsize=16, fontweight='bold')
            
            # Get image emotions
            image_emotions = results['image_prediction']['emotion_distribution']
            
            # Plot 1: Image emotion distribution
            ax1 = axes[0, 0]
            bars1 = ax1.bar(self.emotion_labels, image_emotions, alpha=0.7, color='skyblue')
            ax1.set_title(f"Input Image\n(Dominant: {results['image_prediction']['dominant_emotion']})", 
                         fontweight='bold')
            ax1.set_ylabel('Probability')
            ax1.tick_params(axis='x', rotation=45)
            
            # Add value labels on bars
            for bar, val in zip(bars1, image_emotions):
                ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
                        f'{val:.3f}', ha='center', va='bottom', fontsize=9)
            
            # Plot 2-4: Top recommendation from each approach
            approaches = [
                ('ResNet Top Match', results['resnet_recommendations']['top'][0], self.resnet_emotions_matrix),
                ('BERT Top Match', results['bert_recommendations']['top'][0], self.bert_emotions_matrix),
                ('Multimodal Top Match', results['multimodal_recommendations']['top'][0], self.multimodal_emotions_matrix)
            ]
            
            ax_positions = [(0, 1), (1, 0), (1, 1)]
            colors = ['lightcoral', 'lightgreen', 'lightsalmon']
            
            for i, ((title, book, emotion_matrix), ax_pos, color) in enumerate(zip(approaches, ax_positions, colors)):
                ax = axes[ax_pos[0], ax_pos[1]]
                
                # Get book emotions
                book_idx = self._get_book_index(book['book_id'])
                if book_idx is not None:
                    book_emotions = emotion_matrix[book_idx]
                    
                    # Create comparison bars
                    x = np.arange(len(self.emotion_labels))
                    width = 0.35
                    
                    bars1 = ax.bar(x - width/2, image_emotions, width, label='Input Image', 
                                  alpha=0.7, color='skyblue')
                    bars2 = ax.bar(x + width/2, book_emotions, width, label='Book', 
                                  alpha=0.7, color=color)
                    
                    # Formatting
                    book_title = book['title'][:25] + '...' if len(book['title']) > 25 else book['title']
                    ax.set_title(f"{title}\n{book_title}\nSim: {book['similarity_score']:.3f}", 
                               fontweight='bold', fontsize=10)
                    ax.set_ylabel('Probability')
                    ax.set_xticks(x)
                    ax.set_xticklabels(self.emotion_labels, rotation=45, ha='right')
                    ax.legend()
                    
                    # Add difference annotations
                    for j, (img_val, book_val) in enumerate(zip(image_emotions, book_emotions)):
                        diff = book_val - img_val
                        if abs(diff) > 0.1:  # Only show significant differences
                            ax.annotate(f'{diff:+.2f}', 
                                      xy=(j, max(img_val, book_val) + 0.02),
                                      ha='center', va='bottom', fontsize=8,
                                      color='red' if diff > 0 else 'blue')
            
            plt.tight_layout()
            plt.show()
            
            # Create a heatmap comparison
            self._create_emotion_heatmap(results)
            
        except Exception as e:
            print(f"Could not create emotion distribution plots: {e}")
            print("Install required packages: pip install matplotlib seaborn")
    
    def _create_emotion_heatmap(self, results):
        """Create a heatmap showing emotion similarities across all recommendations"""
        try:
            import matplotlib.pyplot as plt
            import seaborn as sns
            
            # Collect all emotion distributions
            image_emotions = results['image_prediction']['emotion_distribution']
            
            # Get top recommendations from each approach
            all_books = []
            all_emotions = []
            labels = ['Input Image']
            all_emotions.append(image_emotions)
            
            approaches = [
                ('ResNet', results['resnet_recommendations']['top'][:3], self.resnet_emotions_matrix),
                ('BERT', results['bert_recommendations']['top'][:3], self.bert_emotions_matrix),
                ('Multimodal', results['multimodal_recommendations']['top'][:3], self.multimodal_emotions_matrix)
            ]
            
            for approach_name, books, emotion_matrix in approaches:
                for i, book in enumerate(books, 1):
                    book_idx = self._get_book_index(book['book_id'])
                    if book_idx is not None:
                        book_emotions = emotion_matrix[book_idx]
                        all_emotions.append(book_emotions)
                        
                        book_title = book['title'][:20] + '...' if len(book['title']) > 20 else book['title']
                        labels.append(f"{approach_name}-{i}\n{book_title}")
            
            # Create emotion matrix
            emotion_matrix = np.array(all_emotions)
            
            # Create heatmap
            plt.figure(figsize=(12, 10))
            sns.heatmap(emotion_matrix, 
                       xticklabels=self.emotion_labels,
                       yticklabels=labels,
                       annot=True, 
                       fmt='.3f',
                       cmap='YlOrRd',
                       cbar_kws={'label': 'Emotion Probability'})
            
            plt.title('Emotion Distribution Heatmap\nImage vs Top Book Recommendations', 
                     fontsize=14, fontweight='bold', pad=20)
            plt.xlabel('Emotions', fontweight='bold')
            plt.ylabel('Items', fontweight='bold')
            plt.xticks(rotation=45, ha='right')
            plt.yticks(rotation=0)
            plt.tight_layout()
            plt.show()
            
        except Exception as e:
            print(f"Could not create emotion heatmap: {e}")

    def display_detailed_recommendations(self, results, show_covers=True, show_emotion_analysis=True, save_charts=True, save_dir="emotion_analysis"):
        """Display detailed book recommendations with covers, descriptions, and emotion analysis"""
        print("\n" + "="*100)
        print("DETAILED BOOK RECOMMENDATIONS WITH COVERS")
        print("="*100)
        
        # Image prediction summary
        image_pred = results['image_prediction']
        print(f"\n📸 IMAGE ANALYSIS:")
        print(f"   Dominant Emotion: {image_pred['dominant_emotion']}")
        print(f"   Confidence: {image_pred['confidence']:.3f}")
        print(f"   Processing Time: {results['processing_time']:.3f}s")
        
        # Create and save emotion charts
        if save_charts:
            save_dir = "emotion_analysis_2"
            chart_dir = self.create_emotion_bar_charts(results, show_plots=show_emotion_analysis, save_dir=save_dir)
            detailed_analysis = self.create_detailed_emotion_report(results)
        
        # Show emotion distributions if requested
        if show_emotion_analysis:
            self.display_emotion_distributions(results, show_plots=False)  # Charts already shown above
        
        # All three approaches (rest of the existing code...)
        approaches = [
            ('🧠 RESNET-ONLY RECOMMENDATIONS', results['resnet_recommendations'], 'lightcoral'),
            ('📝 BERT-ONLY RECOMMENDATIONS', results['bert_recommendations'], 'lightgreen'), 
            ('🔗 MULTIMODAL RECOMMENDATIONS', results['multimodal_recommendations'], 'lightblue')
        ]
        
        for approach_name, recommendations, color in approaches:
            print(f"\n{'='*30} {approach_name} {'='*30}")
            
            categories = [
                ('🏆 TOP 3 MOST SIMILAR', recommendations['top']),
                ('📊 MIDDLE 3 SIMILAR', recommendations['middle']),
                ('📉 BOTTOM 3 LEAST SIMILAR', recommendations['bottom'])
            ]
            
            for category_name, books in categories:
                print(f"\n{category_name}")
                print("-" * 80)
                
                for i, book in enumerate(books, 1):
                    self._display_single_book(book, i, approach_name.split()[0])
        
        # Create a visual summary
        if show_covers:
            self._create_book_cover_visualization(results)
    
    def _display_single_book(self, book, rank, approach):
        """Display detailed information for a single book"""
        
        # Handle authors field (list of dicts from your parquet structure)
        authors = book['authors']
        if isinstance(authors, list) and len(authors) > 0:
            try:
                author_names = []
                for author in authors[:3]:  # Show max 3 authors
                    if isinstance(author, dict):
                        author_name = author.get('role', 'Unknown Author')
                        if not author_name or author_name == '':
                            author_name = f"Author ID: {author.get('author_id', 'Unknown')}"
                    else:
                        author_name = str(author)
                    author_names.append(author_name)
                author_str = ', '.join(author_names)
            except:
                author_str = 'Unknown Author'
        else:
            author_str = 'Unknown Author'
        
        # Extract book information
        title = book['title']
        book_id = book['book_id']
        similarity = book['similarity_score']
        rating = book.get('average_rating', 'N/A')
        
        # Get emotion predictions
        resnet_emotion = book.get('resnet_predicted_emotion', 'N/A')
        bert_emotion = book.get('bert_predicted_emotion', 'N/A')
        
        # Use the actual image_url_large from your merged data
        cover_url = book.get('image_url_large', 'No cover URL available')
        
        print(f"\n   {rank}. 📚 {title}")
        print(f"      👤 Author(s): {author_str}")
        print(f"      🆔 Book ID: {book_id}")
        print(f"      ⭐ Rating: {rating}")
        print(f"      🎯 Similarity Score: {similarity:.4f}")
        print(f"      🧠 ResNet Emotion: {resnet_emotion}")
        print(f"      📝 BERT Emotion: {bert_emotion}")
        print(f"      🖼️  Cover URL: {cover_url}")
        
        # Try to get book description
        description = book.get('description', None)
        if not description:
            description = self._get_book_description(book_id, title, author_str)
        
        if description:
            print(f"      📖 Description: {description[:200]}...")
        else:
            print(f"      📖 Description: No description available")
        
        print("      " + "─" * 70)
    
    def _get_book_description(self, book_id, title, author):
        """Look up a book description through the Google Books volumes API.

        The title and author are sent as one free-text query and only the
        first hit is inspected. The lookup is best-effort: a missing
        ``requests`` package, a network or HTTP error, or an unexpected
        response shape all yield ``None``.

        Args:
            book_id: Accepted for call compatibility; not used in the query.
            title: Book title placed in the search query.
            author: Author text appended to the query.

        Returns:
            The first hit's non-empty ``description`` string, otherwise
            ``None``.
        """
        try:
            import requests
        except ImportError:
            return None

        try:
            # Let requests build the query string so that characters such as
            # '&', '#' or '?' in a title are percent-encoded instead of
            # corrupting the URL.
            response = requests.get(
                "https://www.googleapis.com/books/v1/volumes",
                params={'q': f"{title} {author}", 'maxResults': 1},
                timeout=5,
            )
            if response.status_code != 200:
                return None

            data = response.json()
            if 'items' in data and len(data['items']) > 0:
                volume_info = data['items'][0].get('volumeInfo', {})
                description = volume_info.get('description', '')
                if description:
                    return description
            return None
        except Exception:
            # Descriptions are optional enrichment; never fail the caller.
            return None
    
    def _create_book_cover_visualization(self, results):
        """Create a visualization grid showing actual book covers from image_url_large"""
        try:
            import requests
            from io import BytesIO
            
            fig, axes = plt.subplots(3, 3, figsize=(15, 18))
            fig.suptitle('Top Book Recommendations with Actual Covers', fontsize=16, fontweight='bold')
            
            # Get top book from each category of each approach
            books_to_show = [
                (results['resnet_recommendations']['top'][0], 'ResNet Top', 0, 0),
                (results['bert_recommendations']['top'][0], 'BERT Top', 0, 1),
                (results['multimodal_recommendations']['top'][0], 'Multimodal Top', 0, 2),
                (results['resnet_recommendations']['middle'][0], 'ResNet Middle', 1, 0),
                (results['bert_recommendations']['middle'][0], 'BERT Middle', 1, 1),
                (results['multimodal_recommendations']['middle'][0], 'Multimodal Middle', 1, 2),
                (results['resnet_recommendations']['bottom'][0], 'ResNet Bottom', 2, 0),
                (results['bert_recommendations']['bottom'][0], 'BERT Bottom', 2, 1),
                (results['multimodal_recommendations']['bottom'][0], 'Multimodal Bottom', 2, 2),
            ]
            
            for book, label, row, col in books_to_show:
                ax = axes[row, col]
                
                # Try to load actual book cover from image_url_large
                try:
                    cover_url = book.get('image_url_large', '')
                    
                    if cover_url and pd.notna(cover_url) and cover_url != 'No cover URL' and cover_url.startswith('http'):
                        print(f"Loading cover for {book['title'][:30]}...")
                        response = requests.get(cover_url, timeout=15, 
                                              headers={'User-Agent': 'Mozilla/5.0'})
                        
                        if response.status_code == 200:
                            cover_image = Image.open(BytesIO(response.content))
                            ax.imshow(cover_image)
                            print(f"✓ Loaded cover for {book['title'][:20]}...")
                        else:
                            raise Exception(f"HTTP {response.status_code}")
                    else:
                        raise Exception("No valid URL")
                        
                except Exception as e:
                    print(f"Could not load cover for {book['title'][:20]}...: {e}")
                    # Create placeholder for failed loads
                    placeholder = np.random.rand(300, 200, 3) * 0.3 + 0.7
                    ax.imshow(placeholder)
                    ax.text(0.5, 0.5, 'Cover\nUnavailable', 
                           transform=ax.transAxes, ha='center', va='center',
                           fontsize=12, fontweight='bold', 
                           bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))
                
                # Set title and details
                title = book['title'][:25] + '...' if len(book['title']) > 25 else book['title']
                similarity = book['similarity_score']
                rating = book.get('average_rating', 'N/A')
                
                ax.set_title(f"{label}\n{title}\nSim: {similarity:.3f} | ★{rating}", 
                            fontsize=10, fontweight='bold', pad=10)
                ax.axis('off')
            
            plt.tight_layout()
            plt.show()
            
        except Exception as e:
            print(f"Could not create cover visualization: {e}")
            print("Install required packages: pip install pillow requests")
    
    def get_book_urls_and_info(self, results, save_to_file=True):
        """Extract all book information and save to files"""
        book_info = {
            'image_analysis': results['image_prediction'],
            'recommendations': {}
        }
        
        approaches = ['resnet', 'bert', 'multimodal']
        categories = ['top', 'middle', 'bottom']
        
        for approach in approaches:
            book_info['recommendations'][approach] = {}
            
            for category in categories:
                books = results[f'{approach}_recommendations'][category]
                book_info['recommendations'][approach][category] = []
                
                for book in books:
                    # Handle authors
                    authors = book['authors']
                    if isinstance(authors, list) and len(authors) > 0:
                        try:
                            author_names = [a.get('role', 'Unknown') if isinstance(a, dict) else str(a) for a in authors]
                            author_str = ', '.join(author_names[:3])
                        except:
                            author_str = 'Unknown Author'
                    else:
                        author_str = 'Unknown Author'
                    
                    # Generate search URLs
                    book_urls = {
                        'actual_cover_url': book.get('image_url_large', 'No cover URL'),
                        'goodreads_search': f"https://www.goodreads.com/search?q={book['title'].replace(' ', '+')}",
                        'google_books_search': f"https://www.google.com/search?tbm=bks&q={book['title'].replace(' ', '+')}+{author_str.replace(' ', '+')}",
                        'amazon_search': f"https://www.amazon.com/s?k={book['title'].replace(' ', '+')}&i=stripbooks",
                    }
                    
                    book_entry = {
                        'rank': book.get('rank', 0),
                        'book_id': book['book_id'],
                        'title': book['title'],
                        'authors': author_str,
                        'average_rating': book.get('average_rating', 'N/A'),
                        'similarity_score': book['similarity_score'],
                        'resnet_emotion': book.get('resnet_predicted_emotion', 'N/A'),
                        'bert_emotion': book.get('bert_predicted_emotion', 'N/A'),
                        'cover_url': book.get('image_url_large', 'No cover URL'),
                        'description': book.get('description', 'No description available'),
                        'urls': book_urls
                    }
                    
                    book_info['recommendations'][approach][category].append(book_entry)
        
        # Save to file if requested
        if save_to_file:
            # Also create a simple CSV for easy viewing
            csv_filename = f"book_recommendations_{results['image_prediction']['dominant_emotion']}.csv"
            self._save_to_csv(book_info, csv_filename)
            print(f"💾 CSV summary saved to: {csv_filename}")
        
        return book_info
    
    def _save_to_csv(self, book_info, filename):
        """Save book recommendations to CSV format with actual cover URLs"""
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['approach', 'category', 'rank', 'title', 'authors', 'rating', 
                         'similarity_score', 'resnet_emotion', 'bert_emotion', 'book_id',
                         'cover_url', 'description', 'goodreads_search_url']
            
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            
            for approach in ['resnet', 'bert', 'multimodal']:
                for category in ['top', 'middle', 'bottom']:
                    for book in book_info['recommendations'][approach][category]:
                        writer.writerow({
                            'approach': approach,
                            'category': category,
                            'rank': book['rank'],
                            'title': book['title'],
                            'authors': book['authors'],
                            'rating': book['average_rating'],
                            'similarity_score': book['similarity_score'],
                            'resnet_emotion': book['resnet_emotion'],
                            'bert_emotion': book['bert_emotion'],
                            'book_id': book['book_id'],
                            'cover_url': book['cover_url'],
                            'description': book['description'][:200] + '...' if len(book.get('description', '')) > 200 else book.get('description', ''),
                            'goodreads_search_url': book['urls']['goodreads_search']
                        })

    def create_emotion_bar_charts(self, results, save_dir="emotion_charts", show_plots=True):
        """Draw, save and optionally show the emotion bar charts for one result set.

        Three kinds of PNG files are written into ``save_dir`` (created if
        needed): the input image's distribution, one 2x2 comparison figure
        per approach (image plus its top three books), and the heatmap
        produced by ``_create_comprehensive_emotion_heatmap``. Each figure is
        shown when ``show_plots`` is true and closed otherwise.

        Returns:
            ``save_dir``, unchanged.
        """
        import os
        import matplotlib.pyplot as plt
        import numpy as np

        def _mark_peak(bar_container, peak_idx):
            # Thick red outline on the bar of the strongest emotion.
            peak_bar = bar_container[peak_idx]
            peak_bar.set_edgecolor('red')
            peak_bar.set_linewidth(3)

        def _label_emotion_ticks(axis):
            # One tick per emotion, slanted so the names do not collide.
            axis.set_xticks(range(len(self.emotion_labels)))
            axis.set_xticklabels(self.emotion_labels, rotation=45, ha='right')

        def _save_then_show(path, announcement):
            # Persist the current figure, report it, then show or discard it.
            plt.savefig(path, dpi=300, bbox_inches='tight')
            print(announcement)
            if not show_plots:
                plt.close()
            else:
                plt.show()

        # Make sure the output directory exists
        os.makedirs(save_dir, exist_ok=True)

        # Distribution predicted for the input image
        image_emotions = results['image_prediction']['emotion_distribution']
        dominant_emotion = results['image_prediction']['dominant_emotion']

        print(f"\n📊 Creating emotion distribution bar charts...")
        print(f"💾 Saving charts to: {save_dir}/")

        # ---- 1. Stand-alone chart for the input image ----
        fig, ax = plt.subplots(figsize=(12, 6))
        palette = plt.cm.viridis(np.linspace(0, 1, len(self.emotion_labels)))

        bars = ax.bar(range(len(self.emotion_labels)), image_emotions,
                      color=palette, alpha=0.8, edgecolor='black', linewidth=0.5)

        max_idx = np.argmax(image_emotions)
        _mark_peak(bars, max_idx)

        # Axis titles and heading
        ax.set_xlabel('Emotions', fontsize=12, fontweight='bold')
        ax.set_ylabel('Probability', fontsize=12, fontweight='bold')
        ax.set_title(f'Input Image Emotion Distribution\n(Dominant: {dominant_emotion})',
                     fontsize=14, fontweight='bold', pad=20)

        _label_emotion_ticks(ax)

        # Probability printed above every bar
        for bar, value in zip(bars, image_emotions):
            bar_top = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., bar_top + 0.005,
                    f'{value:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

        # Grid behind the bars, with headroom above the tallest one
        ax.grid(True, alpha=0.3, axis='y')
        ax.set_axisbelow(True)
        ax.set_ylim(0, max(image_emotions) * 1.2)

        # Call-out box naming the dominant emotion
        callout_box = dict(boxstyle="round,pad=0.5", facecolor="yellow", alpha=0.8)
        ax.text(0.02, 0.98, f'Dominant: {dominant_emotion}\nConfidence: {image_emotions[max_idx]:.3f}',
                transform=ax.transAxes, fontsize=12, fontweight='bold',
                bbox=callout_box,
                verticalalignment='top')

        plt.tight_layout()

        image_chart_path = os.path.join(save_dir, f'input_image_emotions_{dominant_emotion}.png')
        _save_then_show(image_chart_path, f"✓ Saved input image emotions: {image_chart_path}")

        # ---- 2. One comparison figure per approach ----
        approaches = [
            ('resnet', 'ResNet', results['resnet_recommendations'], self.resnet_emotions_matrix),
            ('bert', 'BERT', results['bert_recommendations'], self.bert_emotions_matrix),
            ('multimodal', 'Multimodal', results['multimodal_recommendations'], self.multimodal_emotions_matrix)
        ]

        for approach_key, approach_name, recommendations, emotion_matrix in approaches:
            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
            fig.suptitle(f'{approach_name} Approach: Top 3 Book Recommendations vs Input Image',
                         fontsize=16, fontweight='bold')

            # Top-left panel repeats the input image for reference
            ax = axes[0, 0]
            bars = ax.bar(range(len(self.emotion_labels)), image_emotions,
                          color='skyblue', alpha=0.8, edgecolor='black', linewidth=0.5)
            _mark_peak(bars, max_idx)

            ax.set_title(f'Input Image\n(Dominant: {dominant_emotion})', fontweight='bold')
            _label_emotion_ticks(ax)
            ax.set_ylabel('Probability')
            ax.grid(True, alpha=0.3, axis='y')

            for bar, value in zip(bars, image_emotions):
                ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.005,
                        f'{value:.2f}', ha='center', va='bottom', fontsize=9)

            # Remaining three panels: the approach's top three books
            panel_slots = [(0, 1), (1, 0), (1, 1)]
            colors_books = ['lightcoral', 'lightgreen', 'lightsalmon']

            top_three = recommendations['top'][:3]
            for book_no, (book, (row, col), color) in enumerate(zip(top_three, panel_slots, colors_books), start=1):
                ax = axes[row, col]

                book_idx = self._get_book_index(book['book_id'])
                if book_idx is None:
                    # Unresolvable book: its panel is left as empty axes.
                    continue

                book_emotions = emotion_matrix[book_idx]
                book_max_idx = np.argmax(book_emotions)

                bars = ax.bar(range(len(self.emotion_labels)), book_emotions,
                              color=color, alpha=0.8, edgecolor='black', linewidth=0.5)
                _mark_peak(bars, book_max_idx)

                # Panel heading: shortened title, dominant emotion, similarity
                book_title = book['title']
                if len(book_title) > 30:
                    book_title = book_title[:30] + '...'
                book_emotion = self.emotion_labels[book_max_idx]
                similarity = book['similarity_score']

                ax.set_title(f'Book {book_no}: {book_title}\n(Dominant: {book_emotion}, Sim: {similarity:.3f})',
                             fontweight='bold', fontsize=11)
                _label_emotion_ticks(ax)
                ax.set_ylabel('Probability')
                ax.grid(True, alpha=0.3, axis='y')

                # Label only bars above 0.1 to keep the panel readable
                for bar, value in zip(bars, book_emotions):
                    if value > 0.1:
                        ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 0.005,
                                f'{value:.2f}', ha='center', va='bottom', fontsize=9)

            plt.tight_layout()

            comparison_path = os.path.join(save_dir, f'{approach_key}_emotion_comparison_{dominant_emotion}.png')
            _save_then_show(comparison_path, f"✓ Saved {approach_name} comparison: {comparison_path}")

        # ---- 3. Heatmap across all approaches ----
        self._create_comprehensive_emotion_heatmap(results, save_dir, show_plots)

        return save_dir
    
    def _create_comprehensive_emotion_heatmap(self, results, save_dir, show_plots=True):
        """Save (and optionally show) a heatmap of the image vs. the top two books per approach.

        Row 0 holds the input image's distribution and is outlined in blue;
        the following rows are the books whose index ``_get_book_index``
        could resolve. The figure is written to
        ``<save_dir>/emotion_heatmap_<dominant emotion>.png``.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns
        import os

        image_emotions = results['image_prediction']['emotion_distribution']
        dominant_emotion = results['image_prediction']['dominant_emotion']

        # The image is always the first row of the heatmap
        rows = [image_emotions]
        row_labels = ['Input Image']

        # Top two books of every approach, paired with their matrix
        approaches = [
            ('ResNet', results['resnet_recommendations']['top'][:2], self.resnet_emotions_matrix),
            ('BERT', results['bert_recommendations']['top'][:2], self.bert_emotions_matrix),
            ('Multimodal', results['multimodal_recommendations']['top'][:2], self.multimodal_emotions_matrix)
        ]

        for approach_name, books, source_matrix in approaches:
            for position, book in enumerate(books, 1):
                book_idx = self._get_book_index(book['book_id'])
                if book_idx is None:
                    # Book could not be located; it gets no row.
                    continue
                rows.append(source_matrix[book_idx])

                short_title = book['title']
                if len(short_title) > 25:
                    short_title = short_title[:25] + '...'
                similarity = book['similarity_score']
                row_labels.append(f"{approach_name}-{position}\n{short_title}\n(Sim: {similarity:.3f})")

        stacked = np.array(rows)

        plt.figure(figsize=(14, 10))

        heatmap_options = dict(
            xticklabels=self.emotion_labels,
            yticklabels=row_labels,
            annot=True,
            fmt='.3f',
            cmap='YlOrRd',
            cbar_kws={'label': 'Emotion Probability'},
            linewidths=0.5,
        )
        heatmap = sns.heatmap(stacked, **heatmap_options)

        plt.title(f'Emotion Distribution Heatmap\nInput Image vs Top Book Recommendations\n(Dominant Emotion: {dominant_emotion})',
                  fontsize=14, fontweight='bold', pad=20)
        plt.xlabel('Emotions', fontweight='bold', fontsize=12)
        plt.ylabel('Items', fontweight='bold', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.yticks(rotation=0)

        # Blue frame around the first row (the input image)
        image_row_frame = plt.Rectangle((0, 0), len(self.emotion_labels), 1,
                                        fill=False, edgecolor='blue', lw=3)
        heatmap.add_patch(image_row_frame)

        plt.tight_layout()

        # Write the figure to disk, then show or discard it
        heatmap_path = os.path.join(save_dir, f'emotion_heatmap_{dominant_emotion}.png')
        plt.savefig(heatmap_path, dpi=300, bbox_inches='tight')
        print(f"✓ Saved emotion heatmap: {heatmap_path}")

        if not show_plots:
            plt.close()
        else:
            plt.show()
    
    def create_detailed_emotion_report(self, results, save_dir="emotion_analysis_2"):
        """Build, save and return a detailed emotion comparison report.

        Renders the emotion bar charts into ``<save_dir>/charts`` (through
        ``create_emotion_bar_charts``), then compares the input image's
        emotion distribution with the first three ``'top'`` recommendations of
        each approach (resnet, bert, multimodal) and writes the outcome to
        ``<save_dir>/emotion_analysis_<dominant_emotion>.json``.

        Args:
            results: Matching results. Reads ``results['image_prediction']``
                (keys ``emotion_distribution``, ``dominant_emotion``,
                ``confidence``) and the ``'top'`` list of
                ``resnet_recommendations``, ``bert_recommendations`` and
                ``multimodal_recommendations``. Each book entry must provide
                ``book_id``, ``title`` and ``similarity_score``.
            save_dir: Output directory; created when it does not exist.

        Returns:
            dict: The analysis that was written to the JSON file, with the
            keys ``'input_image'`` and ``'recommendations_analysis'``.
        """
        labels = self.emotion_labels

        def top_emotions(distribution, count=3):
            """Return the `count` most probable emotions, most probable first."""
            order = np.argsort(distribution)[-count:][::-1]
            return [
                {'emotion': labels[idx], 'probability': float(distribution[idx])}
                for idx in order
            ]

        def as_label_dict(distribution):
            """Map every emotion label to its probability as a plain float."""
            return {emotion: float(prob) for emotion, prob in zip(labels, distribution)}

        def json_default(value):
            """Encode NumPy values (e.g. an int64 book id) the json module rejects."""
            if isinstance(value, np.generic):
                return value.item()
            if isinstance(value, np.ndarray):
                return value.tolist()
            raise TypeError(
                f"Object of type {type(value).__name__} is not JSON serializable")

        os.makedirs(save_dir, exist_ok=True)

        print("\n📈 Creating detailed emotion analysis report...")

        # Create all bar charts
        chart_dir = os.path.join(save_dir, "charts")
        self.create_emotion_bar_charts(results, save_dir=chart_dir, show_plots=False)

        image_emotions = results['image_prediction']['emotion_distribution']
        dominant_emotion = results['image_prediction']['dominant_emotion']

        emotion_analysis = {
            'input_image': {
                'dominant_emotion': dominant_emotion,
                'confidence': float(results['image_prediction']['confidence']),
                'emotion_distribution': as_label_dict(image_emotions),
                'top_3_emotions': top_emotions(image_emotions),
            },
            'recommendations_analysis': {},
        }

        # Each approach is scored against its own precomputed emotion matrix.
        approaches = [
            ('resnet', results['resnet_recommendations'], self.resnet_emotions_matrix),
            ('bert', results['bert_recommendations'], self.bert_emotions_matrix),
            ('multimodal', results['multimodal_recommendations'], self.multimodal_emotions_matrix),
        ]

        # The image vector is the same for every book, so take its norm once.
        image_norm = np.linalg.norm(image_emotions)

        for approach_name, recommendations, emotion_matrix in approaches:
            top_books = []
            emotion_analysis['recommendations_analysis'][approach_name] = {
                'top_books': top_books
            }

            for rank, book in enumerate(recommendations['top'][:3], 1):
                book_idx = self._get_book_index(book['book_id'])
                if book_idx is None:
                    # Unknown book: skip it but keep the ranks of the others.
                    continue

                book_emotions = emotion_matrix[book_idx]

                # An all-zero vector has no direction; report 0.0 instead of
                # letting 0/0 put a NaN into the JSON report.
                norm_product = image_norm * np.linalg.norm(book_emotions)
                if norm_product > 0:
                    cosine_sim = np.dot(image_emotions, book_emotions) / norm_product
                else:
                    cosine_sim = 0.0
                kl_div = self._calculate_kl_divergence(image_emotions, book_emotions)

                top_books.append({
                    'rank': rank,
                    'title': book['title'],
                    'book_id': book['book_id'],
                    'similarity_score': float(book['similarity_score']),
                    'cosine_similarity': float(cosine_sim),
                    'kl_divergence': float(kl_div),
                    'dominant_emotion': labels[np.argmax(book_emotions)],
                    'emotion_distribution': as_label_dict(book_emotions),
                    'top_3_emotions': top_emotions(book_emotions),
                    # Positive value: the book expresses more of that emotion
                    # than the image does.
                    'emotion_differences': {
                        emotion: float(book_emotions[j] - image_emotions[j])
                        for j, emotion in enumerate(labels)
                    },
                })

        # Save detailed analysis
        analysis_file = os.path.join(save_dir, f'emotion_analysis_{dominant_emotion}.json')
        with open(analysis_file, 'w', encoding='utf-8') as f:
            json.dump(emotion_analysis, f, indent=2, ensure_ascii=False,
                      default=json_default)

        print(f"✓ Saved detailed emotion analysis: {analysis_file}")
        print(f"✓ Saved emotion charts in: {chart_dir}/")

        return emotion_analysis


In [None]:

# ================== MAIN EXECUTION ==================

def main(image_path="input_images/anger_painting.jpg", top_k=686907, final_n=3,
         report_dir=None):
    """Match one image against the book catalogue and print the recommendations.

    Args:
        image_path: Path of the image to analyse.
        top_k: Forwarded as ``top_k`` to ``find_similar_books_complete``. The
            default equals the size of the loaded book dataset reported at
            matcher start-up.
        final_n: Forwarded as ``final_n`` to ``find_similar_books_complete``.
        report_dir: When given, ``create_detailed_emotion_report`` is also run
            and writes its charts and JSON analysis below this directory.
            Nothing is written to disk when it is ``None``.

    Returns:
        The results returned by ``find_similar_books_complete``, or ``None``
        when ``image_path`` does not exist.
    """
    print("🚀 Starting Complete Image-Book Emotion Matching System")

    # Fail fast: constructing the matcher loads the whole book dataset, so
    # validate the input before paying for that.
    if not os.path.exists(image_path):
        print(f"❌ Error: Image file not found: {image_path}")
        return None

    matcher = CompleteImageBookMatcher(
        use_gpu_similarity=True,
        precompute_indices=True,
    )

    print(f"\n📸 Analyzing image: {image_path}")

    results = matcher.find_similar_books_complete(
        image_path=image_path,
        top_k=top_k,
        final_n=final_n,
    )

    print("\n" + "🔍" * 50)
    matcher.display_detailed_recommendations(
        results,
        show_emotion_analysis=True,
    )

    # Only announce saved artefacts when they were really produced; the report
    # method prints the exact output paths itself.
    if report_dir is not None:
        matcher.create_detailed_emotion_report(results, save_dir=report_dir)

    print("\n✅ ANALYSIS COMPLETE!")
    return results


results = main()

In [5]:
# The matcher created inside main() is local to that call, so build a
# module-level instance for the inspection cells that follow.
matcher = CompleteImageBookMatcher(use_gpu_similarity=True, precompute_indices=True)


INITIALIZING COMPLETE IMAGE-BOOK EMOTION MATCHING SYSTEM
Loading ResNet model...
✓ ResNet model loaded and tested successfully
Loading and preprocessing book emotion distributions with image URLs...
Loading preprocessed books data for image URLs...
✓ Loaded 931229 books from preprocessed data
✓ Found 931229 books with image URLs
Loading ResNet emotion predictions...
Loading BERT emotion predictions...
✓ Loaded ResNet emotions for 686990 books
✓ Loaded BERT emotions for 686946 books
Merging ResNet data with image URLs...
Merging BERT data with image URLs...
✓ Final dataset: 686907 books with both ResNet and BERT predictions
✓ ResNet books with image URLs: 686907
✓ BERT books with image URLs: 686907
Precomputing emotion matrices for fast similarity...
Precomputing multimodal combinations...
Moving emotion matrices to GPU for fast similarity...
Building FAISS indices for similarity search...
✓ FAISS indices built successfully
✓ System ready! Loaded 686907 books for matching


array([0.16998516, 0.23982911, 0.19944605, ..., 0.20143256, 0.20434676,
       0.21503952], shape=(686907,), dtype=float32)

In [7]:
# Notebook display cell: show the matcher's ResNet and BERT confidence arrays
# together as a tuple.
matcher.resnet_confidences, matcher.bert_confidences

(array([0.16998516, 0.23982911, 0.19944605, ..., 0.20143256, 0.20434676,
        0.21503952], shape=(686907,), dtype=float32),
 array([0.22007017, 0.84584194, 0.43285942, ..., 0.5469671 , 0.88071233,
        0.37570584], shape=(686907,), dtype=float32))

In [9]:
def convert_ndarray(obj):
    """Recursively replace NumPy values with plain Python equivalents.

    Intended to make a nested result structure acceptable to ``json.dump``.

    Args:
        obj: Any object. Dicts, lists and tuples are walked recursively;
            arrays become (nested) lists; every NumPy scalar (floats, ints,
            ``np.bool_``, ``np.str_``, ...) becomes its built-in counterpart.
            NumPy scalars used as dict keys are converted as well, because
            the json module rejects them as keys.

    Returns:
        A new structure of the same layout (tuples come back as plain
        tuples); objects of any other type are returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.generic is the common base of all NumPy scalar types.
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_ndarray(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_ndarray(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(convert_ndarray(item) for item in obj)
    return obj

# Strip NumPy types from the recommendation results, then persist them as
# UTF-8 JSON for the survey.
results_serializable = convert_ndarray(results)

with open("survey_recommendations_info.json", mode="w", encoding="utf-8") as survey_file:
    json.dump(results_serializable, survey_file, ensure_ascii=False, indent=2)


In [10]:
# Display name of each model -> key under which `results` stores its recommendations.
model_keys = {
    "ResNet": "resnet_recommendations",
    "BERT": "bert_recommendations",
    "Multimodal": "multimodal_recommendations",
}

# One URL list per model, filled below in top / middle / bottom order.
urls_by_model = {model_name: [] for model_name in model_keys}

for model_name, data_key in model_keys.items():
    # Skip models whose recommendations are missing or not a dict.
    if data_key not in results or not isinstance(results[data_key], dict):
        continue
    recommendations = results[data_key]

    for category in ('top', 'middle', 'bottom'):
        books = recommendations.get(category)
        if not isinstance(books, list):
            continue
        # Keep only the books that actually carry a cover URL.
        urls_by_model[model_name].extend(
            book['image_url_large'] for book in books if 'image_url_large' in book
        )

# Print each list as a ready-to-paste Python assignment.
for model_name, urls in urls_by_model.items():
    print(f"{model_name}_urls = {urls}")

ResNet_urls = ['https://images.gr-assets.com/books/1290170247l/9330885.jpg', 'https://images.gr-assets.com/books/1340031123l/14402305.jpg', 'https://images.gr-assets.com/books/1313500615l/12078346.jpg', 'https://images.gr-assets.com/books/1386585006l/19308342.jpg', 'https://images.gr-assets.com/books/1344721731l/13346464.jpg', 'https://images.gr-assets.com/books/1355050076l/278056.jpg', 'https://images.gr-assets.com/books/1381565460l/18663384.jpg', 'https://images.gr-assets.com/books/1327974189l/7901182.jpg', 'https://images.gr-assets.com/books/1355061113l/173589.jpg']
BERT_urls = ['https://images.gr-assets.com/books/1457963343l/29505492.jpg', 'https://images.gr-assets.com/books/1320543569l/1411357.jpg', 'https://images.gr-assets.com/books/1468230274l/28595947.jpg', 'https://images.gr-assets.com/books/1395620769l/1064192.jpg', 'https://images.gr-assets.com/books/1327605330l/6970002.jpg', 'https://images.gr-assets.com/books/1320500996l/3364596.jpg', 'https://images.gr-assets.com/books/1