<a href="https://colab.research.google.com/github/HaqTetsuya/ChatbotPerpusBipa/blob/main/bookrecomend2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Python packages used by this notebook (quiet mode).
!pip install -q sentence-transformers nltk pandas scikit-learn
# Clone the chatbot project repository into the working directory.
!git clone https://github.com/HaqTetsuya/ChatbotPerpusBipa.git
# Download two book-dataset CSV files into the working directory.
# NOTE(review): neither CSV is referenced by the cells visible below, which
# read the goodbooks-10k CSV instead — confirm these downloads are still needed.
!wget https://raw.githubusercontent.com/HaqTetsuya/rusdi-prototype-1/main/py/BooksDatasetCleanFiltered.csv
!wget https://raw.githubusercontent.com/HaqTetsuya/ChatbotPerpusBipa/main/BookDatasetSample.csv

In [None]:
"""
# Book Recommender System - Google Colab Pipeline
# This notebook demonstrates how to use the BookRecommender class to build
# a recommendation system for books based on semantic similarity.
"""

# Install required packages
!pip install -q sentence-transformers nltk pandas scikit-learn

# Import necessary libraries
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from google.colab import files
import pickle

# Save the book recommender code to a file
with open('book_recommender.py', 'w') as f:
    # Copy the entire code from the original file here
    f.write('''import os
import re
import pickle
import logging
import numpy as np
import pandas as pd
from typing import List, Dict, Union, Optional, Tuple
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('punkt_tab')
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Set up logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('book_recommender')

@dataclass
class BookRecommendation:
    """One recommended book together with its ranking metadata.

    Only ``title`` and ``author`` are required; every other field has a default.
    """
    title: str
    author: str
    category: str = ""
    year: Union[int, str] = ""
    description: str = ""
    relevance_score: float = 0.0
    rank: int = 0

    def to_dict(self) -> Dict:
        """Return the recommendation as a plain dictionary keyed by field name."""
        field_names = ('title', 'author', 'category', 'year',
                       'description', 'relevance_score', 'rank')
        return {name: getattr(self, name) for name in field_names}

class BookRecommender:
    """A semantic-based book recommendation system with enhanced features."""

    def __init__(self, model_name: str = 'all-mpnet-base-v2'):
        """Initialize the book recommender with specified model.

        Args:
            model_name: Name of the sentence transformer model to use
        """
        self.model_name = model_name
        self.model = None
        self.book_embeddings = None
        self.df = None

        # Initialize NLP components with lazy loading
        self._stop_words = None
        self._lemmatizer = None
        self._nlp_initialized = False

        # Cache for query embeddings to avoid recomputing
        self.query_cache = {}

        # Maximum cache size
        self.max_cache_size = 100

        logger.info(f"BookRecommender initialized with model: {model_name}")

    @property
    def stop_words(self):
        """Lazy loading of stopwords."""
        if self._stop_words is None:
            try:
                nltk.data.find('corpora/stopwords')
            except LookupError:
                logger.info("Downloading NLTK stopwords")
                nltk.download('stopwords', quiet=True)
            self._stop_words = set(stopwords.words('english'))
        return self._stop_words

    @property
    def lemmatizer(self):
        """Lazy loading of lemmatizer."""
        if self._lemmatizer is None:
            try:
                nltk.data.find('corpora/wordnet')
            except LookupError:
                logger.info("Downloading NLTK wordnet")
                nltk.download('wordnet', quiet=True)
                nltk.download('punkt', quiet=True)
            self._lemmatizer = WordNetLemmatizer()
        return self._lemmatizer

    def preprocess_text(self, text: str) -> str:
        """Advanced text preprocessing with stopword removal and lemmatization.

        Args:
            text: Text to preprocess

        Returns:
            Preprocessed text string
        """
        if not isinstance(text, str) or not text.strip():
            return ""

        # Convert to lowercase and remove special characters
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)

        # Ensure punkt tokenizer is downloaded
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            logger.info("Downloading NLTK punkt tokenizer")
            nltk.download('punkt', quiet=True)

        # Tokenize, remove stopwords, and lemmatize
        tokens = word_tokenize(text)
        tokens = [self.lemmatizer.lemmatize(word) for word in tokens
                if word not in self.stop_words and len(word) > 1]

        return ' '.join(tokens)


    def load_model(self, folder_path: str = "recommender_model") -> bool:
        """Load a previously saved model and embeddings for inference.

        Reads, in order, "config.pkl", the "sentence_transformer" sub-folder
        (falling back to loading ``self.model_name`` by name when the folder is
        absent), "book_embeddings.pkl" and "books_data.pkl" from folder_path.

        The components are assigned as they are read, so when a later step
        fails and False is returned, attributes set by earlier steps (for
        example ``model_name`` and ``model``) keep their newly loaded values.

        SECURITY: the three .pkl files are read with pickle.load, which can
        execute arbitrary code; only load folders from a trusted source.

        Args:
            folder_path: Path to the folder containing saved model components

        Returns:
            Boolean indicating if loading was successful
        """
        try:
            # Check if folder exists
            if not os.path.exists(folder_path):
                logger.error(f"Model folder {folder_path} does not exist.")
                return False

            # Load configuration
            config_path = os.path.join(folder_path, "config.pkl")
            if not os.path.exists(config_path):
                logger.error(f"Configuration file not found at {config_path}")
                return False

            with open(config_path, 'rb') as f:
                config = pickle.load(f)
            # Keep the current model name when the config has no 'model_name' key
            self.model_name = config.get('model_name', self.model_name)
            logger.info(f"Loaded configuration: model_name={self.model_name}")

            # Load the sentence transformer model
            model_path = os.path.join(folder_path, "sentence_transformer")
            if os.path.exists(model_path):
                self.model = SentenceTransformer(model_path)
                logger.info(f"Model loaded from {model_path}")
            else:
                # Fall back to loading from HuggingFace if local model not found
                logger.info(f"Local model not found, loading {self.model_name} from HuggingFace")
                self.model = SentenceTransformer(self.model_name)

            # Load book embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            if not os.path.exists(embeddings_path):
                logger.error(f"Embeddings file not found at {embeddings_path}")
                return False

            with open(embeddings_path, 'rb') as f:
                self.book_embeddings = pickle.load(f)
            logger.info(f"Embeddings loaded: {len(self.book_embeddings)} book vectors")

            # Load the DataFrame
            df_path = os.path.join(folder_path, "books_data.pkl")
            if not os.path.exists(df_path):
                logger.error(f"Books data file not found at {df_path}")
                return False

            with open(df_path, 'rb') as f:
                self.df = pickle.load(f)
            logger.info(f"DataFrame loaded: {len(self.df)} books")

            # Clear cache on model reload
            self.query_cache = {}

            return True

        except Exception as e:
            # Any failure (I/O, unpickling, model loading) is logged with its
            # traceback and reported to the caller as False
            logger.error(f"Error loading model: {str(e)}", exc_info=True)
            return False

    def get_query_embedding(self, query: str) -> np.ndarray:
        """Get embedding for a query with caching.

        Args:
            query: The user's query text

        Returns:
            Embedding vector for the query
        """
        # Check if query is in cache
        if query in self.query_cache:
            return self.query_cache[query]

        # Process query and generate embedding
        processed_query = self.preprocess_text(query)
        query_embedding = self.model.encode([processed_query])

        # Add to cache, managing cache size
        if len(self.query_cache) >= self.max_cache_size:
            # Remove oldest item (FIFO)
            self.query_cache.pop(next(iter(self.query_cache)))

        self.query_cache[query] = query_embedding
        return query_embedding

    def recommend_books(self, user_query: str, top_n: int = 5,
                        include_description: bool = True,
                        min_score: float = 0.0,
                        filter_category: Optional[str] = None) -> List[Dict]:
        """Recommend books based on user query.

        Args:
            user_query: Text query from user
            top_n: Number of recommendations to return
            include_description: Whether to include book descriptions
            min_score: Minimum similarity score (0-1) for recommendations
            filter_category: Optional category to filter results

        Returns:
            List of book recommendation dictionaries
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot make recommendations.")
            return []

        if not user_query.strip():
            logger.warning("Empty query received")
            return []

        logger.info(f"Finding books similar to: '{user_query}'")

        try:
            # Get query embedding (cached if available)
            user_embedding = self.get_query_embedding(user_query)

            # Compute similarity between query and books
            similarities = cosine_similarity(user_embedding, self.book_embeddings)[0]

            # Filter by minimum score if specified
            valid_indices = np.where(similarities >= min_score)[0]
            if len(valid_indices) == 0:
                logger.info(f"No books met the minimum similarity threshold of {min_score}")
                return []

            # Create DataFrame with indices and scores for easier filtering
            results_df = pd.DataFrame({
                'index': range(len(similarities)),
                'score': similarities
            })

            # Filter by category if specified
            if filter_category and 'Category' in self.df.columns:
                category_mask = self.df['Category'].str.contains(filter_category, case=False, na=False)
                valid_indices = [i for i in valid_indices if category_mask.iloc[i]]
                if len(valid_indices) == 0:
                    logger.info(f"No books found in category '{filter_category}'")
                    return []

                # Update results DataFrame
                results_df = results_df[results_df['index'].isin(valid_indices)]

            # Sort and get top N
            results_df = results_df.sort_values('score', ascending=False).head(top_n)
            similar_books_idx = results_df['index'].tolist()

            # Generate recommendations
            recommendations = []

            for i, idx in enumerate(similar_books_idx):
                book_row = self.df.iloc[idx]

                # Create recommendation using the dataclass
                book_data = BookRecommendation(
                    title=book_row.get('Title', 'Unknown Title'),
                    author=book_row.get('Authors', 'Unknown Author'),
                    category=book_row.get('Category', ''),
                    year=book_row.get('Publish Date (Year)', ''),
                    relevance_score=float(similarities[idx]),
                    rank=i + 1
                )

                # Add description if requested
                if include_description and 'Description' in self.df.columns:
                    # Truncate long descriptions
                    description = book_row['Description']
                    if isinstance(description, str) and len(description) > 200:
                        description = description[:197] + "..."
                    book_data.description = description

                recommendations.append(book_data.to_dict())

            logger.info(f"Successfully generated {len(recommendations)} recommendations")
            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}", exc_info=True)
            return []

    def get_similar_books(self, book_id: int, top_n: int = 5) -> List[Dict]:
        """Find books similar to a specific book in the dataset.

        Args:
            book_id: Index of the book in the dataset
            top_n: Number of similar books to return

        Returns:
            List of similar book recommendations
        """
        if self.book_embeddings is None or self.df is None:
            logger.error("Model not initialized. Cannot find similar books.")
            return []

        if book_id < 0 or book_id >= len(self.df):
            logger.error(f"Book ID {book_id} out of range (0-{len(self.df)-1})")
            return []

        try:
            # Get the book's embedding
            book_embedding = self.book_embeddings[book_id].reshape(1, -1)

            # Compute similarities between this book and all others
            similarities = cosine_similarity(book_embedding, self.book_embeddings)[0]

            # Get top N+1 (including the book itself)
            similar_books_idx = np.argsort(similarities)[-top_n-1:][::-1]

            # Remove the original book from results
            similar_books_idx = [idx for idx in similar_books_idx if idx != book_id][:top_n]

            # Format results
            recommendations = []
            for i, idx in enumerate(similar_books_idx):
                book_row = self.df.iloc[idx]

                book_data = BookRecommendation(
                    title=book_row.get('Title', 'Unknown Title'),
                    author=book_row.get('Authors', 'Unknown Author'),
                    category=book_row.get('Category', ''),
                    year=book_row.get('Publish Date (Year)', ''),
                    description=book_row.get('Description', '')[:197] + "..." if isinstance(book_row.get('Description', ''), str) and len(book_row.get('Description', '')) > 200 else book_row.get('Description', ''),
                    relevance_score=float(similarities[idx]),
                    rank=i + 1
                )

                recommendations.append(book_data.to_dict())

            return recommendations

        except Exception as e:
            logger.error(f"Error finding similar books: {str(e)}", exc_info=True)
            return []

    def search_by_keywords(self, keywords: Union[str, List[str]],
                           fields: List[str] = ['Title', 'Description'],
                           top_n: int = 10) -> List[Dict]:
        """Search for books by keywords in specific fields.

        Args:
            keywords: Search keywords as string or list
            fields: DataFrame columns to search in
            top_n: Maximum number of results to return

        Returns:
            List of matching book dictionaries
        """
        if self.df is None:
            logger.error("Book data not loaded. Cannot search.")
            return []

        if not keywords:
            return []

        # Handle different keyword input formats
        if isinstance(keywords, str):
            keyword_list = [kw.strip().lower() for kw in keywords.split() if kw.strip()]
        elif isinstance(keywords, list):
            keyword_list = [kw.strip().lower() for kw in keywords if isinstance(kw, str) and kw.strip()]
        else:
            logger.error(f"Invalid keywords format: {type(keywords)}")
            return []

        if not keyword_list:
            return []

        try:
            results = []

            # Create a copy of the DataFrame for search
            search_df = self.df.copy()

            # Initialize match score column
            search_df['match_score'] = 0

            # Search in each specified field
            for field in fields:
                if field not in search_df.columns:
                    logger.warning(f"Field '{field}' not found in data")
                    continue

                # Convert field to string (if not already)
                search_df[field] = search_df[field].astype(str)

                # Calculate match scores based on keyword presence
                for keyword in keyword_list:
                    # Add points for each keyword match
                    search_df['match_score'] += search_df[field].str.lower().str.contains(keyword, regex=False).astype(int)

            # Sort by match score and get top results
            matches = search_df[search_df['match_score'] > 0].sort_values('match_score', ascending=False).head(top_n)

            # Format results
            for i, (_, book) in enumerate(matches.iterrows()):
                result = {
                    'title': book.get('Title', 'Unknown Title'),
                    'author': book.get('Authors', 'Unknown Author'),
                    'category': book.get('Category', ''),
                    'year': book.get('Publish Date (Year)', ''),
                    'keyword_match_score': int(book['match_score']),
                    'rank': i + 1
                }

                if 'Description' in book:
                    description = book['Description']
                    if isinstance(description, str) and len(description) > 200:
                        description = description[:197] + "..."
                    result['description'] = description

                results.append(result)

            logger.info(f"Found {len(results)} books matching keywords: {keywords}")
            return results

        except Exception as e:
            logger.error(f"Error searching by keywords: {str(e)}", exc_info=True)
            return []

    def save_model(self, folder_path: str = "recommender_model") -> bool:
        """Save the current model, embeddings, and data to disk.

        Writes "config.pkl", a "sentence_transformer" sub-folder,
        "book_embeddings.pkl" and "books_data.pkl" into folder_path, creating
        the folder when needed. These are the same names load_model reads.

        Args:
            folder_path: Directory to save model components

        Returns:
            Boolean indicating if saving was successful; False when the model,
            embeddings or data are missing, or when any write step raises
        """
        if self.model is None or self.book_embeddings is None or self.df is None:
            logger.error("Model not fully initialized. Cannot save.")
            return False

        try:
            # Create directory if it doesn't exist
            os.makedirs(folder_path, exist_ok=True)

            # Save configuration
            # shape[1] is read below, so the embeddings must have at least
            # two dimensions (train() builds them with np.vstack)
            config = {
                'model_name': self.model_name,
                'embedding_size': self.book_embeddings.shape[1],
                'num_books': len(self.df),
                'version': '2.0'
            }

            config_path = os.path.join(folder_path, "config.pkl")
            with open(config_path, 'wb') as f:
                pickle.dump(config, f)

            # Save the model
            model_path = os.path.join(folder_path, "sentence_transformer")
            self.model.save(model_path)

            # Save embeddings
            embeddings_path = os.path.join(folder_path, "book_embeddings.pkl")
            with open(embeddings_path, 'wb') as f:
                pickle.dump(self.book_embeddings, f)

            # Save dataframe
            df_path = os.path.join(folder_path, "books_data.pkl")
            with open(df_path, 'wb') as f:
                pickle.dump(self.df, f)

            logger.info(f"Model successfully saved to {folder_path}")
            return True

        except Exception as e:
            # Files written before the failing step are left in place
            logger.error(f"Error saving model: {str(e)}", exc_info=True)
            return False

    def train(self, books_df: pd.DataFrame,
              text_columns: List[str] = ['Title', 'Description', 'Category' ],
              batch_size: int = 32) -> bool:
        """Train the recommender on a new dataset.

        Args:
            books_df: DataFrame containing book information
            text_columns: Columns to use for generating book embeddings
            batch_size: Batch size for embedding generation

        Returns:
            Boolean indicating if training was successful
        """
        try:
            # Validate input data
            if books_df.empty:
                logger.error("Empty DataFrame provided for training")
                return False

            for col in text_columns:
                if col not in books_df.columns:
                    logger.error(f"Required column '{col}' not found in DataFrame")
                    return False

            # Store the DataFrame
            self.df = books_df.copy()
            logger.info(f"Training on {len(self.df)} books")

            # Initialize model if needed
            if self.model is None:
                logger.info(f"Loading model: {self.model_name}")
                self.model = SentenceTransformer(self.model_name)

            # Combine text columns for each book
            combined_texts = []
            for _, book in self.df.iterrows():
                text_parts = []
                for col in text_columns:
                    if isinstance(book[col], str) and book[col].strip():
                        text_parts.append(book[col])

                # Concatenate with spaces between parts
                combined_text = " ".join(text_parts)
                # Preprocess the combined text
                processed_text = self.preprocess_text(combined_text)
                combined_texts.append(processed_text)

            # Generate embeddings in batches
            logger.info("Generating book embeddings...")
            all_embeddings = []

            for i in range(0, len(combined_texts), batch_size):
                batch = combined_texts[i:i+batch_size]
                batch_embeddings = self.model.encode(batch)
                all_embeddings.append(batch_embeddings)
                logger.info(f"Processed batch {i//batch_size + 1}/{(len(combined_texts)-1)//batch_size + 1}")

            # Combine batch results
            self.book_embeddings = np.vstack(all_embeddings)

            logger.info(f"Training complete. Generated {len(self.book_embeddings)} embeddings.")
            return True

        except Exception as e:
            logger.error(f"Error during training: {str(e)}", exc_info=True)
            return False''')

# Import the BookRecommender class
from book_recommender import BookRecommender

# Function to download sample books dataset
def download_sample_books_dataset(
        max_books=5000,
        url="https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"):
    """Download a sample books CSV and shape it for the recommender.

    The CSV is read from ``url``; its 'authors', 'title',
    'original_publication_year' and 'original_title' columns are copied to
    'Authors', 'Title', 'Publish Date (Year)' and 'Description' (the original
    title stands in for a description, for demo purposes). A 'Category'
    column is derived from title keywords when the data has none, and any
    expected column that is still missing is added as empty strings.

    To use a Kaggle dataset instead, download it in a separate cell and pass
    the local CSV path as ``url``.

    Args:
        max_books: Maximum number of rows to keep, to limit memory use;
            None keeps every row
        url: Location of the CSV file, as accepted by pandas.read_csv

    Returns:
        DataFrame containing at least the columns 'Title', 'Authors',
        'Category', 'Publish Date (Year)' and 'Description'
    """
    print("Downloading sample books dataset...")
    print("Using fallback dataset source...")
    books_df = pd.read_csv(url)

    # Rename-by-copy so the source columns stay available as well
    column_mapping = {
        'authors': 'Authors',
        'title': 'Title',
        'original_publication_year': 'Publish Date (Year)',
        'original_title': 'Description'  # Using original_title as Description for demo purposes
    }
    for old_col, new_col in column_mapping.items():
        if old_col in books_df.columns:
            books_df[new_col] = books_df[old_col]

    # Expected columns that the source did not provide start out empty;
    # 'Category' is excluded here because it is derived from titles below
    required_columns = ['Title', 'Authors', 'Category', 'Publish Date (Year)', 'Description']
    for col in required_columns:
        if col != 'Category' and col not in books_df.columns:
            books_df[col] = ""

    if 'Category' not in books_df.columns:
        # Keyword table built once; the first category with a hit wins
        category_keywords = {
            'fiction': ['novel', 'story', 'tales', 'fiction'],
            'fantasy': ['fantasy', 'magic', 'wizard', 'dragon', 'myth'],
            'science fiction': ['space', 'planet', 'star', 'galaxy', 'future', 'robot'],
            'mystery': ['mystery', 'detective', 'crime', 'thriller', 'murder'],
            'romance': ['love', 'romance', 'heart', 'passion'],
            'biography': ['life', 'memoir', 'biography', 'autobiography'],
            'history': ['history', 'historical', 'century', 'war'],
            'self-help': ['self', 'help', 'success', 'habit', 'productivity']
        }

        def assign_category(title):
            """Map a title to a demo category by substring keyword match."""
            if not isinstance(title, str):
                return "Unknown"
            lowered = title.lower()
            for category, keywords in category_keywords.items():
                if any(keyword in lowered for keyword in keywords):
                    return category
            return "General"

        books_df['Category'] = books_df['Title'].apply(assign_category)

    # Limit dataset size to avoid memory issues
    if max_books is not None and len(books_df) > max_books:
        books_df = books_df.head(max_books)

    print(f"Dataset prepared with {len(books_df)} books.")
    return books_df

# Now let's run the full pipeline
def run_book_recommender_pipeline():
    """Load or train a BookRecommender, then exercise its three query modes.

    A model saved in the "recommender_model" folder is loaded when present;
    otherwise (or when loading fails) the sample dataset is downloaded, the
    recommender is trained on it and a save is attempted. Saving is
    best-effort: a failed save is reported but the in-memory recommender is
    still used. Dataset statistics and the results of sample queries, a
    keyword search and a similar-books lookup are printed.

    Returns:
        The ready-to-use recommender, or None when training fails.
    """
    print("Starting Book Recommender Pipeline")
    print("==================================")

    # 1. Initialize the recommender
    print("\n1. Initializing the book recommender...")
    recommender = BookRecommender(model_name='all-mpnet-base-v2')

    # 2. Check if a saved model exists and load it
    model_dir = "recommender_model"
    if os.path.exists(model_dir):
        print("\n2. Found existing model, loading...")
        if recommender.load_model(model_dir):
            print("Model loaded successfully!")
        else:
            print("Failed to load model, will train a new one.")
    else:
        print("\n2. No existing model found, will train a new one.")

    # 3. If no model was loaded, get data and train
    if recommender.df is None or recommender.book_embeddings is None:
        print("\n3. Preparing training data...")
        books_df = download_sample_books_dataset()

        print("\n4. Training model...")
        training_success = recommender.train(
            books_df=books_df,
            text_columns=['Title', 'Description', 'Category'],
            batch_size=16  # Smaller batch size for Colab
        )

        if not training_success:
            print("Training failed.")
            return

        print("Training completed successfully!")

        # Report the real outcome of the save instead of assuming success
        print("\n5. Saving model...")
        if recommender.save_model(model_dir):
            print(f"Model saved to {model_dir}")
        else:
            print(f"Warning: model could not be saved to {model_dir}")

    # 4. Display some dataset statistics
    print("\n6. Dataset Statistics:")
    print(f"Number of books: {len(recommender.df)}")
    if 'Category' in recommender.df.columns:
        category_counts = recommender.df['Category'].value_counts().head(10)
        print("Top 10 categories:")
        for category, count in category_counts.items():
            print(f"  - {category}: {count} books")

    # 5. Test the recommender with sample queries
    print("\n7. Testing recommendation functionality...")
    sample_queries = [
        "adventure in a magical world",
        "science fiction with robots",
        "romantic love story",
        "historical biography"
    ]

    for query in sample_queries:
        print(f"\nQuery: '{query}'")
        recommendations = recommender.recommend_books(query, top_n=3)

        if recommendations:
            print("Top 3 recommendations:")
            for book in recommendations:
                print(f"  - {book['title']} by {book['author']} ({book['category']}) - Score: {book['relevance_score']:.2f}")
        else:
            print("No recommendations found.")

    # 6. Test keyword search functionality
    print("\n8. Testing keyword search functionality...")
    keywords = "mystery detective"
    print(f"Keywords: '{keywords}'")
    keyword_results = recommender.search_by_keywords(keywords, top_n=3)

    if keyword_results:
        print("Top 3 keyword matches:")
        for book in keyword_results:
            print(f"  - {book['title']} by {book['author']} - Match score: {book['keyword_match_score']}")
    else:
        print("No keyword matches found.")

    # 7. Find similar books, using the first book in the dataset as an example
    print("\n9. Testing 'similar books' functionality...")
    if len(recommender.df) > 0:
        example_book_id = 0
        example_book = recommender.df.iloc[example_book_id]
        print(f"Finding books similar to: {example_book['Title']} by {example_book.get('Authors', 'Unknown')}")

        similar_books = recommender.get_similar_books(example_book_id, top_n=3)

        if similar_books:
            print("Top 3 similar books:")
            for book in similar_books:
                print(f"  - {book['title']} by {book['author']} - Similarity: {book['relevance_score']:.2f}")
        else:
            print("No similar books found.")

    print("\nBook Recommender Pipeline completed successfully!")
    return recommender



In [None]:
# Run the pipeline, then an interactive recommendation session

def _print_book_list(books, heading, score_label, score_key, score_format=""):
    """Print a heading followed by one formatted entry per book dictionary."""
    print(f"\n{heading}")
    print("-" * 80)
    for book in books:
        print(f"#{book['rank']} - {book['title']} by {book['author']}")
        print(f"    Category: {book['category']}")
        if book.get('year'):
            print(f"    Year: {book['year']}")
        print(f"    {score_label}: {format(book[score_key], score_format)}")
        description = book.get('description')
        # Only real text is shown; a missing (NaN) description is skipped
        if isinstance(description, str) and description:
            print(f"    Description: {description}")
        print("-" * 80)


def _show_similar_to_recommendation(recommender, recommendations):
    """Ask which recommendation to expand and print the books similar to it."""
    choice = input("Enter recommendation number to find similar books: ")
    try:
        book_num = int(choice)
    except ValueError:
        print("Please enter a valid number.")
        return

    if not 1 <= book_num <= len(recommendations):
        print("Invalid recommendation number.")
        return

    chosen = recommendations[book_num - 1]
    matches = ((recommender.df['Title'] == chosen['title']) &
               (recommender.df['Authors'] == chosen['author']))
    # get_similar_books expects a row position, not an index label
    positions = np.flatnonzero(matches.to_numpy())
    if positions.size == 0:
        print("Book not found in database.")
        return

    similar_books = recommender.get_similar_books(int(positions[0]), top_n=5)
    if similar_books:
        _print_book_list(similar_books, "Similar Books:",
                         "Similarity Score", 'relevance_score', ".4f")
    else:
        print("No similar books found.")


def _run_interactive_session(recommender):
    """Run the query loop; return the last real query and its recommendations."""
    print("\n" + "="*50)
    print("Interactive Book Recommender")
    print("="*50)
    print("Enter your book preferences or 'exit' to quit")

    last_query, last_recommendations = "", []

    while True:
        try:
            user_input = input("\nWhat kind of books are you looking for? ")
            if user_input.strip().lower() == 'exit':
                break

            recommendations = recommender.recommend_books(user_input, top_n=5)
            last_query, last_recommendations = user_input, recommendations

            if recommendations:
                _print_book_list(recommendations, "Recommended Books:",
                                 "Relevance Score", 'relevance_score', ".4f")
            else:
                print("\nNo recommendations found for your query. Try something else!")

            # Additional options
            print("\nAdditional options:")
            print("1. Get similar books to a recommendation")
            print("2. Search by keywords")
            print("3. Continue with a new query")

            option = input("Enter option (1-3): ")

            if option == '1':
                _show_similar_to_recommendation(recommender, recommendations)
            elif option == '2':
                keywords = input("Enter keywords to search for: ")
                keyword_results = recommender.search_by_keywords(keywords, top_n=5)
                if keyword_results:
                    _print_book_list(keyword_results, "Keyword Search Results:",
                                     "Match Score", 'keyword_match_score')
                else:
                    print("No books found matching those keywords.")
            elif option != '3':
                print("Invalid option. Continuing with a new query.")
        except (EOFError, KeyboardInterrupt):
            # Closed input or an interrupted cell ends the session cleanly
            break

    return last_query, last_recommendations


def _plot_recommendation_scores(query, recommendations):
    """Draw a bar chart of relevance scores for one set of recommendations."""
    if not recommendations:
        return
    try:
        titles = [book['title'][:20] + '...' if len(book['title']) > 20 else book['title']
                  for book in recommendations]
        scores = [book['relevance_score'] for book in recommendations]

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(range(len(scores)), scores, color='skyblue')
        ax.set_xticks(range(len(scores)))
        ax.set_xticklabels(titles, rotation=45, ha='right')
        ax.set_xlabel('Book')
        ax.set_ylabel('Relevance Score')
        ax.set_title(f'Recommendation Scores for Query: "{query}"')
        fig.tight_layout()
        plt.show()
    except Exception as e:
        print(f"Could not create visualization: {str(e)}")


if __name__ == "__main__":
    recommender = run_book_recommender_pipeline()

    if recommender is None:
        # The pipeline returns None when training fails
        print("\nThe recommender could not be prepared; skipping the interactive session.")
    else:
        user_input, recommendations = _run_interactive_session(recommender)

        print("\nThank you for using the Book Recommender System!")

        # Optional: visualization of the scores of the last real query
        _plot_recommendation_scores(user_input, recommendations)