In [1]:
import csv
import ast
from typing import List, Dict

def _parse_genres(raw_genres) -> List[str]:
    """
    Turn the raw text of a ``genres`` cell into a list of lowercase genre names.

    The cell is expected to hold a Python list literal such as
    ``"['Mystery', 'Thriller']"``. Empty, missing or unparseable cells yield an
    empty list so that one bad row cannot abort the scan of the whole file.

    Args:
        raw_genres: Cell content as produced by csv.DictReader (str or None)

    Returns:
        list: Lowercase genre names; non-string entries are dropped
    """
    if not raw_genres:
        return []
    try:
        parsed = ast.literal_eval(raw_genres)
    except (ValueError, SyntaxError, TypeError):
        return []
    if isinstance(parsed, str):
        # A single quoted genre instead of a list literal
        parsed = [parsed]
    elif not isinstance(parsed, (list, tuple, set)):
        return []
    return [genre.lower() for genre in parsed if isinstance(genre, str)]


def filter_books_by_genres(csv_file: str, target_genres: List[str], match_all: bool = False) -> List[Dict]:
    """
    Filter books from a CSV file based on one or more genres.

    Matching is case-insensitive. Rows are returned in file order.

    Args:
        csv_file (str): Path to the CSV file; it must have a 'genres' column
        target_genres (List[str]): List of genres to search for. A single
                         string is accepted and treated as a one-element list.
        match_all (bool): If True, books must match all target genres
                         If False, books must match at least one target genre

    Returns:
        list: List of dictionaries containing book information that match the genre criteria.
              With an empty target list, match_all=True matches every book and
              match_all=False matches none.

    Raises:
        FileNotFoundError: If csv_file does not exist
        KeyError: If the CSV has no 'genres' column
    """
    if isinstance(target_genres, str):
        # Iterating a bare string would compare single characters against genres
        target_genres = [target_genres]
    wanted = {genre.lower() for genre in target_genres}

    matching_books = []
    # newline='' lets the csv module do its own line-ending handling, which is
    # required for quoted fields that contain embedded newlines
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            book_genres = set(_parse_genres(row['genres']))

            if match_all:
                is_match = wanted <= book_genres
            else:
                is_match = not wanted.isdisjoint(book_genres)

            if is_match:
                matching_books.append(row)

    return matching_books

# Example usage
if __name__ == "__main__":
    # Example file path and genres
    file_path = "cleaned_books_data.csv"
    
    # Genres used by the examples below; matching is case-insensitive
    search_genres = ["Mystery", "Thriller"]
    try:
        # Example 1 (currently disabled): match any genre from the list
        # results = filter_books_by_genres(file_path, search_genres, match_all=False)
        # print(f"Found {len(results)} books matching any of these genres: {search_genres}")
        # for book in results:
        #     print(f"- {book['title']} by {book['author']}")
            
        # Example 2: Match all genres from the list
        results = filter_books_by_genres(file_path, search_genres, match_all=True)
        print(f"\nFound {len(results)} books matching all of these genres: {search_genres}")
        # Prints every match, one line per book
        for book in results:
            print(f"- {book['title']} by {book['author']}")
            
    except FileNotFoundError:
        # file_path does not exist
        print("CSV file not found!")
    except KeyError as e:
        # A column looked up by name ('genres', 'title' or 'author') is absent
        print(f"Required column missing in CSV: {e}")
    except Exception as e:
        # Any other failure is reported on stdout instead of being raised
        print(f"An error occurred: {e}")


Found 158 books matching all of these genres: ['Mystery', 'Thriller']
- The Girl with the Dragon Tattoo by Stieg Larsson
- And Then There Were None by Agatha Christie
- In Cold Blood by Truman Capote
- The Godfather by Mario Puzo
- The Lovely Bones by Alice Sebold
- Mystic River by Dennis Lehane
- The Girl Who Played with Fire by Stieg Larsson
- A Story of Yesterday by Sergio Cobo
- The Girl Who Kicked the Hornet's Nest by Stieg Larsson
- The Secret History by Donna Tartt
- In the Woods by Tana French
- Murder on the Orient Express by Agatha Christie
- Dark Places by Gillian Flynn
- The Elephant Tree by R.D. Ronald
- Smilla's Sense of Snow by Peter Heg
- The Murder of Roger Ackroyd by Agatha Christie
- None of This Is True by Lisa Jewell
- Murder on Family Grounds by Susan  Rowland
- Case Histories by Kate Atkinson
- Twenty Years Later by Charlie Donlea
- Before I Go to Sleep by S.J. Watson
- The Girl on the Train by Paula Hawkins
- The Spy Who Came In from the Cold by John Le Carr
- 

In [3]:
import csv
import ast
from typing import List, Dict

def _parse_genres(raw_genres) -> List[str]:
    """
    Turn the raw text of a ``genres`` cell into a list of lowercase genre names.

    The cell is expected to hold a Python list literal such as
    ``"['Fantasy', 'Mystery']"``. Empty, missing or unparseable cells yield an
    empty list so that one bad row cannot abort the scan of the whole file.

    Args:
        raw_genres: Cell content as produced by csv.DictReader (str or None)

    Returns:
        list: Lowercase genre names; non-string entries are dropped
    """
    if not raw_genres:
        return []
    try:
        parsed = ast.literal_eval(raw_genres)
    except (ValueError, SyntaxError, TypeError):
        return []
    if isinstance(parsed, str):
        # A single quoted genre instead of a list literal
        parsed = [parsed]
    elif not isinstance(parsed, (list, tuple, set)):
        return []
    return [genre.lower() for genre in parsed if isinstance(genre, str)]


def filter_books_by_genres(csv_file: str, target_genres: List[str], match_all: bool = False) -> List[Dict]:
    """
    Filter books from a CSV file based on one or more genres and sort by ratings.

    Matching is case-insensitive. The 'rating' value of every returned row is
    replaced by a float; a missing, non-numeric or NaN rating becomes 0.0.

    Args:
        csv_file (str): Path to the CSV file; it must have a 'genres' column
        target_genres (List[str]): List of genres to search for. A single
                         string is accepted and treated as a one-element list.
        match_all (bool): If True, books must match all target genres
                         If False, books must match at least one target genre

    Returns:
        list: List of dictionaries containing book information that match the genre criteria,
              sorted by ratings in descending order (equal ratings keep file order)

    Raises:
        FileNotFoundError: If csv_file does not exist
        KeyError: If the CSV has no 'genres' column
    """
    if isinstance(target_genres, str):
        # Iterating a bare string would compare single characters against genres
        target_genres = [target_genres]
    wanted = {genre.lower() for genre in target_genres}

    matching_books = []
    # newline='' lets the csv module do its own line-ending handling, which is
    # required for quoted fields that contain embedded newlines
    with open(csv_file, 'r', encoding='utf-8', newline='') as file:
        for row in csv.DictReader(file):
            book_genres = set(_parse_genres(row['genres']))

            if match_all:
                is_match = wanted <= book_genres
            else:
                is_match = not wanted.isdisjoint(book_genres)
            if not is_match:
                continue

            # Convert rating to float for sorting. TypeError covers short rows,
            # for which DictReader fills the missing column with None.
            try:
                rating = float(row['rating'])
            except (ValueError, KeyError, TypeError):
                rating = 0.0  # Default rating if missing or invalid
            if rating != rating:
                # NaN compares unequal to itself and would make the sort order unreliable
                rating = 0.0
            row['rating'] = rating
            matching_books.append(row)

    # Sort the matching books by rating in descending order
    matching_books.sort(key=lambda book: book['rating'], reverse=True)
    return matching_books

# Example usage
if __name__ == "__main__":
    # Example file path and genres
    file_path = "cleaned_books_data.csv"
    
    # Genres used by the examples below; matching is case-insensitive
    search_genres = ["fantasy", "mystery"]
    try:
        # Example 1 (currently disabled): match any genre from the list
        # results = filter_books_by_genres(file_path, search_genres, match_all=False)
        # print(f"Found {len(results)} books matching any of these genres: {search_genres}")
        # print("\nTop rated books:")
        # for book in results:
        #     print(f"- {book['title']} by {book['author']} (Rating: {book['rating']})")
            
        # Example 2: Match all genres from the list
        results = filter_books_by_genres(file_path, search_genres, match_all=True)
        print(f"\nFound {len(results)} books matching all of these genres: {search_genres}")
        print("\nTop rated books:")
        # Prints every match (not only the first few), highest rating first
        for book in results:
            print(f"- {book['title']} by {book['author']} (Rating: {book['rating']})")
            
    except FileNotFoundError:
        # file_path does not exist
        print("CSV file not found!")
    except KeyError as e:
        # A column looked up by name ('genres', 'title' or 'author') is absent
        print(f"Required column missing in CSV: {e}")
    except Exception as e:
        # Any other failure is reported on stdout instead of being raised
        print(f"An error occurred: {e}")


Found 80 books matching all of these genres: ['fantasy', 'mystery']

Top rated books:
- The Green Mile by Stephen        King (Rating: 4.48)
- Cold Days by Jim  Butcher (Rating: 4.47)
- Dead Beat by Jim  Butcher (Rating: 4.41)
- The Faceless Ones by Derek Landy (Rating: 4.4)
- Boy's Life by Robert McCammon (Rating: 4.4)
- Proven Guilty by Jim  Butcher (Rating: 4.39)
- White Night by Jim  Butcher (Rating: 4.38)
- Different Seasons by Stephen        King (Rating: 4.36)
- The Deptford Trilogy by Robertson Davies (Rating: 4.32)
- Feet of Clay by Terry Pratchett (Rating: 4.32)
- Leviathan Wakes by James S.A. Corey (Rating: 4.31)
- The Shadow of the Wind by Carlos Ruiz Zafn (Rating: 4.3)
- Blood Rites by Jim  Butcher (Rating: 4.29)
- Playing with Fire by Derek Landy (Rating: 4.29)
- The Shining by Stephen        King (Rating: 4.28)
- It by Stephen        King (Rating: 4.24)
- The Evolution of Mara Dyer by Michelle Hodkin (Rating: 4.24)
- The Mysterious Benedict Society by Trenton Lee Stewar

In [9]:
import csv
import ast
import pickle
from typing import List, Dict, Optional
from pathlib import Path

class BookModel:
    """
    A model class to store and filter book data.

    Books are kept in ``self.books`` as dictionaries (one per CSV row) whose
    'genres' value is a list of lowercase strings and whose 'rating' value is
    a float.
    """
    def __init__(self):
        self.books: List[Dict] = []

    @staticmethod
    def _parse_genres(raw_genres) -> List[str]:
        """
        Turn the raw text of a 'genres' cell into a list of lowercase genre names.

        Empty, missing or unparseable cells yield an empty list so that one bad
        row cannot abort loading of the whole file.
        """
        if not raw_genres:
            return []
        try:
            parsed = ast.literal_eval(raw_genres)
        except (ValueError, SyntaxError, TypeError):
            return []
        if isinstance(parsed, str):
            # A single quoted genre instead of a list literal
            parsed = [parsed]
        elif not isinstance(parsed, (list, tuple, set)):
            return []
        return [genre.lower() for genre in parsed if isinstance(genre, str)]

    @staticmethod
    def _parse_rating(raw_rating) -> float:
        """
        Convert a raw 'rating' cell to float; missing, invalid or NaN values become 0.0.
        """
        try:
            rating = float(raw_rating)
        except (ValueError, TypeError):
            # TypeError covers None (absent column or short row)
            return 0.0
        # NaN compares unequal to itself and would make sorting unreliable
        return 0.0 if rating != rating else rating

    def load_from_csv(self, csv_file: str) -> None:
        """
        Load book data from a CSV file into the model.

        Rows are appended to any books already held by the model. The model is
        only modified once the whole file has been read successfully.

        Args:
            csv_file (str): Path to the CSV file; it must have a 'genres' column

        Raises:
            FileNotFoundError: If csv_file does not exist
            KeyError: If the CSV has no 'genres' column
        """
        loaded: List[Dict] = []
        # newline='' lets the csv module do its own line-ending handling, which
        # is required for quoted fields that contain embedded newlines
        with open(csv_file, 'r', encoding='utf-8', newline='') as file:
            for row in csv.DictReader(file):
                # Convert string representation of list to actual list and normalize genres
                row['genres'] = self._parse_genres(row['genres'])
                # Convert rating to float
                row['rating'] = self._parse_rating(row.get('rating'))
                loaded.append(row)
        self.books.extend(loaded)

    def filter_by_genres(self, target_genres: List[str], match_all: bool = False) -> List[Dict]:
        """
        Filter books based on one or more genres.

        Matching is case-insensitive.

        Args:
            target_genres (List[str]): List of genres to search for. A single
                            string is accepted and treated as a one-element list.
            match_all (bool): If True, books must match all target genres
                            If False, books must match at least one target genre

        Returns:
            list: List of dictionaries containing matching books, sorted by rating
                  in descending order (equal ratings keep their stored order)
        """
        if isinstance(target_genres, str):
            # Iterating a bare string would compare single characters against genres
            target_genres = [target_genres]
        wanted = {genre.lower() for genre in target_genres}

        matching_books = []
        for book in self.books:
            book_genres = set(book['genres'])  # Already lowercase from load_from_csv

            if match_all:
                is_match = wanted <= book_genres
            else:
                is_match = not wanted.isdisjoint(book_genres)

            if is_match:
                matching_books.append(book)

        # Sort by rating in descending order
        return sorted(matching_books, key=lambda book: book['rating'], reverse=True)

def create_and_save_model(csv_file: str, model_path: str) -> None:
    """
    Build a BookModel from a CSV file and persist it with pickle.

    Any exception raised while reading the CSV or writing the pickle is caught
    and reported on stdout rather than propagated to the caller.

    Args:
        csv_file (str): Location of the CSV file to read
        model_path (str): Destination of the pickle file
    """
    book_model = BookModel()
    try:
        # Populate the model first; the output file is only opened afterwards
        book_model.load_from_csv(csv_file)
        with open(model_path, 'wb') as pickle_file:
            pickle.dump(book_model, pickle_file)
        print(f"Model created and saved successfully to {model_path}")
    except Exception as error:
        print(f"Error creating model: {error}")

def load_model(model_path: str) -> Optional[BookModel]:
    """
    Load a BookModel from a pickle file.

    Failures (missing file, corrupt pickle, file holding something other than
    a BookModel) are reported on stdout and signalled by returning None.

    Args:
        model_path (str): Path to the pickle file

    Returns:
        BookModel or None: The loaded model, or None if loading fails
    """
    try:
        with open(model_path, 'rb') as f:
            # SECURITY: unpickling can execute arbitrary code embedded in the
            # file. Only load model files that this code wrote itself.
            model = pickle.load(f)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

    # Enforce the documented return contract so callers never receive an
    # unrelated object that happens to unpickle cleanly
    if not isinstance(model, BookModel):
        print(f"Error loading model: {model_path} does not contain a BookModel")
        return None
    return model

def filter_books_by_genres(model_path: str, target_genres: List[str], 
                         match_all: bool = False) -> Optional[List[Dict]]:
    """
    Run a genre search against a model stored on disk.

    The model is read from model_path on every call and the search is
    delegated to its filter_by_genres method.

    Args:
        model_path (str): Location of the saved model file
        target_genres (List[str]): Genres to look for
        match_all (bool): If True, a book must carry every target genre;
                          otherwise one matching genre is enough

    Returns:
        list or None: The matching books, or None when the model could not be loaded
    """
    book_model = load_model(model_path)
    return book_model.filter_by_genres(target_genres, match_all) if book_model else None

# Example usage
if __name__ == "__main__":
    # Example file paths
    csv_file_path = "cleaned_books_data.csv"
    model_file_path = "books_model.pkl"
    
    # Build the model from the CSV and pickle it (re-done on every run of this cell)
    create_and_save_model(csv_file_path, model_file_path)
    
    # Example: books must carry every genre from the list (match_all=True)
    search_genres = ["fantasy", "mystery"]
    try:
        results = filter_books_by_genres(
            model_file_path,
            search_genres,
            match_all=True
        )
        
        if results is None:
            # None means the model could not be loaded; say so instead of
            # ending the cell with no output at all
            print("Model could not be loaded; no search was performed.")
        elif not results:
            # An empty list is a successful search without hits
            print(f"\nNo books found matching genres: {search_genres}")
        else:
            print(f"\nFound {len(results)} books matching genres: {search_genres}")
            print("\nTop matching books:")
            for book in results:  
                print(f"- {book['title']} by {book['author']} (Rating: {book['rating']})")
                
    except Exception as e:
        # Any unexpected failure is reported on stdout instead of being raised
        print(f"An error occurred: {e}")

Model created and saved successfully to books_model.pkl

Found 80 books matching genres: ['fantasy', 'mystery']

Top matching books:
- The Green Mile by Stephen        King (Rating: 4.48)
- Cold Days by Jim  Butcher (Rating: 4.47)
- Dead Beat by Jim  Butcher (Rating: 4.41)
- The Faceless Ones by Derek Landy (Rating: 4.4)
- Boy's Life by Robert McCammon (Rating: 4.4)
- Proven Guilty by Jim  Butcher (Rating: 4.39)
- White Night by Jim  Butcher (Rating: 4.38)
- Different Seasons by Stephen        King (Rating: 4.36)
- The Deptford Trilogy by Robertson Davies (Rating: 4.32)
- Feet of Clay by Terry Pratchett (Rating: 4.32)
- Leviathan Wakes by James S.A. Corey (Rating: 4.31)
- The Shadow of the Wind by Carlos Ruiz Zafn (Rating: 4.3)
- Blood Rites by Jim  Butcher (Rating: 4.29)
- Playing with Fire by Derek Landy (Rating: 4.29)
- The Shining by Stephen        King (Rating: 4.28)
- It by Stephen        King (Rating: 4.24)
- The Evolution of Mara Dyer by Michelle Hodkin (Rating: 4.24)
- The Mys