In [5]:
import pandas as pd
import numpy as np
from collections import Counter
import ast
import pickle

class EfficientBookRecommender:
    """Recommend books by combining genre overlap, rating and genre popularity.

    Instances carry two attributes, set either by ``__init__`` (from a CSV
    file) or by ``load_model`` (from a pickle file):

    * ``df`` -- DataFrame; this class reads its ``'title'``, ``'genres'`` and
      ``'rating'`` columns. ``'genres'`` holds a list of genre names per row.
    * ``genre_freq`` -- ``Counter`` mapping each genre name to the number of
      times it occurs across all rows of ``df``.
    """

    # Weights of the three components that make up ``final_score``.
    GENRE_WEIGHT = 0.5
    RATING_WEIGHT = 0.3
    POPULARITY_WEIGHT = 0.2
    # Rating factor used when a row has no rating.
    DEFAULT_RATING_FACTOR = 0.5

    def __init__(self, csv_file='cleaned_books_data.csv'):
        """Read ``csv_file``, parse its ``'genres'`` column and count genres.

        Args:
            csv_file: Path of the CSV file to load with ``pandas.read_csv``.
        """
        self.df = pd.read_csv(csv_file)
        self.df['genres'] = self.df['genres'].apply(self._safe_eval)
        self.genre_freq = self._compute_genre_frequencies()

    def _safe_eval(self, genre_str):
        """Convert one raw ``'genres'`` cell into a list of genres.

        Args:
            genre_str: Usually the string form of a Python list (for example
                ``"['Mystery', 'Crime']"``), a missing value, or an already
                parsed collection.

        Returns:
            A list of genres. Missing, non-string and unparseable values yield
            an empty list instead of raising, so one bad row cannot abort the
            whole load.
        """
        # Already-parsed values are passed through (makes the parse idempotent;
        # ``pd.notna`` on a list would return an array, not a bool).
        if isinstance(genre_str, (list, tuple, set)):
            return list(genre_str)
        # NaN / None / numbers: nothing to parse.
        if not isinstance(genre_str, str):
            return []
        try:
            parsed = ast.literal_eval(genre_str)
        except (ValueError, TypeError, SyntaxError):
            return []
        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)
        # A lone string literal is treated as a single genre rather than being
        # iterated character by character downstream.
        if isinstance(parsed, str):
            return [parsed]
        return []

    def _compute_genre_frequencies(self):
        """Return a ``Counter`` of genre occurrences over every row of ``df``."""
        return Counter(genre for genres in self.df['genres'] for genre in genres)

    def _compute_genre_similarity(self, genres1, genres2):
        """Return the fraction of distinct genres in ``genres1`` also in ``genres2``.

        The measure is asymmetric: it is normalised by ``genres1`` only.
        Returns 0 when ``genres1`` is empty.
        """
        if not genres1:
            return 0
        reference = set(genres1)
        return len(reference & set(genres2)) / len(reference)

    def get_weighted_recommendations(self, book_title, n_recommendations=5):
        """Rank every other book against ``book_title`` and return the best ones.

        The score of a candidate is
        ``GENRE_WEIGHT * genre_match + RATING_WEIGHT * rating_factor +
        POPULARITY_WEIGHT * popularity_bonus`` where

        * ``genre_match`` is the share of the input book's genres the candidate has,
        * ``rating_factor`` is ``rating / 5`` capped at 1 (``DEFAULT_RATING_FACTOR``
          when the rating is missing),
        * ``popularity_bonus`` is the mean frequency of the candidate's genres
          divided by the frequency of the most common genre.

        Args:
            book_title: Exact title to look up in the ``'title'`` column. When
                several rows share the title, the first one is used.
            n_recommendations: Maximum number of results to return.

        Returns:
            A list of dicts with keys ``'title'``, ``'genres'`` and
            ``'final_score'``, sorted by descending score. An empty list is
            returned (and a message printed) when the title is unknown or any
            error occurs while scoring.
        """
        try:
            # Work with row *positions* throughout so the lookup is correct for
            # any DataFrame index, not only the default 0..n-1 one.
            title_mask = (self.df['title'] == book_title).to_numpy()
            if not title_mask.any():
                print(f"Error: book '{book_title}' not found")
                return []
            book_pos = int(title_mask.argmax())  # first matching row
            input_book_genres = self.df['genres'].iloc[book_pos]

            # Loop-invariant: frequency of the most common genre.
            max_popularity = max(self.genre_freq.values(), default=1)

            recommendations = []
            for pos, (_, row) in enumerate(self.df.iterrows()):
                if pos == book_pos:
                    continue

                candidate_genres = row['genres']

                # Genre similarity
                genre_match = self._compute_genre_similarity(input_book_genres, candidate_genres)

                # Popularity bonus
                if candidate_genres:
                    popularity = np.mean([self.genre_freq.get(g, 0) for g in candidate_genres])
                else:
                    popularity = 0
                popularity_bonus = popularity / max_popularity

                # Rating factor
                if pd.notna(row['rating']):
                    rating_factor = min(float(row['rating']) / 5.0, 1.0)
                else:
                    rating_factor = self.DEFAULT_RATING_FACTOR

                # Final score
                final_score = (
                    genre_match * self.GENRE_WEIGHT +
                    rating_factor * self.RATING_WEIGHT +
                    popularity_bonus * self.POPULARITY_WEIGHT
                )

                recommendations.append({
                    'title': row['title'],
                    'genres': candidate_genres,
                    'final_score': final_score
                })

            # ``sorted`` is stable, so ties keep their original row order.
            ranked = sorted(recommendations, key=lambda x: x['final_score'], reverse=True)
            return ranked[:n_recommendations]

        except Exception as e:
            # Best-effort contract kept for existing callers: report and
            # return no recommendations instead of propagating.
            print(f"Error: {str(e)}")
            return []

    def save_model(self, filename='book_recommender.pkl'):
        """Save the model to a pickle file.

        Only ``df`` and ``genre_freq`` are written. Failures are reported with
        a printed message and are not raised.

        Args:
            filename: Destination path of the pickle file.
        """
        try:
            with open(filename, 'wb') as file:
                pickle.dump({
                    'df': self.df,
                    'genre_freq': self.genre_freq
                }, file)
            print(f"Model saved successfully to {filename}")
        except Exception as e:
            print(f"Error saving model: {e}")

    @classmethod
    def load_model(cls, filename='book_recommender.pkl'):
        """Load the model from a pickle file.

        Args:
            filename: Path of a file previously written by ``save_model``.

        Returns:
            A recommender with ``df`` and ``genre_freq`` restored, or ``None``
            (after printing a message) when the file is missing or cannot be
            loaded.
        """
        try:
            # SECURITY: unpickling can execute arbitrary code. Only load files
            # that this class wrote itself or that come from a trusted source.
            with open(filename, 'rb') as file:
                saved_data = pickle.load(file)

            # Create a new instance without running __init__ (which would
            # re-read the CSV) and restore the saved attributes.
            recommender = cls.__new__(cls)
            recommender.df = saved_data['df']
            recommender.genre_freq = saved_data['genre_freq']
            return recommender
        except FileNotFoundError:
            print(f"File {filename} not found.")
            return None
        except Exception as e:
            print(f"Error loading model: {e}")
            return None

In [6]:
if __name__ == "__main__":
    # Build the recommender from the CSV and persist it to disk.
    recommender = EfficientBookRecommender('cleaned_books_data.csv')
    recommender.save_model()

    # Reload it from the pickle file to exercise the save/load round trip.
    loaded_recommender = EfficientBookRecommender.load_model()

    # load_model() returns None on failure (it prints the reason itself), so
    # only query the model when loading actually succeeded.
    if loaded_recommender is not None:
        book_title = "The Sign of Four"
        recommendations = loaded_recommender.get_weighted_recommendations(book_title)

        print(f"Recommendations for '{book_title}':")
        for rec in recommendations:
            print(f"Title: {rec['title']}")
            print(f"Genres: {', '.join(rec['genres'])}")
            print(f"Score: {rec['final_score']:.3f}")
            print("-" * 50)

Model saved successfully to book_recommender.pkl
Recommendations for 'The Sign of Four':
Title: And Then There Were None
Genres: Mystery, Fiction, Thriller, Crime, Mystery Thriller, Classics, Audiobook
Score: 0.819
--------------------------------------------------
Title: The Murder of Roger Ackroyd
Genres: Mystery, Fiction, Crime, Mystery Thriller, Audiobook, Thriller, Classics
Score: 0.819
--------------------------------------------------
Title: The Hound of the Baskervilles
Genres: Mystery, Fiction, Crime, Classics, Mystery Thriller, Thriller, Audiobook
Score: 0.810
--------------------------------------------------
Title: The Adventures of Sherlock Holmes
Genres: Mystery, Fiction, Short Stories, Crime, Classics, Audiobook, Mystery Thriller
Score: 0.747
--------------------------------------------------
Title: A Study in Scarlet
Genres: Mystery, Fiction, Crime, Classics, Audiobook, Mystery Thriller, Historical Fiction
Score: 0.746
--------------------------------------------------
