In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings

# Install a global "ignore" filter so warnings are not shown for the rest of
# the session.
# NOTE(review): this hides every warning category, including ones raised while
# loading/converting the datasets below — confirm that is intended.
warnings.filterwarnings("ignore")

class SimpleRecommender:
    """Price-based book recommender built on two CSV datasets.

    The main dataset supplies the books (``Title``, ``Pages``,
    ``Publication year``, ``Price`` ...); the clustered dataset is only used
    to obtain the set of cluster labels.  Every book of the main dataset is
    then placed in a cluster by slicing the price range into equal-width
    bins, and recommendations are the books of the same cluster whose price
    is closest to the favourite book's price.
    """

    # Default dataset locations.  The first file carries an ``.xls``
    # extension but is read with ``pd.read_csv``.
    DEFAULT_BOOKS_PATH = "/home/nouarif4/Downloads/Book_Cleaned_Dataset_.xls"
    DEFAULT_CLUSTERS_PATH = "/home/nouarif4/Documents/augment-projects/hh/clustered_books_train.csv"

    # Columns that are coerced to numbers; rows lacking any of them are dropped.
    NUMERIC_FEATURES = ['Pages', 'Publication year', 'Price']

    def __init__(self, books_path=None, clusters_path=None):
        """Load both datasets and assign a cluster to every book.

        Args:
            books_path: CSV file with the main book dataset.  Defaults to
                ``DEFAULT_BOOKS_PATH``.
            clusters_path: CSV file with the clustered dataset.  Defaults to
                ``DEFAULT_CLUSTERS_PATH``.
        """
        self.books_path = self.DEFAULT_BOOKS_PATH if books_path is None else books_path
        self.clusters_path = self.DEFAULT_CLUSTERS_PATH if clusters_path is None else clusters_path
        self.load_data()

    @staticmethod
    def _coerce_numeric(frame, columns, label):
        """Convert *columns* of *frame* to numbers and drop incomplete rows.

        The conversion is done in place on *frame*; the returned frame is the
        one with the unparsable/missing rows removed.

        Raises:
            KeyError: if any of *columns* is absent from *frame*.
        """
        missing = [col for col in columns if col not in frame.columns]
        if missing:
            # Fail early with a readable message instead of a bare KeyError
            # coming out of ``dropna`` further down.
            raise KeyError(f"{label} is missing required columns: {missing}")
        for col in columns:
            # Unparsable values become NaN and are removed by dropna below.
            frame[col] = pd.to_numeric(frame[col], errors='coerce')
        return frame.dropna(subset=columns)

    def load_data(self):
        """Read both CSV files, clean the numeric columns and build clusters.

        Sets ``self.df``, ``self.clustered_books`` and
        ``self.books_with_clusters`` and prints diagnostic information.

        Raises:
            KeyError: if a dataset lacks one of ``NUMERIC_FEATURES``.
        """
        # Load datasets
        print("Loading datasets...")
        self.df = pd.read_csv(self.books_path)
        self.clustered_books = pd.read_csv(self.clusters_path)

        # Print column names for debugging
        print("Main dataset columns:", self.df.columns.tolist())
        print("Clustered dataset columns:", self.clustered_books.columns.tolist())

        numeric_features = list(self.NUMERIC_FEATURES)
        self.df = self._coerce_numeric(self.df, numeric_features, "Main dataset")
        self.clustered_books = self._coerce_numeric(
            self.clustered_books, numeric_features, "Clustered dataset"
        )

        # Print data types after conversion
        print("\nMain dataset data types:")
        for col in numeric_features:
            print(f"{col}: {self.df[col].dtype}")

        print("\nClustered dataset data types:")
        for col in numeric_features:
            print(f"{col}: {self.clustered_books[col].dtype}")

        # Assign a fixed cluster to each book based on simple rules
        print("\nAssigning clusters to books using a simplified approach...")
        self.books_with_clusters = self.assign_clusters_simplified()

        # Print some statistics
        print(f"\nTotal books in main dataset: {len(self.df)}")
        print(f"Total books in clustered dataset: {len(self.clustered_books)}")
        print(f"Books with cluster assignments: {len(self.books_with_clusters)}")
        print(f"Number of unique clusters: {self.books_with_clusters['Cluster'].nunique()}")

    def assign_clusters_simplified(self):
        """Return a copy of ``self.df`` with a ``Cluster`` column added.

        The price range of the main dataset is cut into as many equal-width
        bins as there are distinct cluster labels in ``self.clustered_books``;
        the labels are attached to the bins in their order of first
        appearance.  If there are no labels at all every book gets cluster
        ``1``.  If the prices cannot be binned (empty dataset or a single
        distinct price) every book gets the first label.
        """
        books_with_clusters = self.df.copy()

        # NaN is not a usable bin label, so ignore missing cluster values.
        unique_clusters = self.clustered_books['Cluster'].dropna().unique()
        num_clusters = len(unique_clusters)

        if num_clusters == 0:
            # If no clusters found, assign all to cluster 1
            books_with_clusters['Cluster'] = 1
            return books_with_clusters

        price_min = books_with_clusters['Price'].min()
        price_max = books_with_clusters['Price'].max()

        # ``pd.cut`` needs strictly increasing edges.  The negated comparison
        # is also true when min/max are NaN (empty dataset).
        if not price_min < price_max:
            books_with_clusters['Cluster'] = unique_clusters[0]
            return books_with_clusters

        price_bins = np.linspace(price_min, price_max, num_clusters + 1)

        # include_lowest=True keeps the cheapest book inside the first bin.
        books_with_clusters['Cluster'] = pd.cut(
            books_with_clusters['Price'],
            bins=price_bins,
            labels=unique_clusters,
            include_lowest=True,
        )

        # Fill any NaN clusters with the most common cluster
        modes = books_with_clusters['Cluster'].mode()
        if not modes.empty:
            books_with_clusters['Cluster'] = books_with_clusters['Cluster'].fillna(modes[0])

        return books_with_clusters

    def find_matching_books(self, partial_title):
        """Return the titles that contain *partial_title*, ignoring case.

        The text is matched literally (``regex=False``): it comes from a free
        text box, so characters such as ``+``, ``(`` or ``*`` must not be
        interpreted as regular-expression syntax.
        """
        mask = self.df['Title'].str.contains(
            partial_title, case=False, na=False, regex=False
        )
        return self.df.loc[mask, 'Title'].tolist()

    def get_recommendations(self, favorite_books, n_recommendations=5):
        """Recommend books for each title in *favorite_books*.

        Args:
            favorite_books: iterable of exact book titles.
            n_recommendations: maximum number of books returned per title.

        Returns:
            A list with one dict per title that could be processed, holding
            ``'input_book'`` (the title), ``'cluster'`` (its cluster label)
            and ``'recommendations'`` (a DataFrame of same-cluster books with
            an added ``'Similarity'`` column, best first).  Titles that are
            unknown, have no cluster mates, or fail with an error are
            reported with ``print`` and skipped.
        """
        all_recommendations = []
        clustered = self.books_with_clusters
        # Built once: membership is tested for every requested title.
        known_titles = set(self.df['Title'])

        for book_title in favorite_books:
            try:
                # First check if book exists in main dataset
                if book_title not in known_titles:
                    print(f"Book '{book_title}' not found in main database")
                    continue

                # Get the book details and its cluster
                book_with_cluster = clustered[clustered['Title'] == book_title]
                if book_with_cluster.empty:
                    print(f"Book '{book_title}' not found in processed dataset")
                    continue

                # When a title occurs more than once the first row is used.
                book = book_with_cluster.iloc[0]
                cluster = book['Cluster']

                # Find similar books in the same cluster
                similar_books = clustered[
                    (clustered['Cluster'] == cluster)
                    & (clustered['Title'] != book_title)
                ]

                if similar_books.empty:
                    print(f"No similar books found in cluster {cluster} for '{book_title}'")
                    continue

                # Similarity is 1 for an identical price and decays towards 0
                # as the absolute price difference grows.
                price_gap = (similar_books['Price'] - book['Price']).abs()
                similar_books = similar_books.copy()
                similar_books['Similarity'] = 1 / (1 + price_gap)

                recommendations = similar_books.nlargest(n_recommendations, 'Similarity')
                all_recommendations.append({
                    'input_book': book_title,
                    'cluster': cluster,
                    'recommendations': recommendations
                })

            except Exception as e:
                # Best effort: one bad title must not abort the whole request.
                print(f"Error processing '{book_title}': {str(e)}")
                continue

        return all_recommendations

def create_recommendation_interface():
    """Build and display the notebook UI for the book recommender.

    Creates a ``SimpleRecommender`` (which loads the datasets), then shows a
    title search box, a dropdown with the matching titles, a button that adds
    the selected title to an in-memory favourites list, and a button that
    prints recommendations for all favourites into an output area.
    """
    recommender = SimpleRecommender()

    title_input = widgets.Text(
        value='',
        placeholder='أدخل جزء من عنوان الكتاب',
        description='عنوان الكتاب:',
        layout=widgets.Layout(width='50%')
    )

    search_button = widgets.Button(
        description='بحث عن الكتاب',
        button_style='info'
    )

    # Disabled until a search returns at least one title.
    book_dropdown = widgets.Dropdown(
        options=[],
        description='اختر الكتاب:',
        disabled=True,
        layout=widgets.Layout(width='70%')
    )

    add_button = widgets.Button(
        description='أضف للمفضلة',
        button_style='success',
        disabled=True
    )

    get_recommendations_button = widgets.Button(
        description='احصل على التوصيات',
        button_style='primary'
    )

    output = widgets.Output()
    # Titles picked by the user; shared by the callbacks below.
    favorite_books = []

    def on_search_clicked(b):
        """Fill the dropdown with the titles matching the search text."""
        with output:
            clear_output()
            if not title_input.value:
                return

            matching_books = recommender.find_matching_books(title_input.value)
            if matching_books:
                book_dropdown.options = matching_books
                book_dropdown.disabled = False
                add_button.disabled = False
                print(f"تم العثور على {len(matching_books)} كتاب")
            else:
                print("لم يتم العثور على كتب مطابقة")
                book_dropdown.options = []
                book_dropdown.disabled = True
                add_button.disabled = True

    def on_add_clicked(b):
        """Add the selected title to the favourites and show the list."""
        with output:
            clear_output()
            selected = book_dropdown.value
            if selected and selected not in favorite_books:
                favorite_books.append(selected)

            # Re-render the list on every click: the output was just cleared,
            # so skipping this for an already saved book would leave the
            # area blank and make the favourites appear lost.
            if favorite_books:
                print("الكتب المفضلة:")
                for i, book in enumerate(favorite_books, 1):
                    print(f"{i}. {book}")

    def on_recommend_clicked(b):
        """Print the recommendations for every favourite book."""
        with output:
            clear_output()
            if not favorite_books:
                print("الرجاء إضافة كتاب واحد على الأقل")
                return

            print("جاري البحث عن التوصيات...")
            recommendations = recommender.get_recommendations(favorite_books)

            print("\n=== التوصيات ===")
            for rec in recommendations:
                print(f"\nبناءً على كتاب: {rec['input_book']}")
                print(f"المجموعة: {rec['cluster']}")
                print("\nالكتب المقترحة:")
                for _, book in rec['recommendations'].iterrows():
                    print(f"\n- {book['Title']}")
                    print(f"  السعر: {book['Price']:.2f}")
                    # The values are converted with int() for display.
                    print(f"  سنة النشر: {int(book['Publication year'])}")
                    print(f"  عدد الصفحات: {int(book['Pages'])}")
                    print(f"  درجة التشابه: {book['Similarity']:.2f}")

    search_button.on_click(on_search_clicked)
    add_button.on_click(on_add_clicked)
    get_recommendations_button.on_click(on_recommend_clicked)

    # Display the interface
    display(widgets.VBox([
        title_input,
        search_button,
        book_dropdown,
        add_button,
        get_recommendations_button,
        output
    ]))

# Build the recommender and render the widget UI as this cell's output.
# Constructing the recommender reads both dataset files, so the loading
# diagnostics are printed before the widgets appear.
create_recommendation_interface()

Loading datasets...
Main dataset columns: ['Title', 'Author', 'Description', 'Pages', 'Publication year', 'Publisher', 'Category', 'Subcategory', 'Price', 'Page Range']
Clustered dataset columns: ['Pages', 'Publication year', 'Category', 'Subcategory', 'Price', 'Cluster']

Main dataset data types:
Pages: int64
Publication year: int64
Price: float64

Clustered dataset data types:
Pages: int64
Publication year: int64
Price: float64

Assigning clusters to books using a simplified approach...

Total books in main dataset: 3299
Total books in clustered dataset: 1979
Books with cluster assignments: 3299
Number of unique clusters: 5


VBox(children=(Text(value='', description='عنوان الكتاب:', layout=Layout(width='50%'), placeholder='أدخل جزء م…