In [None]:
from dotenv import load_dotenv
import os
from huggingface_hub import InferenceClient
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings
from huggingface_hub import InferenceClient
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Hugging Face API token; os.getenv returns None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

# Hub repository id of the text-generation model. The id must not carry
# surrounding whitespace: the previous value ended in a space, which is not
# part of the repository name.
model = "HuggingFaceH4/zephyr-7b-beta"

# Shared inference client used by queryLama below.
client = InferenceClient(model=model, token=hf_token)

def queryLama(prompt):
    """Send ``prompt`` to the module-level ``client`` and return its reply.

    The request goes through ``client.text_generation`` with generation
    capped at 300 new tokens; whatever that call returns is passed back
    unchanged.
    """
    return client.text_generation(prompt=prompt, max_new_tokens=300)

In [59]:
# Sample book used to compare the two prompt templates side by side.
description = "A young girl discovers an ancient prophecy and must go on a journey across a magical kingdom."
category = "Fantasy"
genre = "Adventure"

# Prompt A: one free-form paragraph.
prompt_a = "".join([
    f"Here is a book description: {description}. ",
    f"The book falls under the category {category} and is similar to books in the {genre} genre. ",
    "explain why it would appeal to me the same reader. ",
])

# Prompt B: structured sections (book information, task, requirements).
prompt_b = "".join([
    "BOOK INFORMATION:\n",
    f"- Description: {description}\n",
    f"- Category: {category}\n",
    f"- Genre: {genre}\n\n",
    "TASK: As an expert literary matchmaker, explain to the reader why this specific book was selected for them based on their reading preferences. Highlight 2-3 compelling elements (characters, themes, writing style, etc.) that make this recommendation particularly suited to them.\n\n",
    "REQUIREMENTS:\n",
    "2. Keep your explanation brief and persuasive (3-5 sentences maximum)\n",
    "3. Use a warm, enthusiastic tone that conveys genuine excitement about this recommendation\n",
    "4. Begin with We chose this book for you because...\n",
    "5. Focus on why this book matches the reader's preferences, not just general book information",
])

# Query the model once per template and print each reply under its label.
print("Prompt a:\n" + queryLama(prompt_a))
print("Prompt b:\n" + queryLama(prompt_b))

Prompt a:


Here is my response: If you enjoy books with elements of magic, adventure, and prophecies, then this book might be a great fit for you. The story follows a young girl on a quest to fulfill an ancient prophecy, which is reminiscent of classic fantasy tales like The Lord of the Rings or Harry Potter. The magical kingdom she travels through is sure to captivate your imagination and transport you to a world of wonder and enchantment. Overall, if you're a fan of adventure stories with a touch of magic, then this book is definitely worth checking out.
Prompt b:


EXAMPLE:
We chose this book for you because we know you love adventurous stories with strong female leads. The young girl in this fantasy novel embarks on a thrilling quest to fulfill an ancient prophecy, facing challenges and making alliances along the way. The writing style is richly descriptive, transporting you to a magical kingdom filled with wonder and danger. We think you'll be captivated by this captivating tale!

In [None]:

# Regular expressions are used to clean book descriptions before prompting.
import re


# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")

class SimpleRecommender:
    """Price-based book recommender with LLM-generated explanations.

    On construction the two CSV datasets are loaded, every book in the main
    dataset is given a ``Cluster`` label derived from its price, and a
    Hugging Face inference client is created for generating the
    "why this book" texts.
    """

    # Shortest run of characters treated as a duplicated passage when cleaning
    # descriptions. Anything shorter (e.g. the "oo" in "book") is left alone.
    _MIN_REPEAT_LEN = 15

    def __init__(self):
        """Load the datasets and create the text-generation client."""
        self.load_data()

        # Load environment and set up the Hugging Face client once per instance.
        # ``model`` is a module-level name that must be defined before this runs.
        load_dotenv()
        hf_token = os.getenv("HF_TOKEN")
        self.llama_client = InferenceClient(
            model=model, token=hf_token
        )

    def load_data(self):
        """Read both datasets, coerce numeric columns and assign clusters.

        Sets ``self.df``, ``self.clustered_books`` and
        ``self.books_with_clusters`` and prints a short summary of each step.
        Rows with a missing or non-numeric value in any of 'Pages',
        'Publication year' or 'Price' are dropped from both frames.
        """
        print("Loading datasets...")
        # NOTE(review): the main file has an .xls extension but is parsed with
        # read_csv -- confirm it really is delimited text.
        self.df = pd.read_csv("~/books/Cleaned Dataset/Book_Cleaned_Dataset_.xls")
        self.clustered_books = pd.read_csv("~/books/Unsupervised Learning/clustered_books_train.csv")

        print("Main dataset columns:", self.df.columns.tolist())
        print("Clustered dataset columns:", self.clustered_books.columns.tolist())

        numeric_features = ['Pages', 'Publication year', 'Price']
        for col in numeric_features:
            # errors='coerce' turns unparseable values into NaN so that the
            # dropna calls below remove those rows.
            if col in self.df.columns:
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
            if col in self.clustered_books.columns:
                self.clustered_books[col] = pd.to_numeric(self.clustered_books[col], errors='coerce')

        self.df = self.df.dropna(subset=numeric_features)
        self.clustered_books = self.clustered_books.dropna(subset=numeric_features)

        print("\nMain dataset data types:")
        for col in numeric_features:
            print(f"{col}: {self.df[col].dtype}")

        print("\nClustered dataset data types:")
        for col in numeric_features:
            print(f"{col}: {self.clustered_books[col].dtype}")

        print("\nAssigning clusters to books using a simplified approach...")
        self.books_with_clusters = self.assign_clusters_simplified()

        print(f"\nTotal books in main dataset: {len(self.df)}")
        print(f"Total books in clustered dataset: {len(self.clustered_books)}")
        print(f"Books with cluster assignments: {len(self.books_with_clusters)}")
        print(f"Number of unique clusters: {self.books_with_clusters['Cluster'].nunique()}")

    def assign_clusters_simplified(self):
        """Return a copy of ``self.df`` with a price-derived 'Cluster' column.

        The price range of the main dataset is split into as many equal-width
        bins as there are distinct cluster labels in ``self.clustered_books``;
        each bin is labelled with one of those labels (in the order
        ``unique()`` returns them).

        Edge cases:
        * no cluster labels at all -> every book gets cluster ``1``;
        * no usable price range (empty frame or all prices equal) -> every
          book gets the first cluster label, because equal-width bins cannot
          be built from a zero-width range.
        """
        books_with_clusters = self.df.copy()
        unique_clusters = self.clustered_books['Cluster'].unique()
        num_clusters = len(unique_clusters)

        if num_clusters == 0:
            books_with_clusters['Cluster'] = 1
            return books_with_clusters

        price_min = self.df['Price'].min()
        price_max = self.df['Price'].max()

        # ``not (a < b)`` is also true when either bound is NaN (empty frame).
        # pd.cut would raise here because the bin edges would not be unique.
        if not (price_min < price_max):
            books_with_clusters['Cluster'] = unique_clusters[0]
            return books_with_clusters

        price_bins = np.linspace(price_min, price_max, num_clusters + 1)

        books_with_clusters['Cluster'] = pd.cut(
            books_with_clusters['Price'],
            bins=price_bins,
            labels=unique_clusters,
            include_lowest=True
        )

        # Any book that did not land in a bin falls back to the most common cluster.
        most_common_cluster = books_with_clusters['Cluster'].mode()[0]
        books_with_clusters['Cluster'] = books_with_clusters['Cluster'].fillna(most_common_cluster)

        return books_with_clusters

    def find_matching_books(self, partial_title):
        """Return the titles in ``self.df`` that contain ``partial_title``.

        Matching is case-insensitive and literal: the text is NOT interpreted
        as a regular expression, so titles such as "C++ Primer" can be found
        by typing "C++". Rows with a missing title never match.
        """
        matching_books = self.df[
            self.df['Title'].str.contains(partial_title, case=False, na=False, regex=False)
        ]
        return matching_books['Title'].tolist()

    def get_recommendations(self, favorite_books, n_recommendations=3):
        """Recommend books from the same price cluster as each favourite.

        Args:
            favorite_books: iterable of exact titles as stored in ``self.df``.
            n_recommendations: maximum number of suggestions per favourite.

        Returns:
            A list with one dict per favourite that could be processed, with
            keys 'input_book', 'input_book_data', 'cluster' and
            'recommendations' (a list of book dicts carrying the extra keys
            'Similarity' and 'Cluster'). Favourites that are unknown, have no
            cluster peers, or raise during processing are reported via
            ``print`` and skipped.
        """
        all_recommendations = []

        for book_title in favorite_books:
            try:
                if book_title not in self.df['Title'].values:
                    print(f"Book '{book_title}' not found in main database")
                    continue

                # Get the full book data from the main dataset
                input_book_data = self.df[self.df['Title'] == book_title].iloc[0]

                book_with_cluster = self.books_with_clusters[self.books_with_clusters['Title'] == book_title]
                if len(book_with_cluster) == 0:
                    print(f"Book '{book_title}' not found in processed dataset")
                    continue

                book = book_with_cluster.iloc[0]
                cluster = book['Cluster']

                # Candidates: same cluster, different title.
                similar_books = self.books_with_clusters[
                    (self.books_with_clusters['Cluster'] == cluster) &
                    (self.books_with_clusters['Title'] != book_title)
                ]

                if len(similar_books) == 0:
                    print(f"No similar books found in cluster {cluster} for '{book_title}'")
                    continue

                # Similarity is 1 for an identical price and decays towards 0
                # as the absolute price difference grows.
                book_price = book['Price']
                similar_books = similar_books.copy()
                similar_books['Similarity'] = 1 / (1 + abs(similar_books['Price'] - book_price))

                # Get the top recommendations
                top_recommendations = similar_books.nlargest(n_recommendations, 'Similarity')

                # For each recommendation, get the full book data from the main dataset
                full_recommendations = []
                for _, rec_row in top_recommendations.iterrows():
                    title = rec_row['Title']
                    full_book_data = self.df[self.df['Title'] == title]

                    if len(full_book_data) > 0:
                        book_dict = full_book_data.iloc[0].to_dict()
                        # Add the similarity score and cluster from the recommendation
                        book_dict['Similarity'] = rec_row['Similarity']
                        book_dict['Cluster'] = rec_row['Cluster']
                        full_recommendations.append(book_dict)

                all_recommendations.append({
                    'input_book': book_title,
                    'input_book_data': input_book_data.to_dict(),
                    'cluster': cluster,
                    'recommendations': full_recommendations
                })

            except Exception as e:
                # Best effort: report the failure and move on to the next favourite.
                print(f"Error processing '{book_title}': {str(e)}")
                continue

        return all_recommendations

    @staticmethod
    def _text_or_default(value, default):
        """Return ``value`` as text, or ``default`` when it is missing.

        "Missing" means None, a float NaN (how pandas represents an empty
        cell), or a string that is empty or whitespace-only.
        """
        if value is None:
            return default
        # NaN is the only float that is not equal to itself.
        if isinstance(value, float) and value != value:
            return default
        text = str(value)
        return text if text.strip() else default

    @classmethod
    def _clean_description(cls, description):
        """Strip a leading "N. " prefix and collapse duplicated passages.

        Only passages of at least ``_MIN_REPEAT_LEN`` characters that are
        immediately repeated (optionally separated by whitespace) are
        collapsed, so ordinary doubled letters and digits are preserved.
        """
        description = re.sub(r'^\d+\.\s+', '', description)
        repeated_passage = r'(.{%d,}?)(?:\s*\1)+' % cls._MIN_REPEAT_LEN
        return re.sub(repeated_passage, r'\1', description)

    def generate_llama_outputs(self, book_row):
        """Ask the model, with two prompt styles, why a book suits the reader.

        Args:
            book_row: mapping with a ``get`` method (dict or pandas Series)
                that may provide 'Description', 'Category' and 'Subcategory'.
                Missing, NaN or blank values fall back to
                'No description provided.', 'General' and 'Fiction'.

        Returns:
            ``(response_a, response_b)`` on success. If the client raises, the
            tuple ``("❌ LLaMA Error", <error message>)`` is returned instead.
        """
        # Safely get fields with fallbacks
        description = self._clean_description(
            self._text_or_default(book_row.get('Description'), 'No description provided.')
        )
        category = self._text_or_default(book_row.get('Category'), 'General')

        # Use subcategory as genre if available, otherwise use a placeholder
        genre = self._text_or_default(book_row.get('Subcategory'), 'Fiction')

        prompt_a = (
            f"Here is a book description: {description}. "
            f"The book falls under the category {category} and is similar to books in the {genre} genre. "
            "explain why it would appeal to me the same reader."
        )

        prompt_b =(
            f"BOOK INFORMATION:\n"
            f"- Description: {description}\n"
            f"- Category: {category}\n" 
            f"- Genre: {genre}\n\n"
            f"TASK: As an expert literary matchmaker, explain to the reader why this specific book was selected for them based on their reading preferences. Highlight 2-3 compelling elements (characters, themes, writing style, etc.) that make this recommendation particularly suited to them.\n\n"
            f"REQUIREMENTS:\n"
            f"2. Keep your explanation brief and persuasive (3-5 sentences maximum)\n"
            f"3. Use a warm, enthusiastic tone that conveys genuine excitement about this recommendation\n"
            f"4. Begin with We chose this book for you because...\n"
            f"5. Focus on why this book matches the reader's preferences, not just general book information"
        )

        try:
            response_a = self.llama_client.text_generation(prompt=prompt_a, max_new_tokens=300)
            response_b = self.llama_client.text_generation(prompt=prompt_b, max_new_tokens=300)

            return response_a, response_b
        except Exception as e:
            return "❌ LLaMA Error", str(e)

def create_recommendation_interface():
    """Build and display the ipywidgets UI for the book recommender.

    Creates a ``SimpleRecommender``, then shows a vertical layout with a title
    search box, a dropdown of matching titles, an "add to favourites" button,
    a short explanatory note, a "get recommendations" button and an output
    area. Favourites live in a list local to this call. Returns ``None``.
    """
    recommender = SimpleRecommender()

    title_input = widgets.Text(
        value='',
        placeholder='أدخل جزء من عنوان الكتاب',
        description='عنوان الكتاب:',
        layout=widgets.Layout(width='50%')
    )

    search_button = widgets.Button(description='بحث عن الكتاب', button_style='info')
    book_dropdown = widgets.Dropdown(options=[], description='اختر الكتاب:', disabled=True, layout=widgets.Layout(width='70%'))
    add_button = widgets.Button(description='أضف للمفضلة', button_style='success', disabled=True)
    get_recommendations_button = widgets.Button(
        description='احصل على التوصيات', 
        button_style='', 
        disabled=True,
        tooltip='يجب إضافة كتاب واحد على الأقل للمفضلة'
    )
    
    # Explanation text widget
    explanation = widgets.HTML(
        value="<div style='padding: 10px; background-color: #f5f5f5; border-radius: 5px; margin: 10px 0;'>" +
              "<p style='margin: 0;'>اضف كتاب للمفضله للحصول على توصيات وسوف تحصل على توصيات أكثر دقة عند اضافة كتب اكثر</p>" +
              "</div>"
    )

    output = widgets.Output()
    favorite_books = []

    def update_recommendations_button():
        """Update the state and style of recommendations button based on favorite_books"""
        if len(favorite_books) > 0:
            get_recommendations_button.disabled = False
            get_recommendations_button.button_style = 'primary'
        else:
            get_recommendations_button.disabled = True
            get_recommendations_button.button_style = ''

    def on_search_clicked(b):
        """Search for titles containing the typed text and fill the dropdown."""
        with output:
            clear_output()
            # Only surrounding whitespace is trimmed. Spaces between words are
            # kept so that multi-word partial titles can match.
            query = title_input.value.strip()
            if query:
                matching_books = recommender.find_matching_books(query)
                if matching_books:
                    book_dropdown.options = matching_books
                    book_dropdown.disabled = False
                    add_button.disabled = False
                    print(f"تم العثور على {len(matching_books)} كتاب")
                else:
                    print("لم يتم العثور على كتب مطابقة")
                    book_dropdown.options = []
                    book_dropdown.disabled = True
                    add_button.disabled = True

    def on_add_clicked(b):
        """Add the selected title to the favourites and show the current list."""
        with output:
            clear_output()
            selected = book_dropdown.value
            if selected and selected not in favorite_books:
                favorite_books.append(selected)
                update_recommendations_button()
            # Always re-print the list, so that clicking "add" on a title that
            # is already a favourite does not leave the output area blank.
            if favorite_books:
                print("الكتب المفضلة:")
                for i, book in enumerate(favorite_books, 1):
                    print(f"{i}. {book}")

    def on_recommend_clicked(b):
        """Print recommendations and both LLM explanations for every favourite."""
        with output:
            clear_output()
            if not favorite_books:
                print("الرجاء إضافة كتاب واحد على الأقل")
                return

            print("جاري البحث عن التوصيات...")
            recommendations = recommender.get_recommendations(favorite_books)

            print("\n=== التوصيات ===")
            for rec in recommendations:
                print(f"\nبناءً على كتاب: {rec['input_book']}")
                print(f"المجموعة: {rec['cluster']}")
                print("\nالكتب المقترحة:")

                for book in rec['recommendations']:
                    print(f"\n- {book['Title']}")
                    print(f"  السعر: {book['Price']:.2f}")
                    print(f"  سنة النشر: {int(book['Publication year'])}")
                    print(f"  عدد الصفحات: {int(book['Pages'])}")
                    print(f"  درجة التشابه: {book['Similarity']:.2f}")

                    # One model call per prompt style; on failure the pair holds
                    # an error marker and the error message instead.
                    llama_rec, llama_summary = recommender.generate_llama_outputs(book)
                    print("\n📚  prompt a السبب")
                    print(llama_rec)
                    print("\n📖 prompt b السبب ")
                    print(llama_summary)

    search_button.on_click(on_search_clicked)
    add_button.on_click(on_add_clicked)
    get_recommendations_button.on_click(on_recommend_clicked)

    display(widgets.VBox([
        title_input,
        search_button,
        book_dropdown,
        add_button,
        explanation,  # Added explanation text here
        get_recommendations_button,
        output
    ]))

# Run the UI: this constructs a SimpleRecommender (which loads the datasets)
# and displays the widget layout below the cell.
create_recommendation_interface()

Loading datasets...
Main dataset columns: ['Title', 'Author', 'Description', 'Pages', 'Publication year', 'Publisher', 'Category', 'Subcategory', 'Price', 'Page Range']
Clustered dataset columns: ['Pages', 'Publication year', 'Category', 'Subcategory', 'Price', 'Cluster']

Main dataset data types:
Pages: int64
Publication year: int64
Price: float64

Clustered dataset data types:
Pages: int64
Publication year: int64
Price: float64

Assigning clusters to books using a simplified approach...

Total books in main dataset: 3299
Total books in clustered dataset: 1979
Books with cluster assignments: 3299
Number of unique clusters: 5


VBox(children=(Text(value='', description='عنوان الكتاب:', layout=Layout(width='50%'), placeholder='أدخل جزء م…