In [29]:
!pip install gradio



In [4]:
import os
from difflib import get_close_matches

import gradio as gr
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors


In [25]:
# Download and extract dataset (only needed once)
if not os.path.exists('BX-Books.csv'):
    !wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
    !unzip book-crossings.zip


In [31]:
# Load and preprocess data
def load_data(books_path='BX-Books.csv', ratings_path='BX-Book-Ratings.csv'):
    """Read the books and ratings CSV files into DataFrames.

    Both files are parsed as ';'-separated text whose first row is a header
    that gets replaced by the short column names used in this notebook.

    Args:
        books_path: Path of the books file. Defaults to 'BX-Books.csv'.
        ratings_path: Path of the ratings file. Defaults to
            'BX-Book-Ratings.csv'.

    Returns:
        Tuple ``(df_books, df_ratings)``:
        ``df_books`` has string columns 'isbn', 'title', 'author';
        ``df_ratings`` has 'user' (int32), 'isbn' (str), 'rating' (float32).

    Raises:
        Whatever ``pandas.read_csv`` raises when a file is missing or cannot
        be parsed (for the books file: after the cp1252 retry also failed).
    """
    book_columns = ['isbn', 'title', 'author']
    # Shared by both attempts so the two reads can never drift apart.
    book_options = dict(
        sep=";",
        header=0,
        names=book_columns,
        usecols=book_columns,
        dtype={'isbn': 'str', 'title': 'str', 'author': 'str'},
    )

    try:
        df_books = pd.read_csv(books_path, encoding="ISO-8859-1", **book_options)
    except Exception:
        # Best-effort retry with a second encoding. Catching Exception (not a
        # bare except) lets KeyboardInterrupt / SystemExit propagate.
        df_books = pd.read_csv(books_path, encoding="cp1252", **book_options)

    rating_columns = ['user', 'isbn', 'rating']
    df_ratings = pd.read_csv(
        ratings_path,
        encoding="ISO-8859-1",
        sep=";",
        header=0,
        names=rating_columns,
        usecols=rating_columns,
        dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

    return df_books, df_ratings

def preprocess_data(df_books, df_ratings):
    # Clean data
    df_books['title'] = df_books['title'].str.strip()
    df_books['author'] = df_books['author'].str.strip()
    
    # Filter for meaningful data
    book_counts = df_ratings['isbn'].value_counts()
    user_counts = df_ratings['user'].value_counts()
    
    df_ratings_filtered = df_ratings[
        df_ratings['isbn'].isin(book_counts[book_counts >= 100].index) & 
        df_ratings['user'].isin(user_counts[user_counts >= 50].index)
    ]
    
    # Merge and clean
    df = pd.merge(df_ratings_filtered, df_books, on='isbn')
    df = df.drop_duplicates(['user', 'title'])
    df = df[df['rating'] > 0]  # Remove zero/negative ratings
    
    return df



In [None]:
# Build recommendation model
def build_model(df):
    """Fit a brute-force cosine KNN model on a title-by-user rating matrix.

    Args:
        df: DataFrame with 'title', 'user' and 'rating' columns.

    Returns:
        Tuple ``(model_knn, book_user_mat)``: the fitted ``NearestNeighbors``
        instance and the dense pivot table it was fitted on (titles as index,
        users as columns, missing ratings filled with 0).
    """
    # One row per title, one column per user; repeated (title, user) pairs
    # are averaged and absent pairs become 0.
    pivoted = df.pivot_table(
        values='rating',
        index='title',
        columns='user',
        aggfunc='mean',
    )
    book_user_mat = pivoted.fillna(0)

    knn_settings = {
        'metric': 'cosine',
        'algorithm': 'brute',
        'n_neighbors': 11,
        'n_jobs': -1,
    }
    model_knn = NearestNeighbors(**knn_settings)
    # The estimator is fitted on a CSR copy of the dense values.
    model_knn.fit(csr_matrix(book_user_mat.values))

    return model_knn, book_user_mat

# Find closest title match
def find_closest_title(book_title, all_titles):
    """Return the title in ``all_titles`` most similar to ``book_title``.

    Uses ``difflib.get_close_matches`` with a similarity cutoff of 0.6.

    Returns:
        The single best-matching title, or ``None`` when no candidate
        reaches the cutoff.
    """
    candidates = get_close_matches(book_title, all_titles, n=1, cutoff=0.6)
    if not candidates:
        return None
    return candidates[0]

# Load data and build model.
# Runs the whole pipeline once at cell execution time and leaves its results
# in module-level names; recommend_books() reads `model`, `book_user_mat`
# and `all_titles` as globals, so this must run before any recommendation.
print("Loading data and building model...")
df_books, df_ratings = load_data()
df_processed = preprocess_data(df_books, df_ratings)
model, book_user_mat = build_model(df_processed)
# Plain list of the matrix's row labels (titles), used for membership checks
# and fuzzy matching.
all_titles = book_user_mat.index.tolist()



In [32]:
# Recommendation function for Gradio
def recommend_books(book_title):
    """Return a text block listing books similar to ``book_title``.

    Reads the module-level ``all_titles``, ``book_user_mat`` and ``model``.
    When the title is not an exact match, the closest title found by
    ``find_closest_title`` is used instead.

    Args:
        book_title: Title typed by the user; surrounding whitespace is
            ignored. ``None`` or a blank string is treated as missing input.

    Returns:
        A string: either up to five lines of
        ``"<title> (similarity: <value>)"`` under a header naming the matched
        title, or a short message (missing input, title not found, nothing
        similar enough, or ``"Error: ..."`` if the lookup raised).
    """
    # Blank or missing input can never name a book; answer directly instead
    # of running a fuzzy search (None would raise inside difflib).
    book_title = book_title.strip() if isinstance(book_title, str) else ""
    if not book_title:
        return "Please enter a book title."

    # Find closest match if exact title not found
    if book_title not in all_titles:
        closest_match = find_closest_title(book_title, all_titles)
        if not closest_match:
            return "Book not found in database and no close matches found."
        book_title = closest_match

    try:
        book_idx = book_user_mat.index.get_loc(book_title)
        # Ask for one neighbour more than needed: the query row is itself
        # part of the fitted data and normally comes back as a neighbour.
        distances, indices = model.kneighbors(
            book_user_mat.iloc[book_idx, :].values.reshape(1, -1),
            n_neighbors=6
        )

        # Drop the query book by row index rather than by position: when
        # another book is at the same distance, the query is not guaranteed
        # to be returned first, and skipping slot 0 would both hide a real
        # recommendation and recommend the book to itself.
        neighbours = [
            (idx, dist)
            for idx, dist in zip(indices.flatten(), distances.flatten())
            if idx != book_idx
        ][:5]

        recommendations = []
        for idx, dist in neighbours:
            # Cosine similarity = 1 - cosine distance.
            similarity = round(float(1 - dist), 2)
            if similarity > 0.1:
                rec_title = book_user_mat.index[idx]
                recommendations.append(f"{rec_title} (similarity: {similarity})")

        if not recommendations:
            return "No sufficiently similar books found."

        output = f"Recommendations based on '{book_title}':\n\n" + "\n".join(recommendations)
        return output
    except Exception as e:
        # Shown in the UI instead of a stack trace.
        return f"Error: {str(e)}"

# Create Gradio interface.
# Components are declared inside the Blocks context in the order written
# here; `demo` is the resulting app object that gets launched below.
with gr.Blocks(title="Book Recommendation System") as demo:
    # Page heading and a one-line instruction.
    gr.Markdown("# 📚 Book Recommendation System")
    gr.Markdown("Enter a book title to get recommendations")
    
    # Title input and submit button share one row.
    with gr.Row():
        book_input = gr.Textbox(
            label="Enter a book title",
            placeholder="e.g., The Hobbit, Harry Potter",
            scale=4
        )
        submit_btn = gr.Button("Get Recommendations", scale=1)
    
    # Receives the string returned by recommend_books().
    output = gr.Textbox(label="Recommendations", lines=10)
    
    # Example books for quick testing; each entry is a one-element list
    # because there is a single input component (book_input).
    gr.Examples(
        examples=[
            ["The Hobbit"],
            ["Harry Potter and the Sorcerer's Stone"],
            ["The Da Vinci Code"],
            ["To Kill a Mockingbird"],
            ["1984"]
        ],
        inputs=book_input
    )
    
    # Button click: pass the textbox value to recommend_books and write its
    # return value into the output textbox.
    submit_btn.click(
        fn=recommend_books,
        inputs=book_input,
        outputs=output
    )

# Launch the interface.
# The guard keeps the app from starting if this code is ever imported as a
# module rather than executed directly.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7861
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://445f4d83a6cf593fc8.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
