In [1]:
import pandas as pd

# Load your books.csv file
df = pd.read_csv("books.csv")

# View the first few rows
df.head()


Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher,Unnamed: 12
0,34889,Brown's Star Atlas: Showing All The Bright Sta...,Brown,Son & Ferguson,0.0,851742718,9.78E+12,eng,49,0,0,05-01-1977,Brown Son & Ferguson Ltd.
1,22128,Patriots (The Coming Collapse),James Wesley,Rawles,3.63,156384155X,9.78E+12,eng,342,38,4,1/15/1999,Huntington House Publishers
2,16914,The Tolkien Fan's Medieval Reader,David E. Smith (Turgon of TheOneRing.net,one of the founding members of this Tolkien w...,3.58,1593600119,9.78E+12,eng,400,26,4,04-06-2004,Cold Spring Press
3,12224,Streetcar Suburbs: The Process of Growth in Bo...,Sam Bass Warner,Jr./Sam B. Warner,3.58,674842111,9.78E+12,en-US,236,61,6,4/20/2004,Harvard University Press
4,2034,Comoediae 1: Acharenses/Equites/Nubes/Vespae/P...,Aristophanes/F.W. Hall/W.M. Geldart,5,198145047.0,9.78E+12,grc,364,0,0,2/22/1922,Oxford University Press USA,


In [2]:
# Convert rating-related fields to numeric
df['average_rating'] = pd.to_numeric(df['average_rating'], errors='coerce')
df['ratings_count'] = pd.to_numeric(df.get('ratings_count', pd.Series([0]*len(df))), errors='coerce')

# Drop missing values
df = df[['title', 'authors', 'average_rating', 'ratings_count']].dropna().drop_duplicates()
df.head()


Unnamed: 0,title,authors,average_rating,ratings_count
4,Comoediae 1: Acharenses/Equites/Nubes/Vespae/P...,Aristophanes/F.W. Hall/W.M. Geldart,5.0,0
5,Willem de Kooning: Late Paintings,Julie Sylvester/David Sylvester,5.0,1
6,Literature Circle Guide: Bridge to Terabithia:...,Tara MacCarthy,5.0,4
7,Middlesex Borough (Images of America: New Jersey),Middlesex Borough Heritage Committee,5.0,2
8,Zone of the Enders: The 2nd Runner Official St...,Tim Bogenn,5.0,2


In [4]:
# Filter and sort top-rated books with at least 100 ratings
def get_top_books(n=10):
    filtered = df[df['ratings_count'] >= 100]
    top_books = filtered.sort_values(by=['average_rating', 'ratings_count'], ascending=False).head(n)
    return top_books[['title', 'authors', 'average_rating', 'ratings_count']]

# Show top 10 books
get_top_books(10)


Unnamed: 0,title,authors,average_rating,ratings_count
30,The Complete Calvin and Hobbes,Bill Watterson,4.82,32213
32,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPr√©,4.78,41428
34,It's a Magical World (Calvin and Hobbes #11),Bill Watterson,4.76,23875
37,Harry Potter Collection (Harry Potter #1-6),J.K. Rowling,4.73,28242
38,Early Color,Saul Leiter/Martin Harrison,4.73,144
39,Homicidal Psycho Jungle Cat (Calvin and Hobbes...,Bill Watterson,4.72,15365
40,Elliott Erwitt: Snaps,Murray Sayle/Charles Flowers/Elliott Erwitt,4.72,102
43,Calvin and Hobbes: Sunday Pages 1985-1995: An ...,Bill Watterson,4.71,3613
45,Study Bible: NIV,Anonymous,4.7,4166
46,The Complete Aubrey/Maturin Novels (5 Volumes),Patrick O'Brian,4.7,1338


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Combine title and author for feature extraction
df['combined'] = df['title'] + " " + df['authors']

# Vectorize using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['combined'])

# Fit KNN model with cosine distance
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(tfidf_matrix)

# Title to index mapping
title_to_index = pd.Series(df.index, index=df['title']).drop_duplicates()


In [6]:
def recommend_books(title, n=5):
    if title not in title_to_index:
        return f"‚ùå Book '{title}' not found."

    idx = title_to_index[title]
    distances, indices = model.kneighbors(tfidf_matrix[idx], n_neighbors=n+1)

    recs = []
    for i in range(1, len(indices[0])):
        book = df.iloc[indices[0][i]]
        recs.append({
            'Title': book['title'],
            'Author': book['authors'],
            'Avg Rating': book['average_rating']
        })

    return pd.DataFrame(recs)


In [7]:
get_top_books(10)


Unnamed: 0,title,authors,average_rating,ratings_count
30,The Complete Calvin and Hobbes,Bill Watterson,4.82,32213
32,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPr√©,4.78,41428
34,It's a Magical World (Calvin and Hobbes #11),Bill Watterson,4.76,23875
37,Harry Potter Collection (Harry Potter #1-6),J.K. Rowling,4.73,28242
38,Early Color,Saul Leiter/Martin Harrison,4.73,144
39,Homicidal Psycho Jungle Cat (Calvin and Hobbes...,Bill Watterson,4.72,15365
40,Elliott Erwitt: Snaps,Murray Sayle/Charles Flowers/Elliott Erwitt,4.72,102
43,Calvin and Hobbes: Sunday Pages 1985-1995: An ...,Bill Watterson,4.71,3613
45,Study Bible: NIV,Anonymous,4.7,4166
46,The Complete Aubrey/Maturin Novels (5 Volumes),Patrick O'Brian,4.7,1338


In [8]:
recommend_books("The Hobbit", 5)  # Replace with any title from your dataset


Unnamed: 0,Title,Author,Avg Rating
0,The Glass Castle,Jeannette Walls/Julia Gibson,4.27
1,The Shrouded Walls,Susan Howatch,3.4
2,Watchmen on the Walls,Hannah Hurnard,4.02
3,Blandings Castle (Blandings Castle #3),P.G. Wodehouse,4.18
4,The Last Castle,Jack Vance,3.79


In [9]:
df['title'].sample(20).tolist()  # Show 20 random book titles


["The Capture (Guardians of Ga'Hoole  #1)",
 'Data Structures and Algorithms in Java',
 'Neverwhere (London Below  #1)',
 'Mr. and Mistress (Dynasties: The Elliotts #5)',
 "A Robin McKinley Collection: Spindle's End  The Hero and the Crown and The Blue Sword (Folktales #1-3)",
 'The Screwtape Letters/Book & Study Guide',
 "Surely You're Joking  Mr. Feynman!: Adventures of a Curious Character",
 "The Heights of Courage: A Tank Leader's War on the Golan",
 'Swell Foop (Xanth #25)',
 'Ultimate Punishment',
 'Virgin',
 'Trial by Fire (Newpointe 911 #4)',
 'Divine By Choice (Partholon  #2)',
 'Sacrament',
 'From the Corner of His Eye',
 "A Young Person's Guide To Philosophy",
 'Angels',
 'Runaways  Vol. 1: Pride and Joy',
 'My First Word Touch and Feel',
 'Poetry and Prose of Alexander Pope (Riverside Editions)']

In [10]:
!jupyter nbextension enable --py widgetsnbextension --sys-prefix


usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: kernel kernelspec migrate run troubleshoot

Jupyter command `jupyter-nbextension` not found.


In [11]:
import ipywidgets as widgets
from IPython.display import display, clear_output


In [12]:
# Create dropdown from unique book titles
book_dropdown = widgets.Dropdown(
    options=sorted(df['title'].unique()),
    description='üìñ Book:',
    layout=widgets.Layout(width='80%')
)

# Button to trigger recommendation
button = widgets.Button(
    description='Get Recommendations',
    button_style='success'
)

# Output area to show results
output = widgets.Output()

# Function to handle button click
def on_button_click(b):
    with output:
        clear_output()
        title = book_dropdown.value
        print(f"üìö Similar books to: **{title}**\n")
        display(recommend_books(title, 5))

# Link button to function
button.on_click(on_button_click)

# Display widgets
display(book_dropdown, button, output)


Dropdown(description='üìñ Book:', layout=Layout(width='80%'), options=('  said the shotgun to the head.', '$30 F‚Ä¶

Button(button_style='success', description='Get Recommendations', style=ButtonStyle())

Output()

In [13]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Widget Elements ---

# Book dropdown
book_dropdown = widgets.Dropdown(
    options=sorted(df['title'].unique()),
    description='üìñ Book:',
    layout=widgets.Layout(width='95%')
)

# Slider for number of recommendations
rec_slider = widgets.IntSlider(
    value=5,
    min=1,
    max=10,
    step=1,
    description='üî¢ Recommendations:',
    continuous_update=False
)

# Button to get similar books
recommend_button = widgets.Button(
    description='Get Similar Books',
    button_style='primary'
)

# Output for similar books
recommend_output = widgets.Output()

# Output for top-rated books
top_rated_output = widgets.Output()

# --- Logic Functions ---

def on_recommend_button_click(b):
    with recommend_output:
        clear_output()
        selected_title = book_dropdown.value
        n = rec_slider.value
        print(f"üìö Similar books to: **{selected_title}**\n")
        display(recommend_books(selected_title, n))

def show_top_books():
    with top_rated_output:
        clear_output()
        print("‚≠ê Top-Rated Books (with 100+ ratings):\n")
        display(get_top_books(10))

# Bind the button to the function
recommend_button.on_click(on_recommend_button_click)

# Call top-rated display on load
show_top_books()

# --- Layout Using Tabs ---

tab_nest = widgets.Tab()

tab1_box = widgets.VBox([book_dropdown, rec_slider, recommend_button, recommend_output])
tab2_box = widgets.VBox([top_rated_output])

tab_nest.children = [tab1_box, tab2_box]
tab_nest.set_title(0, 'üîç Recommend Similar')
tab_nest.set_title(1, '‚≠ê Top Rated')

# Display tabs
display(tab_nest)


Tab(children=(VBox(children=(Dropdown(description='üìñ Book:', layout=Layout(width='95%'), options=('  said the ‚Ä¶