In [1]:
import numpy as np
import pandas as pd

In [2]:
from datetime import datetime
import sys

# Function to write output to both console and file
def write_output(content, print_content=True):
    """Write content to output.txt file and optionally print to console"""
    with open('output.txt', 'a', encoding='utf-8') as f:
        f.write(str(content) + '\n')
    if print_content:
        print(content)

# Clear the output file and add header
with open('output.txt', 'w', encoding='utf-8') as f:
    f.write("Book Recommendation System Output\n")
    f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    f.write("=" * 50 + '\n\n')

write_output("Starting Book Recommendation System Analysis...")

Starting Book Recommendation System Analysis...


In [3]:
write_output("Loading datasets...")
books = pd.read_csv('Books.csv')
ratings = pd.read_csv('Ratings.csv')
users = pd.read_csv('Users.csv')
write_output(f"Datasets loaded successfully!")
write_output(f"Books shape: {books.shape}")
write_output(f"Ratings shape: {ratings.shape}")
write_output(f"Users shape: {users.shape}")

Loading datasets...


  books = pd.read_csv('Books.csv')


Datasets loaded successfully!
Books shape: (271360, 8)
Ratings shape: (1149780, 3)
Users shape: (278858, 3)


In [4]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [5]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


In [6]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [7]:
shapes = books.shape, ratings.shape, users.shape
write_output(f"Dataset shapes - Books: {shapes[0]}, Ratings: {shapes[1]}, Users: {shapes[2]}")
shapes

Dataset shapes - Books: (271360, 8), Ratings: (1149780, 3), Users: (278858, 3)


((271360, 8), (1149780, 3), (278858, 3))

In [8]:
null_books = books.isnull().sum()
write_output(f"Null values in Books dataset: {null_books.sum()}")
null_books

Null values in Books dataset: 7


ISBN                   0
Book-Title             0
Book-Author            2
Year-Of-Publication    0
Publisher              2
Image-URL-S            0
Image-URL-M            0
Image-URL-L            3
dtype: int64

In [9]:
null_ratings = ratings.isnull().sum()
write_output(f"Null values in Ratings dataset: {null_ratings.sum()}")
null_ratings

Null values in Ratings dataset: 0


User-ID        0
ISBN           0
Book-Rating    0
dtype: int64

In [10]:
null_users = users.isnull().sum()
write_output(f"Null values in Users dataset: {null_users.sum()}")
null_users

Null values in Users dataset: 110762


User-ID          0
Location         0
Age         110762
dtype: int64

In [11]:
duplicates = books.duplicated().sum(), ratings.duplicated().sum(), users.duplicated().sum()
write_output(f"Duplicate values - Books: {duplicates[0]}, Ratings: {duplicates[1]}, Users: {duplicates[2]}")
duplicates

Duplicate values - Books: 0, Ratings: 0, Users: 0


(0, 0, 0)

## Popularity based recommendation system

we will take top 50 books based on user ratings.
To be fair we have applied minimum cap on number of reviews
To find the rating for each book we would be required to work on ratings and books simentaneously.
So we will merge both of those data based on ISBN column as it is common in both the datasets.

In [12]:
write_output("\n" + "="*50)
write_output("POPULARITY BASED RECOMMENDATION SYSTEM")
write_output("="*50)


POPULARITY BASED RECOMMENDATION SYSTEM


In [13]:
ratings_with_name = ratings.merge(books, on='ISBN')
write_output(f"Merged ratings with books: {ratings_with_name.shape}")
write_output(f"Original ratings shape: {ratings.shape}")
write_output(f"Some books from ratings were not found in books dataset")
ratings_with_name
# as we can see that number of rows in ratings_with_name is less than ratings, it means that some of the books in ratings are not present in books dataset.
# So we will drop those rows from ratings dataset.

Merged ratings with books: (1031136, 10)
Original ratings shape: (1149780, 3)
Some books from ratings were not found in books dataset


Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...,http://images.amazon.com/images/P/034545104X.0...
1,276726,0155061224,5,Rites of Passage,Judith Rae,2001,Heinle,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...,http://images.amazon.com/images/P/0155061224.0...
2,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...,http://images.amazon.com/images/P/0446520802.0...
3,276729,052165615X,3,Help!: Level 1,Philip Prowse,1999,Cambridge University Press,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...,http://images.amazon.com/images/P/052165615X.0...
4,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,Cambridge University Press,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...,http://images.amazon.com/images/P/0521795028.0...
...,...,...,...,...,...,...,...,...,...,...
1031131,276704,0876044011,0,Edgar Cayce on the Akashic Records: The Book o...,Kevin J. Todeschi,1998,A.R.E. Press (Association of Research &amp; Enlig,http://images.amazon.com/images/P/0876044011.0...,http://images.amazon.com/images/P/0876044011.0...,http://images.amazon.com/images/P/0876044011.0...
1031132,276704,1563526298,9,Get Clark Smart : The Ultimate Guide for the S...,Clark Howard,2000,Longstreet Press,http://images.amazon.com/images/P/1563526298.0...,http://images.amazon.com/images/P/1563526298.0...,http://images.amazon.com/images/P/1563526298.0...
1031133,276706,0679447156,0,Eight Weeks to Optimum Health: A Proven Progra...,Andrew Weil,1997,Alfred A. Knopf,http://images.amazon.com/images/P/0679447156.0...,http://images.amazon.com/images/P/0679447156.0...,http://images.amazon.com/images/P/0679447156.0...
1031134,276709,0515107662,10,The Sherbrooke Bride (Bride Trilogy (Paperback)),Catherine Coulter,1996,Jove Books,http://images.amazon.com/images/P/0515107662.0...,http://images.amazon.com/images/P/0515107662.0...,http://images.amazon.com/images/P/0515107662.0...


In [14]:
num_rating_df = ratings_with_name.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_rating_df.rename(columns={'Book-Rating': 'num_ratings'}, inplace=True)
num_rating_df

Unnamed: 0,Book-Title,num_ratings
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1
...,...,...
241066,Ã?Â?lpiraten.,2
241067,Ã?Â?rger mit Produkt X. Roman.,4
241068,Ã?Â?sterlich leben.,1
241069,Ã?Â?stlich der Berge.,3


In [15]:
avg_rating_df = ratings_with_name.groupby('Book-Title')['Book-Rating'].mean().reset_index()
avg_rating_df.rename(columns={'Book-Rating': 'avg_ratings'}, inplace=True)
avg_rating_df

Unnamed: 0,Book-Title,avg_ratings
0,A Light in the Storm: The Civil War Diary of ...,2.250000
1,Always Have Popsicles,0.000000
2,Apple Magic (The Collector's series),0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,0.000000
...,...,...
241066,Ã?Â?lpiraten.,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,5.250000
241068,Ã?Â?sterlich leben.,7.000000
241069,Ã?Â?stlich der Berge.,2.666667


In [16]:
popularity_df = num_rating_df.merge(avg_rating_df, on='Book-Title')
popularity_df

Unnamed: 0,Book-Title,num_ratings,avg_ratings
0,A Light in the Storm: The Civil War Diary of ...,4,2.250000
1,Always Have Popsicles,1,0.000000
2,Apple Magic (The Collector's series),1,0.000000
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1,8.000000
4,Beyond IBM: Leadership Marketing and Finance ...,1,0.000000
...,...,...,...
241066,Ã?Â?lpiraten.,2,0.000000
241067,Ã?Â?rger mit Produkt X. Roman.,4,5.250000
241068,Ã?Â?sterlich leben.,1,7.000000
241069,Ã?Â?stlich der Berge.,3,2.666667


In [17]:
popular_df = popularity_df[popularity_df['num_ratings'] >= 250].sort_values('avg_ratings', ascending=False).head(50)
popular_df

Unnamed: 0,Book-Title,num_ratings,avg_ratings
80434,Harry Potter and the Prisoner of Azkaban (Book 3),428,5.852804
80422,Harry Potter and the Goblet of Fire (Book 4),387,5.824289
80441,Harry Potter and the Sorcerer's Stone (Book 1),278,5.73741
80426,Harry Potter and the Order of the Phoenix (Boo...,347,5.501441
80414,Harry Potter and the Chamber of Secrets (Book 2),556,5.183453
191612,The Hobbit : The Enchanting Prelude to The Lor...,281,5.007117
187377,The Fellowship of the Ring (The Lord of the Ri...,368,4.94837
80445,Harry Potter and the Sorcerer's Stone (Harry P...,575,4.895652
211384,"The Two Towers (The Lord of the Rings, Part 2)",260,4.880769
219741,To Kill a Mockingbird,510,4.7


In [18]:
# now i need author and image url for each book in popular_df
# we have applied drop_duplicates on Book-Title column as there are multiple ISBNs for same book in books dataset.
popular_df = popular_df.merge(books, on='Book-Title').drop_duplicates('Book-Title')[['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_ratings']].reset_index(drop=True)

# Log top 10 popular books to output file
write_output("\nTop 10 Popular Books:")
for i, (_, book) in enumerate(popular_df.head(10).iterrows(), 1):
    write_output(f"{i}. {book['Book-Title']} by {book['Book-Author']}")
    write_output(f"   Average Rating: {book['avg_ratings']:.2f}, Number of Ratings: {book['num_ratings']}")

popular_df


Top 10 Popular Books:
1. Harry Potter and the Prisoner of Azkaban (Book 3) by J. K. Rowling
   Average Rating: 5.85, Number of Ratings: 428
2. Harry Potter and the Goblet of Fire (Book 4) by J. K. Rowling
   Average Rating: 5.82, Number of Ratings: 387
3. Harry Potter and the Sorcerer's Stone (Book 1) by J. K. Rowling
   Average Rating: 5.74, Number of Ratings: 278
4. Harry Potter and the Order of the Phoenix (Book 5) by J. K. Rowling
   Average Rating: 5.50, Number of Ratings: 347
5. Harry Potter and the Chamber of Secrets (Book 2) by J. K. Rowling
   Average Rating: 5.18, Number of Ratings: 556
6. The Hobbit : The Enchanting Prelude to The Lord of the Rings by J.R.R. TOLKIEN
   Average Rating: 5.01, Number of Ratings: 281
7. The Fellowship of the Ring (The Lord of the Rings, Part 1) by J.R.R. TOLKIEN
   Average Rating: 4.95, Number of Ratings: 368
8. Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback)) by J. K. Rowling
   Average Rating: 4.90, Number of Ratings: 575
9. T

Unnamed: 0,Book-Title,Book-Author,Image-URL-M,num_ratings,avg_ratings
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,http://images.amazon.com/images/P/0439136350.0...,428,5.852804
1,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,http://images.amazon.com/images/P/0439139597.0...,387,5.824289
2,Harry Potter and the Sorcerer's Stone (Book 1),J. K. Rowling,http://images.amazon.com/images/P/0590353403.0...,278,5.73741
3,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,http://images.amazon.com/images/P/043935806X.0...,347,5.501441
4,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,http://images.amazon.com/images/P/0439064872.0...,556,5.183453
5,The Hobbit : The Enchanting Prelude to The Lor...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339681.0...,281,5.007117
6,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339703.0...,368,4.94837
7,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,http://images.amazon.com/images/P/059035342X.0...,575,4.895652
8,"The Two Towers (The Lord of the Rings, Part 2)",J.R.R. TOLKIEN,http://images.amazon.com/images/P/0345339711.0...,260,4.880769
9,To Kill a Mockingbird,Harper Lee,http://images.amazon.com/images/P/0446310786.0...,510,4.7


Above we have successfully made book recommendation system based on popularity. Results have been saved to output.txt file.

In [19]:
write_output("Popularity-based recommendation system completed successfully!")

Popularity-based recommendation system completed successfully!


## Collaborative filtering based recommendation system

In [20]:
write_output("\n" + "="*50)
write_output("COLLABORATIVE FILTERING RECOMMENDATION SYSTEM")
write_output("="*50)


COLLABORATIVE FILTERING RECOMMENDATION SYSTEM


In [21]:
x = ratings_with_name.groupby('User-ID').count()['Book-Rating'] > 200
k_users = x[x].index
write_output(f"Users with more than 200 ratings: {len(k_users)}")
write_output(f"Total users: {len(x)}")

Users with more than 200 ratings: 811
Total users: 92106


In [22]:
filtered_rating = ratings_with_name[ratings_with_name['User-ID'].isin(k_users)]

In [23]:
y = filtered_rating.groupby('Book-Title').count()['Book-Rating'] > 50
famous_books = y[y].index
write_output(f"Books with more than 50 ratings: {len(famous_books)}")
write_output(f"Total books after user filtering: {len(y)}")

Books with more than 50 ratings: 679
Total books after user filtering: 155845


In [24]:
final_ratings = filtered_rating[filtered_rating['Book-Title'].isin(famous_books)]

In [25]:
final_ratings_clean = final_ratings.drop_duplicates()
write_output(f"Final dataset shape after filtering and cleaning: {final_ratings_clean.shape}")
write_output(f"Unique users in final dataset: {final_ratings_clean['User-ID'].nunique()}")
write_output(f"Unique books in final dataset: {final_ratings_clean['Book-Title'].nunique()}")
final_ratings_clean

Final dataset shape after filtering and cleaning: (57236, 10)
Unique users in final dataset: 810
Unique books in final dataset: 679


Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
1150,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...,http://images.amazon.com/images/P/002542730X.0...
1163,277427,0060930535,0,The Poisonwood Bible: A Novel,Barbara Kingsolver,1999,Perennial,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...,http://images.amazon.com/images/P/0060930535.0...
1165,277427,0060934417,0,Bel Canto: A Novel,Ann Patchett,2002,Perennial,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...,http://images.amazon.com/images/P/0060934417.0...
1168,277427,0061009059,9,One for the Money (Stephanie Plum Novels (Pape...,Janet Evanovich,1995,HarperTorch,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...,http://images.amazon.com/images/P/0061009059.0...
1174,277427,006440188X,0,The Secret Garden,Frances Hodgson Burnett,1998,HarperTrophy,http://images.amazon.com/images/P/006440188X.0...,http://images.amazon.com/images/P/006440188X.0...,http://images.amazon.com/images/P/006440188X.0...
...,...,...,...,...,...,...,...,...,...,...
1029196,275970,1400031354,0,Tears of the Giraffe (No.1 Ladies Detective Ag...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031354.0...,http://images.amazon.com/images/P/1400031354.0...,http://images.amazon.com/images/P/1400031354.0...
1029197,275970,1400031362,0,Morality for Beautiful Girls (No.1 Ladies Dete...,Alexander McCall Smith,2002,Anchor,http://images.amazon.com/images/P/1400031362.0...,http://images.amazon.com/images/P/1400031362.0...,http://images.amazon.com/images/P/1400031362.0...
1029270,275970,1573229725,0,Fingersmith,Sarah Waters,2002,Riverhead Books,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...,http://images.amazon.com/images/P/1573229725.0...
1029309,275970,1586210661,9,Me Talk Pretty One Day,David Sedaris,2001,Time Warner Audio Major,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...,http://images.amazon.com/images/P/1586210661.0...


In [26]:
pt = final_ratings.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating').fillna(0)
write_output(f"Pivot table created with shape: {pt.shape}")
write_output(f"Books in pivot table: {pt.shape[0]}")
write_output(f"Users in pivot table: {pt.shape[1]}")
pt

Pivot table created with shape: (679, 810)
Books in pivot table: 679
Users in pivot table: 810
Users in pivot table: 810


User-ID,254,2276,2766,2977,3363,4017,4385,6251,6323,6543,...,271705,273979,274004,274061,274301,274308,275970,277427,277639,278418
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2nd Chance,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Bend in the Road,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Year of Wonders,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
You Belong To Me,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zoya,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
from sklearn.metrics.pairwise import cosine_similarity # Calculate cosine similarity/euclidean distance
similarity_scores = cosine_similarity(pt)
write_output(f"Cosine similarity matrix created with shape: {similarity_scores.shape}")
similarity_scores.shape

Cosine similarity matrix created with shape: (679, 679)


(679, 679)

In [28]:
# now we will make a recommend function which will take book title as input and return top 5 similar books based on cosine similarity
def recommend(book_name): 
    try:
        index = np.where(pt.index==book_name)[0][0]  # get the index of the book name in the pivot table
        distance = similarity_scores[index]  # get the similarity scores for that book
        similar_books = sorted(list(enumerate(distance)), key=lambda x: x[1], reverse=True)[1:6]  # sort the books based on similarity scores and get top 5
        
        write_output(f"\nRecommendations for '{book_name}':")
        recommendations = []
        for i, (idx, score) in enumerate(similar_books, 1):
            book_title = pt.index[idx]
            write_output(f"  {i}. {book_title} (Similarity: {score:.4f})")
            recommendations.append(book_title)
            print(book_title)
        
        return recommendations
    except IndexError:
        error_msg = f"Book '{book_name}' not found in the dataset."
        write_output(error_msg)
        print(error_msg)
        return []

write_output("Recommendation function created successfully!")

Recommendation function created successfully!


In [29]:
write_output("\n" + "="*30)
write_output("TESTING RECOMMENDATIONS")
write_output("="*30)

# Test the recommendation system
recommendations = recommend('The Da Vinci Code')


TESTING RECOMMENDATIONS

Recommendations for 'The Da Vinci Code':
  1. Angels &amp; Demons (Similarity: 0.2882)
Angels &amp; Demons
  2. Touching Evil (Similarity: 0.2375)
Touching Evil
  3. Saving Faith (Similarity: 0.2211)
Saving Faith
  4. The Sweet Potato Queens' Book of Love (Similarity: 0.2134)
The Sweet Potato Queens' Book of Love
  5. Middlesex: A Novel (Similarity: 0.2133)
Middlesex: A Novel


In [30]:
# Test with multiple books
test_books = ['Angels & Demons', 'The Client', 'A Time to Kill', 'Interview with the Vampire']

for book in test_books:
    if book in pt.index:
        recommend(book)
    else:
        write_output(f"'{book}' not found in the filtered dataset")

# Final summary
write_output("\n" + "="*50)
write_output("ANALYSIS SUMMARY")
write_output("="*50)
write_output(f"Total books in original dataset: {len(books)}")
write_output(f"Total books used in collaborative filtering: {pt.shape[0]}")
write_output(f"Total users used in collaborative filtering: {pt.shape[1]}")
write_output(f"Popular books identified: {len(popular_df)}")
write_output("\nAnalysis completed! All results saved to output.txt")

'Angels & Demons' not found in the filtered dataset

Recommendations for 'The Client':
  1. The Pelican Brief (Similarity: 0.4447)
The Pelican Brief
  2. The Chamber (Similarity: 0.3864)
The Chamber
  3. A Time to Kill (Similarity: 0.3632)
A Time to Kill
  4. The Rainmaker (Similarity: 0.3539)
The Rainmaker
  5. The Firm (Similarity: 0.3400)
The Firm

Recommendations for 'A Time to Kill':
  1. The Pelican Brief (Similarity: 0.3943)
The Pelican Brief
  2. The Client (Similarity: 0.3632)
The Client
  3. The Firm (Similarity: 0.3312)
The Firm
  4. The Chamber (Similarity: 0.2905)
The Chamber
  5. The Rainmaker (Similarity: 0.2768)
The Rainmaker

Recommendations for 'Interview with the Vampire':
  1. The Vampire Lestat (Vampire Chronicles, Book II) (Similarity: 0.4253)
The Vampire Lestat (Vampire Chronicles, Book II)
  2. The Tale of the Body Thief (Vampire Chronicles (Paperback)) (Similarity: 0.3833)
The Tale of the Body Thief (Vampire Chronicles (Paperback))
  3. The Witching Hour (Lives