In [1]:
import pandas as pd

import joblib
import numpy as np

from scipy.sparse import csr_matrix
from surprise import SVD, Dataset, Reader, BaselineOnly, AlgoBase
from surprise.model_selection import train_test_split
from surprise import accuracy

In [2]:

books_df = pd.read_csv('books_df_processed.csv')
users_df = pd.read_csv('users_df_processed.csv')
rating_df = pd.read_csv('filtered_ratings.csv')

model = joblib.load('best_svd_model.joblib')

In [3]:
books_df.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [7]:
import pandas as pd
import joblib
import numpy as np

def get_recos(user_id, n=10):
    rated_books = rating_df[(rating_df['User-ID'] == user_id) & (rating_df['Book-Rating'] > 0)]['ISBN'].unique()
    all_books = books_df['ISBN'].unique()
    unrated_books = sorted(set(all_books) - set(rated_books))

    predictions = []
    for book_id in unrated_books:
        try:
            pred = model.predict(uid=user_id, iid=book_id)
            predictions.append((book_id, pred.est))
        except:
            continue

    predictions.sort(key=lambda x: x[1], reverse=True)
    top_n = predictions[:n]

    recos_df = pd.DataFrame(top_n, columns=['ISBN', 'pred_rating'])
    recos_df = recos_df.merge(books_df, on='ISBN', how='left')

    return recos_df[['ISBN', 'Book-Title', 'Book-Author', 'Publisher', 'Year-Of-Publication', 'pred_rating']]

unique_users = rating_df['User-ID'].unique()
random_user = pd.Series(unique_users).sample(1).values[0]

print(f"Top recommendations for user: {random_user}\n")
recommendations = get_recos(random_user)
print(recommendations.head(10))


Top recommendations for user: 37034

         ISBN                                         Book-Title  \
0  0836213319                       Dilbert: A Book of Postcards   
1  0060256672       Where the Sidewalk Ends : Poems and Drawings   
2  1558532854       Postcards from Live and Learn and Pass It on   
3  1888054557   Postmarked Yesteryear: 30 Rare Holiday Postcards   
4  0312099045  Route 66 Postcards: Greetings from the Mother ...   
5  0877792208  The Official SCRABBLE (r) Players Dictionary, ...   
6  0689710682                   Mrs. Frisby and the Rats of Nimh   
7  089471838X                Natural California: A Postcard Book   
8  1565548353  Halloween: Romantic Art and Customs Of Yestery...   
9  0060194995  To Kill a Mockingbird : The 40th Anniversary E...   

                  Book-Author                   Publisher  \
0                 Scott Adams          Andrews McMeel Pub   
1            Shel Silverstein               HarperCollins   
2       H. Jackson, Jr. Brown  

In [7]:
def books_read(user_id):
    # Get all ratings by the user
    user_ratings = rating_df[(rating_df['User-ID'] == user_id) & (rating_df['Book-Rating'] > 0)]

    profile_df = user_ratings.copy()
    
    # Keep relevant columns
    profile_df = profile_df[['ISBN', 'Book-Title', 'Book-Author', 'Publisher', 
                             'Year-Of-Publication', 'Book-Rating']]

    # Sort by rating
    profile_df = profile_df.sort_values(by='Book-Rating', ascending=False)

    return profile_df


In [8]:
print(f"\n📖 Books already rated by user: {random_user}\n")
user_profile = books_read(random_user)
print(user_profile.head(10))


📖 Books already rated by user: 169681

             ISBN                                         Book-Title  \
64425  0590222953  The Magic School Bus Gets Baked in a Cake: A B...   
64427  0590400231  Magic School Bus Meets the Rot Squad: A Book o...   
64428  0590400258  The Magic School Bus Inside Ralphie: A Book Ab...   
64429  0590414313  The Magic School Bus on the Ocean Floor (Magic...   
64430  0590484133  The Magic School Bus Hops Home: A Book About A...   
64431  0590484141  The Magic School Bus Gets Eaten: A Book About ...   
64435  0816749280  Garfield and the Teacher Creature (Planet Read...   
64436  1885222556           Disneys Out and About With Pooh Volume 1   
64433  0717289079  Disney's Tarzan (Disney's Wonderful World of R...   
64426  0590313185                Bunnicula: A Rabbit-Tale of Mystery   

                         Book-Author                             Publisher  \
64425                    Joanna Cole                            Scholastic   
64427      

In [16]:
import pandas as pd
import joblib
import numpy as np
from surprise import SVD, Dataset, Reader

# Load data
books_df = pd.read_csv('books_df_processed.csv')
users_df = pd.read_csv('users_df_processed.csv')
rating_df = pd.read_csv('filtered_ratings.csv')

# Debug data
print(f"Unique users: {rating_df['User-ID'].nunique()}")
print(f"Unique books: {rating_df['ISBN'].nunique()}")
print(f"Rating distribution:\n{rating_df['Book-Rating'].value_counts()}")
print(f"Duplicate user-item pairs: {rating_df.duplicated(subset=['User-ID', 'ISBN']).sum()}")
print(f"Missing ISBNs: {len(set(rating_df['ISBN']) - set(books_df['ISBN']))}")

# Load model
model = joblib.load('best_svd_model.joblib')
print(f"Model parameters: n_factors={model.n_factors}, n_epochs={model.n_epochs}")

# Recommendation function
def get_recos(user_id, n=30):
    if user_id not in rating_df['User-ID'].values:
        print(f"User {user_id} not found! Returning empty recommendations.")
        return pd.DataFrame()

    # Get rated books
    rated_books = rating_df[(rating_df['User-ID'] == user_id) & (rating_df['Book-Rating'] > 0)]['ISBN'].unique()
    all_books = books_df['ISBN'].unique()
    unrated_books = list(set(all_books) - set(rated_books))

    # Predict ratings
    predictions = []
    for book_id in unrated_books:
        try:
            pred = model.predict(uid=user_id, iid=book_id)
            predictions.append((book_id, pred.est))
        except:
            continue

    # Sort and build recommendations
    predictions.sort(key=lambda x: x[1], reverse=True)
    top_n = predictions[:n]
    recos_df = pd.DataFrame(top_n, columns=['ISBN', 'pred_rating'])
    recos_df = recos_df.merge(books_df, on='ISBN', how='left')

    return recos_df[['ISBN', 'Book-Title', 'Book-Author', 'Publisher', 'Year-Of-Publication', 'pred_rating']]

# Test with multiple users
unique_users = rating_df['User-ID'].unique()
for user in np.random.choice(unique_users, 3):
    print(f"\nTop recommendations for user {user}:")
    recommendations = get_recos(user, n=5)
    print(recommendations[['ISBN', 'Book-Title', 'pred_rating']].head())

Unique users: 6511
Unique books: 43968
Rating distribution:
Book-Rating
8     24070
10    19526
7     17215
9     16577
5     11646
6      8186
4      1826
3      1184
2       533
1       343
Name: count, dtype: int64
Duplicate user-item pairs: 0
Missing ISBNs: 0
Model parameters: n_factors=100, n_epochs=25

Top recommendations for user 113277:
         ISBN                                         Book-Title  pred_rating
0  2895400644  J'Ã?Â©tais si timide que j'ai mordu la maÃ?Â®t...           10
1  0486280616  Adventures of Huckleberry Finn (Dover Thrift E...           10
2  0140320970                            The Twenty-One Balloons           10
3  0671743481  The University of Chicago Spanish - English En...           10
4  0965455572                                     Fortunes Rocks           10

Top recommendations for user 115572:
         ISBN                                         Book-Title  pred_rating
0  0836213319                       Dilbert: A Book of Postcards     