In [43]:
import pandas as pd
from surprise import SVD, Reader, Dataset
from surprise.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [44]:
ratings_df = pd.read_csv("ratings_cleaned.csv") 
users = pd.read_csv('users_cleaned.csv')
books_df = pd.read_csv('books_cleaned.csv')

In [45]:
ratings_df

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6
...,...,...,...
1149775,276704,1563526298,9
1149776,276706,0679447156,0
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [46]:
books_df

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,classical mythology,mark p. o. morford,2002.0,oxford university press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,clara callan,richard bruce wright,2001.0,harperflamingo canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,decision in normandy,carlo d'este,1991.0,harperperennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,flu: the story of the great influenza pandemic...,gina bari kolata,1999.0,farrar straus giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,the mummies of urumchi,e. j. w. barber,1999.0,w. w. norton &amp; company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271355,0440400988,there's a bat in bunk five,paula danziger,1988.0,random house childrens pub (mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271356,0525447644,from one to one hundred,teri sloat,1991.0,dutton books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,lily dale : the true story of the town that ta...,christine wicker,2004.0,harpersanfrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,0192126040,republic (world's classics),plato,1996.0,oxford university press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [47]:
reader = Reader(rating_scale=(0, 10))
data = Dataset.load_from_df(ratings_df[['User-ID', 'ISBN', 'Book-Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

In [48]:
collab_model = SVD()
collab_model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13f329c8680>

In [49]:
def get_collaborative_filtering_recommendations(user_id, top_n=10):
    reader = Reader(rating_scale=(0, 10))
    data = Dataset.load_from_df(ratings[['User-ID', 'ISBN', 'Book-Rating']], reader)    
    trainset, testset = train_test_split(data, test_size=0.2)
    model = SVD()
    model.fit(trainset)
    user_ratings = ratings[ratings['User-ID'] == user_id]
    book_ratings = []
    for isbn in books_df['ISBN']:
        if isbn not in user_ratings['ISBN'].values:
            book_ratings.append((isbn, model.predict(user_id, isbn).est))    
    book_ratings.sort(key=lambda x: x[1], reverse=True)    
    return book_ratings[:top_n]

In [50]:
def get_content_based_recommendations(book_title, top_n=10):
    if book_title not in books_df['Book-Title'].values:
        raise ValueError(f"Book title '{book_title}' not found in the dataset.")
    books_df['combined'] = books_df['Book-Title'] + ' ' + books_df['Book-Author']
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(books_df['combined'])
    idx = books_df[books_df['Book-Title'] == book_title].index[0]
    cosine_sim = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).flatten()
    similar_books = list(enumerate(cosine_sim))
    similar_books = sorted(similar_books, key=lambda x: x[1], reverse=True)
    top_books = similar_books[1:top_n+1]     
    recommendations = [(books_df['Book-Title'][i], score) for i, score in top_books]    
    return recommendations


In [51]:
def hybrid_recommender(user_id, book_title=None, top_n=10, alpha=0.5):
    collaborative_recs = get_collaborative_filtering_recommendations(user_id, top_n)
    if book_title:
        content_based_recs = get_content_based_recommendations(book_title, top_n)
    else:
        content_based_recs = []
    hybrid_recs = []
    if content_based_recs:
        for book, score in content_based_recs:
            hybrid_recs.append((book, score, "Content-Based"))
    for book, score in collaborative_recs:
        hybrid_recs.append((book, score, "Collaborative"))
    if content_based_recs and collaborative_recs:
        for i in range(len(hybrid_recs)):
            book, score, source = hybrid_recs[i]
            if source == "Collaborative":
                content_score = next((score_ for book_, score_ in content_based_recs if book_ == book), 0)
                hybrid_score = alpha * score + (1 - alpha) * content_score
                hybrid_recs[i] = (book, hybrid_score, "Hybrid")
    hybrid_recs.sort(key=lambda x: x[1], reverse=True)
    return hybrid_recs[:top_n]

In [52]:
user_id = 276725 
book_title = "classical mythology"  
top_n = 10

In [53]:
recommendations = hybrid_recommender(user_id, book_title, top_n, alpha=0.5)

In [54]:
print("Top-N Hybrid Recommendations:")
for book, score, source in recommendations:
    print(f"Book: {book}, Score: {score}, Source: {source}")

Top-N Hybrid Recommendations:
Book: 0743454529, Score: 4.551127890215301, Source: Hybrid
Book: 0345339738, Score: 4.5359279727324004, Source: Hybrid
Book: 0618002235, Score: 4.513282479802655, Source: Hybrid
Book: 0836220889, Score: 4.508350642169669, Source: Hybrid
Book: 0439425220, Score: 4.500839790400001, Source: Hybrid
Book: 0439136369, Score: 4.47456156797508, Source: Hybrid
Book: 0836213319, Score: 4.472428006462371, Source: Hybrid
Book: 0345348036, Score: 4.471106184911133, Source: Hybrid
Book: 067168390X, Score: 4.454391504152559, Source: Hybrid
Book: 0553274325, Score: 4.448115037594795, Source: Hybrid


In [55]:
user_id = 999999  
book_title = "classical mythology"
top_n = 5

recommendations = hybrid_recommender(user_id, book_title, top_n=top_n, alpha=0.5)

print(f"Content-Based Recommendations for New User {user_id}:")
for i, rec in enumerate(recommendations, 1):
    print(f"{i}. {rec[0]} | Score: {rec[1]:.4f} | Source: {rec[2]}")

Content-Based Recommendations for New User 999999:
1. 0439425220 | Score: 4.5288 | Source: Hybrid
2. 0743454529 | Score: 4.5226 | Source: Hybrid
3. 067168390X | Score: 4.5083 | Source: Hybrid
4. 0330262130 | Score: 4.4832 | Source: Hybrid
5. 0345339738 | Score: 4.4781 | Source: Hybrid
