In [13]:
import pandas as pd
import networkx as nx

# Load the book catalogue and the user ratings, keeping the first 10,000 rows
# of each file.
books = pd.read_csv('datasets/book/book1-100k.csv').head(10000)
user_books = pd.read_csv('datasets/rating/user_rating_0_to_1000.csv').head(10000)

# Keep only the columns the recommender uses. The explicit .copy() turns each
# selection into an independent frame, so the 'Name' assignments below write
# to it directly instead of to a slice of the loaded frame (which raises
# SettingWithCopyWarning on pandas versions without copy-on-write).
books = books[['Id', 'Name', 'Authors', 'Rating', 'pagesNumber']].copy()
user_books = user_books[['ID', 'Name', 'Rating']].copy()

# Rename the per-user rating so it does not collide with the catalogue's
# 'Rating' column once the two frames are merged.
user_books = user_books.rename(columns={'Rating': 'user_rating'})

# Normalize book names (trim whitespace, lower-case) so the merge key matches
# across both files.
books['Name'] = books['Name'].str.strip().str.lower()
user_books['Name'] = user_books['Name'].str.strip().str.lower()

# Left merge: every rating row is kept. Rows whose title has no match in
# `books` get NaN in the catalogue columns ('Id', 'Authors', 'Rating',
# 'pagesNumber').
user_books = user_books.merge(books, on="Name", how="left")

# Graph that holds both book nodes and user nodes.
G = nx.Graph()

# Register every catalogue row as a 'book' node keyed by its Id; the title,
# author string, catalogue rating and page count travel as node attributes.
for record in books.to_dict('records'):
    G.add_node(
        record['Id'],
        type='book',
        name=record['Name'],
        authors=record['Authors'],
        rating=record['Rating'],
        pages=record['pagesNumber'],
    )

# Map the textual ratings onto a 1-5 numeric scale. Any 'user_rating' value
# that is not a key of this mapping produces no edge.
rating_mapping = {
    "it was amazing": 5,
    "really liked it": 4,
    "liked it": 3,
    "it was ok": 2,
    "did not like it": 1
}

# Add one node per user and one weighted edge per (user, book) rating.
for _, row in user_books.iterrows():
    user_node = f"user_{row['ID']}"
    # The user node is added even when none of the user's rows yields an edge.
    G.add_node(user_node, type='user')

    # Rows whose title had no catalogue match carry NaN as 'Id' after the left
    # merge. add_edge() silently creates missing endpoints, so linking to NaN
    # would litter the graph with untyped junk nodes; skip those rows.
    book_node = row['Id']
    if pd.isna(book_node):
        continue

    user_rating = row.get('user_rating')
    if user_rating in rating_mapping:
        G.add_edge(user_node, book_node, weight=rating_mapping[user_rating])

# For every user node, compute the mean page count of the books linked to that
# user and collect the authors of those books.
mean_pages = {}    # user node -> mean page count (0 when no usable book)
user_authors = {}  # user node -> set of author strings

for user_node, user_attrs in G.nodes(data=True):
    if user_attrs.get('type') != 'user':
        continue

    page_counts = []
    authors_set = set()

    for book_node in G.neighbors(user_node):
        book_attrs = G.nodes[book_node]
        # Only nodes added as books carry 'pages'; anything else is ignored.
        if 'pages' not in book_attrs:
            continue

        # A missing (NaN) page count would turn the whole mean into NaN, so it
        # is left out of the average.
        pages = book_attrs['pages']
        if not pd.isna(pages):
            page_counts.append(pages)

        # A missing author cell is read from CSV as a float NaN, which has no
        # .split(); only real strings are split. Names are kept exactly as
        # split (no trimming) so they compare equal to the same split applied
        # to a book's author string elsewhere in the notebook.
        authors = book_attrs.get('authors')
        if isinstance(authors, str):
            authors_set.update(authors.split(","))

    # Save mean pages and authors for the user
    mean_pages[user_node] = sum(page_counts) / len(page_counts) if page_counts else 0
    user_authors[user_node] = authors_set


In [26]:
def _is_missing(value):
    """Return True for None and NaN (NaN is the only value unequal to itself)."""
    return value is None or bool(value != value)


def _page_similarity(book_pages, user_mean):
    """Return the page-count bonus: 1 within 150 pages of the mean, 0.5 within 300, else 0."""
    if _is_missing(book_pages):
        return 0
    gap = abs(book_pages - user_mean)
    if gap < 150:
        return 1
    # The 0.5 band starts exactly at 150: a gap of precisely 150 pages used to
    # fall between the "< 150" and "> 150" tests and scored 0.
    if gap < 300:
        return 0.5
    return 0


def _author_names(authors):
    """Split a comma-separated author string into a set of trimmed, non-empty names."""
    if not isinstance(authors, str):
        # Missing author cells arrive as float NaN, which cannot be split.
        return set()
    return {name.strip() for name in authors.split(",") if name.strip()}


def recommend_books_for_user(user_id, top_n=5, exclude_read=False):
    """Rank the catalogue for one user and return the best-scoring book node ids.

    Every node of the module-level graph ``G`` whose ``type`` is ``'book'`` is
    scored as::

        catalogue rating + page-similarity bonus + author bonus

    * page-similarity bonus: 1 when the book's page count is less than 150
      pages away from the user's mean page count, 0.5 when it is less than 300
      pages away, otherwise 0.
    * author bonus: 1 when the book shares at least one author name with the
      user's author set (names are compared after trimming whitespace).

    A missing (None/NaN) rating counts as 0 so that NaN scores cannot disturb
    the sort; missing pages or authors simply earn no bonus.

    The function reads the module-level ``G``, ``mean_pages`` and
    ``user_authors``.

    Args:
        user_id: Identifier used to build the ``user_<id>`` node name.
        top_n: Maximum number of books to return (default 5). ``None`` returns
            every scored book.
        exclude_read: When True, books already linked to the user in ``G`` are
            left out of the result. Defaults to False.

    Returns:
        A list of book node ids, best score first. Books with equal scores
        keep the order in which they appear in ``G.nodes``.
    """
    user_node = f"user_{user_id}"

    # Users without an entry fall back to a mean of 0 and an empty author set.
    # NOTE(review): a mean of 0 still rewards books shorter than 150/300 pages
    # for users with no history - confirm whether that is intended.
    user_mean = mean_pages.get(user_node, 0)
    user_author_set = {
        name.strip()
        for name in user_authors.get(user_node, set())
        if isinstance(name, str) and name.strip()
    }

    already_read = set()
    if exclude_read and user_node in G:
        already_read = set(G.neighbors(user_node))

    # Calculate scores for each book
    book_scores = {}
    for book_node in G.nodes:
        attrs = G.nodes[book_node]
        if attrs.get('type') != 'book' or book_node in already_read:
            continue

        book_rating = attrs['rating']
        score = 0.0 if _is_missing(book_rating) else book_rating
        score += _page_similarity(attrs['pages'], user_mean)

        # Add bonus for authors match
        if _author_names(attrs['authors']) & user_author_set:
            score += 1

        book_scores[book_node] = score

    # Sort books by score, highest first (stable for ties).
    ranked = sorted(book_scores.items(), key=lambda item: item[1], reverse=True)

    return [book_node for book_node, _ in ranked[:top_n]]


In [27]:
def display_recommendations(user_id, recommended_books):
    """Print a numbered list of recommended books for a user.

    For each node id in ``recommended_books`` the title and author string are
    read from the module-level graph ``G`` and printed, followed by a
    50-character separator line.

    Args:
        user_id: Identifier shown in the heading.
        recommended_books: Iterable of book node ids present in ``G``.
    """
    separator = "=" * 50
    print(f"\nRecommended Books for User {user_id}:\n")

    position = 0
    for node_id in recommended_books:
        position += 1
        attrs = G.nodes[node_id]
        title = attrs['name']
        writers = attrs['authors']
        print(f"{position}. {title}")
        print(f"   Authors: {writers}")
        print(separator)

# Example: compute recommendations for the user whose graph node is "user_1".
# Both names are bound at notebook (module) level.
user_id = 1
recommended_books = recommend_books_for_user(user_id)

# Print the recommended titles and authors for that user.
display_recommendations(user_id, recommended_books)



Recommended Books for User 1:

1. bullets, badges, and bullshit
   Authors: Phil T. O'Brien
2. the real estate investor's tax guide
   Authors: Vernon Hoven
3. singers, heroes, and gods in the odyssey
   Authors: Charles Segal
4. greek literature in the classical period: the prose of historiography and oratory: greek literature
   Authors: Gregory Nagy
5. greek literature in the roman period and in late antiquity: greek literature
   Authors: Gregory Nagy
