In [None]:
import pandas as pd
import numpy as np
import warnings
# Install a blanket "ignore" filter: every warning category is silenced for the
# rest of the kernel session (including deprecation notices from pandas/sklearn).
# NOTE(review): consider narrowing this to specific categories once the notebook is stable.
warnings.filterwarnings("ignore")

In [None]:
# Raw ratings table; the file is semicolon-separated. Later cells read the
# 'User-ID', 'ISBN' and 'Rating' columns from this frame.
ratings = pd.read_csv('Ratings.csv', delimiter=';')


In [None]:
from sklearn.datasets import dump_svmlight_file
from scipy.sparse import csr_matrix
# Give every distinct user / book a 1-based position, in order of first appearance.
# Because positions start at 1, row 0 and column 0 of the matrix below stay empty.
user_map = {
    raw_user: position
    for position, raw_user in enumerate(ratings['User-ID'].unique(), start=1)
}
book_map = {
    isbn: position
    for position, isbn in enumerate(ratings['ISBN'].unique(), start=1)
}

# Translate each rating row into its (row, column) coordinates.
user_indx = ratings['User-ID'].map(user_map)
book_indx = ratings['ISBN'].map(book_map)

# (value, (row, col)) construction; the shape is inferred from the largest indices.
sparse_matrix = csr_matrix((ratings['Rating'], (user_indx, book_indx)))

# One label per matrix row: 0 for the empty row 0, then the raw user ids.
col1 = [0, *ratings['User-ID'].unique().tolist()]

with open('user_book_matrix.libsvm', 'wb') as f:
    # zero_based=True writes the column indices exactly as stored in the matrix.
    dump_svmlight_file(sparse_matrix, col1, f, zero_based=True)

print("Sparse matrix saved to user_book_matrix.libsvm")

Sparse matrix saved to user_book_matrix.libsvm


In [None]:
def drop_y_column(input_file, output_file):
    """Copy a whitespace-delimited text file, removing the first token of every line.

    Each input line is split on whitespace, its first field is discarded and the
    remaining fields are re-joined with single spaces. A line with zero or one
    field becomes an empty line in the output.

    Parameters
    ----------
    input_file : str
        Path of the file to read.
    output_file : str
        Path of the file to (over)write.
    """
    with open(input_file, 'r') as src, open(output_file, 'w') as dst:
        dst.writelines(
            " ".join(row.split()[1:]) + "\n"
            for row in src
        )

# Strip the label column from the dumped matrix, writing a features-only copy.
drop_y_column(
    input_file='user_book_matrix.libsvm',
    output_file='UserBookMatrix.libsvm',
)

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_svmlight_file

# Read the features-only libsvm file back in. The returned targets are discarded.
# NOTE(review): multilabel=True is presumably what lets the loader accept rows
# whose label column was stripped earlier; confirm against the sklearn docs.
data, _ = load_svmlight_file("UserBookMatrix.libsvm", multilabel=True, zero_based=True)
data = data.astype(int)
ratings_matrix = data.toarray()  # dense copy used by all of the indexing below

# Book metadata (semicolon-separated); 'ISBN' and 'Title' are read from it later
books = pd.read_csv('Books.csv', delimiter=';')

# Invert the id -> position dictionaries so positions can be mapped back to raw ids
user_map_reverse = dict(zip(user_map.values(), user_map.keys()))
book_map_reverse = dict(zip(book_map.values(), book_map.keys()))



# Optimized cosine similarity computation for non-zero indices
def compute_similarity_for_nonzero(user_idx):
    """Cosine similarity between one user and every row of ``ratings_matrix``.

    Only the columns in which the target user has a non-zero entry take part:
    both the dot products and the vector norms are computed on that column
    subset of the module-level ``ratings_matrix``.

    Parameters
    ----------
    user_idx : int
        1-based row position of the target user (row ``user_idx - 1`` is read).

    Returns
    -------
    numpy.ndarray
        One similarity per matrix row. Entries whose denominator is zero
        (NaN after the division) are replaced by 0.
    """
    profile = ratings_matrix[user_idx - 1]  # shift to the 0-based row
    rated_cols = np.flatnonzero(profile)    # columns the target user has rated
    target_vec = profile[rated_cols]

    # Every user's ratings, restricted to the target user's rated columns
    restricted = ratings_matrix[:, rated_cols]

    numerators = restricted @ target_vec
    denominators = np.linalg.norm(restricted, axis=1) * np.linalg.norm(target_vec)

    # Rows with a zero norm yield 0/0; silence the warning and map NaN to 0
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.nan_to_num(numerators / denominators)

# --- Recommendation parameters ---
K = 10                   # number of most-similar users consulted per target user
TOP_N_BOOKS = 5          # number of recommendations kept per target user
START_USER_IDX = 105265  # first 1-based user position to process
# NOTE(review): the start position is hard-coded, so only the tail of the user
# list is processed. Set it to 1 to cover every user.

# Output storage: one dict per (user, recommended book)
recommendations = []

# Generate recommendations
for u_idx in range(START_USER_IDX, ratings_matrix.shape[0] + 1):  # 1-based user positions
    print(u_idx)  # progress indicator

    # Similarity of the target user to every row of the matrix
    user_similarities = compute_similarity_for_nonzero(u_idx)

    # Top K most similar users as (1-based position, similarity), excluding the user itself
    top_k_similar_users = sorted(
        ((i + 1, sim) for i, sim in enumerate(user_similarities) if i != u_idx - 1),
        key=lambda pair: pair[1],
        reverse=True,
    )[:K]

    # BK: every column (book) that at least one of the K neighbours rated above 0
    BK = set()
    for sim_user_idx, _ in top_k_similar_users:
        BK.update(np.where(ratings_matrix[sim_user_idx - 1] > 0)[0])  # 0-based row

    # The similarity total does not depend on the book, so compute it once per user
    total_similarity = sum(sim_score for _, sim_score in top_k_similar_users)

    # Similarity-weighted average rating for each candidate book the user has not rated.
    # Neighbours who did not rate a book contribute a 0 to the numerator.
    estimated_ratings = {}
    if total_similarity > 0:
        for book_idx in BK:
            if ratings_matrix[u_idx - 1, book_idx] == 0:  # only consider unread books
                numerator = 0
                for sim_user_idx, sim_score in top_k_similar_users:
                    numerator += ratings_matrix[sim_user_idx - 1, book_idx] * sim_score
                estimated_ratings[book_idx] = numerator / total_similarity

    # Keep the highest-scoring books
    top_books = sorted(
        estimated_ratings.items(), key=lambda item: item[1], reverse=True
    )[:TOP_N_BOOKS]

    for book_idx, score in top_books:
        # The matrix was dumped and re-loaded with zero_based=True, and book
        # positions start at 1 (column 0 is an unused placeholder). A column index
        # is therefore already the book_map value; adding 1 would pick the
        # neighbouring book's ISBN.
        book_isbn = book_map_reverse.get(int(book_idx), None)
        if book_isbn is None:
            continue  # skip if mapping is not found
        book_title_row = books.loc[books['ISBN'] == book_isbn, 'Title']
        if book_title_row.empty:
            continue  # skip if the book is not present in the books DataFrame
        book_title = book_title_row.values[0]
        recommendations.append({
            "User_ID": user_map_reverse[u_idx],
            "Book_ID": book_isbn,
            "Book_Title": book_title,
            "Recommendation_Score": score
        })

# Write recommendations to CSV. Explicit columns keep the header even when no
# recommendation was produced.
recommendations_df = pd.DataFrame(
    recommendations,
    columns=["User_ID", "Book_ID", "Book_Title", "Recommendation_Score"],
)
recommendations_df.to_csv("Book_Recommendations.csv", index=False)

print("Recommendations have been saved to 'Book_Recommendations.csv'")


105265
105266
105267
105268
105269
105270
105271
105272
105273
105274
105275
105276
105277
105278
105279
105280
105281
105282
105283
Recommendations have been saved to 'Book_Recommendations.csv'
