# In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Load the raw CSV exports.
# low_memory=False makes pandas parse Books.csv in one pass, so each column's
# dtype is inferred from the whole column rather than chunk by chunk.
# NOTE(review): an earlier dtype={'column_name': str} argument looked like an
# unfilled template placeholder (no column used in this file has that name),
# so it was dropped. Pass a real {column: dtype} mapping here if a column
# needs a fixed dtype.
books = pd.read_csv('Books.csv', low_memory=False)
ratings = pd.read_csv('Ratings.csv')
users = pd.read_csv('Users.csv')  # loaded but not used in the code shown here

# Minimum number of ratings a user / a book needs to stay in the dataset.
MIN_RATINGS = 5


def _keep_frequent(frame, key, min_count):
    """Return the rows of ``frame`` whose ``key`` value occurs >= ``min_count`` times.

    Row order and index labels are preserved. Rows whose key is missing (NaN)
    are dropped, because they get no count.
    """
    occurrences = frame[key].map(frame[key].value_counts())
    return frame[occurrences >= min_count]


# Reduce dataset size: first keep users with enough ratings, then, among the
# remaining ratings, keep books (ISBNs) with enough ratings.
ratings_filtered = _keep_frequent(ratings, 'User-ID', MIN_RATINGS)
# Despite its name this still holds *rating* rows (one per user/ISBN pair);
# the name is kept so other cells that reference it continue to work.
books_filtered = _keep_frequent(ratings_filtered, 'ISBN', MIN_RATINGS)

# Attach book metadata to every remaining rating (inner join on ISBN, so
# ratings for ISBNs missing from Books.csv are dropped).
df = pd.merge(books_filtered, books, on='ISBN')

# Title x user rating matrix. Duplicate (title, user) pairs are averaged,
# and pairs with no rating become 0.
pt = df.pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating', aggfunc='mean')
pt = pt.fillna(0)

# Convert the pivot table to a sparse matrix (same row order as pt.index).
pt_sparse = csr_matrix(pt.values)

# Pairwise cosine similarity between book rows; row i / column j follow the
# order of pt.index, which is how recommend() maps positions back to titles.
similarity_scores = cosine_similarity(pt_sparse)

# Recommendation Function
def recommend(book_name, top_n=4):
    """Return the books most similar to ``book_name``.

    The title is looked up in the global pivot table ``pt``; every other
    title is ranked by its score in the matching row of the global
    ``similarity_scores`` matrix, and the best ``top_n`` are returned.

    Args:
        book_name: Title to look up. It must match an entry of ``pt.index``
            exactly.
        top_n: Maximum number of recommendations to return. Defaults to 4,
            the number produced by the previously hard-coded slice.
            Values below 0 are treated as 0.

    Returns:
        A list with one entry per recommended book, best match first. Each
        entry is ``[title, author, image_url]`` read from the first row of
        the global ``books`` frame that carries the title (the entry is an
        empty list if ``books`` has no such row). Returns ``[]`` when
        ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if matches.size == 0:
        # Unknown title: nothing to recommend.
        return []
    index = matches[0]

    # Exclude the queried book by position instead of assuming it sorts
    # first: the sort is stable, so an earlier row with a tied score would
    # otherwise push the query itself into the results.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    data = []
    for position, _score in candidates[:max(top_n, 0)]:
        title = pt.index[position]
        # One metadata row per title; several editions may share a title.
        details = books[books['Book-Title'] == title].drop_duplicates('Book-Title')
        item = []
        for column in ('Book-Title', 'Book-Author', 'Image-URL-M'):
            item.extend(details[column].tolist())
        data.append(item)

    return data

# Smoke-test the recommendation function on a known title.
print(recommend('1984'))

# Save the artifacts with pickle. Each file is opened in a `with` block so the
# handle is flushed and closed even if pickling raises.
# NOTE(review): 'pt.pkl' receives the sparse matrix only, so the Book-Title
# index of `pt` is not saved; confirm that whatever loads these files does not
# need the titles to map a book name to a matrix row.
for pickle_path, artifact in (
    ('pt.pkl', pt_sparse),
    ('books.pkl', books),
    ('similarity_scores.pkl', similarity_scores),
):
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(artifact, pickle_file)


# Output of print(recommend('1984')):
# [['Animal Farm', 'George Orwell', 'http://images.amazon.com/images/P/0451526341.01.MZZZZZZZ.jpg'], ['Brave New World', 'Aldous Huxley', 'http://images.amazon.com/images/P/0060809833.01.MZZZZZZZ.jpg'], ['The Cigarette Girl: A Novel', 'Carol Wolper', 'http://images.amazon.com/images/P/1573221376.01.MZZZZZZZ.jpg'], ['We', 'Yevgeny Zamyatin', 'http://images.amazon.com/images/P/0380633132.01.MZZZZZZZ.jpg']]
