In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load data
data = pd.read_csv(r"C:\Users\sriva\Downloads\book_data.csv.zip")

# Drop rows with null values in 'book_desc'
data = data.dropna(subset=['book_desc'])

# Verify data loading
print("Data loaded successfully. Number of books:", data.shape[0])

# Extract features (book descriptions)
features = data["book_desc"].tolist()

# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(features)

# Compute cosine similarity matrix using sparse matrices
similarity = linear_kernel(csr_matrix(tfidf_matrix), csr_matrix(tfidf_matrix))

# Set book_title column as index for fast lookup
indices = pd.Series(data.index, index=data['book_title']).drop_duplicates()

# Define function to recommend similar books using Nearest Neighbors
def book_recommendation(title, tfidf_matrix=tfidf_matrix, nbrs=None, indices=indices):
    if title not in indices:
        return f"Book title '{title}' not found in the dataset."
    
    if nbrs is None:
        # Fit the Nearest Neighbors model
        nbrs = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='cosine').fit(tfidf_matrix)
    
    index = indices[title]
    distances, indices = nbrs.kneighbors(tfidf_matrix[index], n_neighbors=6)
    
    # Exclude the book itself
    recommended_indices = indices[0][1:]
    
    # Check if recommendations are found
    if len(recommended_indices) == 0:
        return "No recommendations found."
    
    return data['book_title'].iloc[recommended_indices]

# Example usage
input_title = input("Enter the book name: ")
print("\nTop 5 Recommended Books:")
recommendations = book_recommendation(input_title)
print(recommendations)

Data loaded successfully. Number of books: 52970


MemoryError: Unable to allocate 5.90 GiB for an array with shape (1584423582,) and data type int32

In [5]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load data
data = pd.read_excel(r"C:\Users\sriva\OneDrive\Documents\new_data_book.xlsx")

# Drop rows with null values in 'book_desc'
data = data.dropna(subset=['book_desc'])

# Verify data loading
print("Data loaded successfully. Number of books:", data.shape[0])

# Extract features (book descriptions)
features = data["book_desc"].tolist()

# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(features)

# Compute cosine similarity matrix using sparse matrices
similarity = linear_kernel(csr_matrix(tfidf_matrix), csr_matrix(tfidf_matrix))

# Set book_title column as index for fast lookup
indices = pd.Series(data.index, index=data['book_title']).drop_duplicates()

# Define function to recommend similar books using Nearest Neighbors
def book_recommendation(title, tfidf_matrix=tfidf_matrix, nbrs=None, indices=indices):
    if title not in indices:
        return f"Book title '{title}' not found in the dataset."
    
    if nbrs is None:
        # Fit the Nearest Neighbors model
        nbrs = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='cosine').fit(tfidf_matrix)
    
    index = indices[title]
    distances, indices = nbrs.kneighbors(tfidf_matrix[index], n_neighbors=6)
    
    # Exclude the book itself
    recommended_indices = indices[0][1:]
    
    # Check if recommendations are found
    if len(recommended_indices) == 0:
        return "No recommendations found."
    
    return data['book_title'].iloc[recommended_indices]

# Example usage
input_title = input("Enter the book name: ")
print("\nTop 5 Recommended Books:")
recommendations = book_recommendation(input_title)
print(recommendations)

ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.

In [20]:
#!pip install openpyxl
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load data from Excel file
#excel_file = r"C:\Users\sriva\Downloads\book_data.xlsx"
data = pd.read_excel(r"C:\Users\sriva\OneDrive\Documents\new_data_book.xlsx")  # Adjust sheet_name as per your Excel file structure

# Drop rows with null values in 'book_desc'
data = data.dropna(subset=['book_desc'])

# Verify data loading
print("Data loaded successfully. Number of books:", data.shape[0])

# Extract features (book descriptions)
features = data["book_desc"].tolist()

# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(features)

# Compute cosine similarity matrix using sparse matrices
similarity = linear_kernel(csr_matrix(tfidf_matrix), csr_matrix(tfidf_matrix))

# Set book_title column as index for fast lookup
indices = pd.Series(data.index, index=data['book_title']).drop_duplicates()

# Define function to recommend similar books 
def book_recommendation(title, tfidf_matrix=tfidf_matrix, nbrs=None, indices=indices):
    if title not in indices:
        return f"Book title '{title}' not found in the dataset."
    
    if nbrs is None:
        # Fit the Nearest Neighbors model
        nbrs = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='cosine').fit(tfidf_matrix)
    
    index = indices[title]
    distances, indices = nbrs.kneighbors(tfidf_matrix[index], n_neighbors=6)
    
    # Exclude the book itself
    recommended_indices = indices[0][1:]
    
    # Check if recommendations are found
    if len(recommended_indices) == 0:
        return "No recommendations found."
    
    return data['book_title'].iloc[recommended_indices]

# Example usage
input_title = input("Enter the book name: ")
print("\nTop 5 Recommended Books:")
recommendations = book_recommendation(input_title)
print(recommendations)

Data loaded successfully. Number of books: 1027


Enter the book name:  Lord of the Flies



Top 5 Recommended Books:
588           Left Behind
615     The Killer Angels
361      A Separate Peace
777    Night World, No. 1
192          Oliver Twist
Name: book_title, dtype: object
