In [None]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity

import nltk
import re
#nltk.download('stopwords')
stemmer = nltk.SnowballStemmer("english")
from nltk.corpus import stopwords
import string
stopword=set(stopwords.words('english'))

from tabulate import tabulate

In [None]:
# Import Data
# Read the first 67,500 rows, keep the five descriptive columns (in this
# order), and give them shorter names for the rest of the notebook.
data = (
    pd.read_csv("/kaggle/input/book-recommendation-dataset/Books.csv", nrows=67500)
    .loc[:, ["ISBN", "Book-Title", "Book-Author", "Year-Of-Publication", "Publisher"]]
    .rename(
        columns={
            "Book-Title": "Title",
            "Book-Author": "Author",
            "Year-Of-Publication": "Year",
        }
    )
)

In [None]:
# Data Cleaning
# Show the per-column count of missing values, drop every row that has at
# least one missing field, then show the counts again to confirm.
print(data.isna().sum())
data = data.dropna(axis="index", how="any")
print(data.isna().sum())

In [None]:
# Data Preprocessing
def clean(text):
    """Normalize a piece of text for TF-IDF matching.

    The value is converted to ``str`` and lower-cased, then the following
    are removed, in this order: square-bracketed spans, URLs (``http(s)://``
    or ``www.``), angle-bracket tags, ASCII punctuation, newline characters,
    and any word that contains a digit. Whitespace is not collapsed, so
    removed pieces can leave runs of spaces behind.

    Args:
        text: Value to normalize; anything accepted by ``str()``.

    Returns:
        The normalized string.
    """
    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being parsed
    # as invalid Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = text.replace('\n', '')
    text = re.sub(r'\w*\d\w*', '', text)
    return text
# Store a normalized copy of every title alongside the original
data["cleaned_Title"] = data["Title"].map(clean)

In [None]:
# Inspect the frame, which now includes the cleaned_Title column
data

In [None]:
# Lookup table: cleaned title -> row *position* in `data`.
# The similarity scores are computed from data["cleaned_Title"] in row order
# and addressed by position, while the row labels of `data` have gaps after
# dropna(); storing labels here would select the wrong row (or go out of range).
indices = pd.Series(np.arange(len(data)), index=data['cleaned_Title'])
# Series.drop_duplicates() compares the values, which are all unique, so it
# never removed repeated titles. De-duplicate on the index instead, keeping
# the first occurrence of each cleaned title.
indices = indices[~indices.index.duplicated(keep='first')]

In [None]:
# Inspect the cleaned-title lookup Series built in the previous cell
indices

In [None]:
# Generate Similarity
def get_similarity_cosine(title):
    """Return the cosine similarity of one title against every title in ``data``.

    A TF-IDF matrix is fitted on ``data["cleaned_Title"]`` on every call and
    only the requested row of the similarity matrix is computed, instead of
    materializing the full n x n dense matrix just to read one row from it.

    Args:
        title: Cleaned title (as produced by ``clean``) to look up in the
            module-level ``indices`` Series.

    Returns:
        1-D NumPy array with one similarity score per row of ``data``, in
        row order.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    # Resolve the row first so an unknown title fails before any heavy work.
    # `indices[title]` is a scalar, or a Series when the title is repeated;
    # atleast_1d handles both and the first match is used. The stored value
    # is used as a row position into the TF-IDF matrix.
    row = int(np.atleast_1d(indices[title])[0])

    tfidf_matrix = text.TfidfVectorizer().fit_transform(data["cleaned_Title"].tolist())
    # Shape (1, n_rows) -> flatten to a plain 1-D row of scores
    return cosine_similarity(tfidf_matrix[row], tfidf_matrix).ravel()

In [None]:
# Generate Similarity
def similarity(ISBN, title, similarity_scores, reverse, threshold=0.5, top_n=20):
    """Print ranked recommendation tables for one book.

    Rows of the module-level ``data`` frame are ranked by their score, rows
    with a score that is not greater than 0 and rows carrying the query ISBN
    are left out, and two tables are printed: rows scoring above
    ``threshold`` and rows scoring at or below it, each capped at ``top_n``
    rows and numbered from 1.

    Args:
        ISBN: ISBN of the query book; printed and excluded from the tables.
        title: Title of the query book; printed in the header.
        similarity_scores: One score per row of ``data``, in row order.
        reverse: Truthy ranks the highest score first, falsy the lowest first.
        threshold: Score that separates the first table from the second.
        top_n: Maximum number of rows per table.

    Returns:
        None. The result is printed.
    """
    scores = np.asarray(similarity_scores, dtype=float)

    # A stable sort keeps tied scores in row order for either direction,
    # which is the same tie-breaking the built-in sorted() gives.
    order = np.argsort(-scores if reverse else scores, kind="stable")
    # Keep positive scores only, and never index past the end of `data`
    order = order[(scores[order] > 0) & (order < len(data))]

    # One positional selection instead of building the frame row by row;
    # this also keeps the columns present when nothing matched.
    ranked = data.iloc[order][["ISBN", "Title"]].copy()
    ranked["Similarity"] = scores[order]
    ranked = ranked[ranked["ISBN"] != ISBN]

    above = ranked[ranked["Similarity"] > threshold].head(top_n)
    below = ranked[ranked["Similarity"] <= threshold].head(top_n)
    # Number the rows 1..n for display
    above = above.set_axis(np.arange(1, len(above) + 1), axis=0)
    below = below.set_axis(np.arange(1, len(below) + 1), axis=0)

    print("ISBN: ", ISBN)
    print("Title: ", title)
    print("Book Recommendation:")

    print(tabulate(above, headers='keys', tablefmt = 'psql'))
    print(tabulate(below, headers='keys', tablefmt = 'psql'))

In [None]:
# Generate Recommendation
def book_recommendation(ISBN, title):
    """Print recommendations for the book with the given ISBN and raw title.

    The title is normalized with ``clean``, scored against the other titles
    with ``get_similarity_cosine``, and the scores are handed to
    ``similarity`` with ``reverse=True`` for printing.
    """
    scores_for_title = get_similarity_cosine(clean(title))
    similarity(ISBN, title, scores_for_title, True)
    

In [None]:
def Book(ISBN):
    """Look up a book by ISBN in ``data`` and print its recommendations.

    Args:
        ISBN: ISBN string to search for in the ``ISBN`` column.

    Raises:
        ValueError: If no row of ``data`` has this ISBN.
    """
    titles = data.loc[data["ISBN"] == ISBN, "Title"]
    if titles.empty:
        # Same exception type .item() raised on an empty match, but with a
        # message that names the actual problem.
        raise ValueError(f"ISBN {ISBN!r} was not found in the book data")
    # If the ISBN appears more than once, use the first matching row
    book_recommendation(ISBN, titles.iloc[0])

# Example run: print the recommendation tables for one ISBN
Book("051513290X")

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, GRU
import numpy as np

# Step 1: Prepare Data
# The columns were renamed to "Title" / "Author" when the data was loaded,
# so the original "Book-Title" / "Book-Author" names no longer exist.
texts = data['Title'].astype(str) + ' ' + data['Author'].astype(str)  # Combine title and author for input
labels = data['Title']  # Use book titles as the "recommendation target"

# Tokenize the text
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, padding='post')

# Encode labels: one integer class per distinct title.
# A word-level tokenizer on the titles would make the target only the first
# word of each title, and decoding would then yield single words instead of
# book titles.
title_ids, unique_titles = pd.factorize(labels)

# Map indices to titles for decoding
index_to_title = dict(enumerate(unique_titles))

# Step 2: Build RNN Model
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=padded_sequences.shape[1]),
    LSTM(128, return_sequences=False),
    # One output unit per distinct title (class ids run from 0 to n_titles - 1)
    Dense(len(unique_titles), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 3: Train the Model
labels_for_training = np.asarray(title_ids)
model.fit(padded_sequences, labels_for_training, epochs=10, batch_size=32)

# Step 4: Make Recommendations
def recommend_book(book_title, num_recommendations=5):
    """Return the model's top-ranked decoded predictions for a piece of text.

    The text is tokenized and padded like the training inputs, scored by the
    module-level ``model``, and the highest-scoring output indices are
    decoded through ``index_to_title``. Indices with no entry in
    ``index_to_title`` are skipped without using up a slot.

    Args:
        book_title: Input text to score.
        num_recommendations: Maximum number of entries to return.

    Returns:
        List of decoded entries, best first. Empty when
        ``num_recommendations`` is not positive.
    """
    # Guard: a slice of [-0:] would otherwise select every class
    if num_recommendations <= 0:
        return []

    # Preprocess input
    seq = tokenizer.texts_to_sequences([book_title])
    padded_seq = pad_sequences(seq, maxlen=padded_sequences.shape[1], padding='post')

    # Predict
    predictions = model.predict(padded_seq)
    ranked_indices = np.argsort(predictions[0])[::-1]  # best score first

    # Decode predictions, walking down the ranking until enough are found
    recommendations = []
    for idx in ranked_indices:
        decoded = index_to_title.get(int(idx))
        if decoded is None:
            continue
        recommendations.append(decoded)
        if len(recommendations) == num_recommendations:
            break
    return recommendations

# Test Recommendation
# Run the trained model on one sample title and print whatever
# recommend_book() decodes for it.
test_book = "Harry Potter and the Sorcerer's Stone"  # Example input
recommended_books = recommend_book(test_book)
print("Recommendations for:", test_book)
print(recommended_books)
