In [18]:
# Step 1: Data Preparation

import pandas as pd
data = pd.read_csv("book_details.csv")
data = pd.DataFrame(data)
books = data
print(data[["Authors","Title"]].head())

           Authors                                     Title
0  MICHAEL ABBEY 외              ORACLE 9i A BEGINNER'S GUIDE
1    Adrian Farrel           Network Management: Know It All
2    Adrian Farrel           Network Management: Know It All
3    Adrian Farrel           Network Management: Know It All
4       Peter Abel  IBM PC Assembly Language and Programming


In [19]:
# Step 2: TF-IDF Vectorization

from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(data['Title'] + " " + data['Authors'])

In [20]:
# Step 3: BERT Embeddings

from transformers import BertTokenizer, BertModel
import torch
import numpy as np
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def get_bert_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        output = model(**inputs)
    return output['pooler_output'].numpy()

bert_embeddings = np.array([get_bert_embedding(text)[0] for text in (data['Title'] + " " + data['Authors'])])

In [21]:
# Step 4: Combining TF-IDF and BERT

from sklearn.metrics.pairwise import cosine_similarity

def get_combined_similarities(query):
    query_tfidf = tfidf_vectorizer.transform([query])
    query_bert = get_bert_embedding(query)
    
    tfidf_similarities = cosine_similarity(query_tfidf, tfidf_matrix)
    bert_similarities = cosine_similarity(query_bert, bert_embeddings)
    
    combined_similarities = 0.5 * tfidf_similarities + 0.5 * bert_similarities
    
    return combined_similarities[0]  # Return the flattened array

In [22]:
# Step 5: Recommendation Logic

import numpy as np

def recommend(query):
    combined_similarities = get_combined_similarities(query)
    
    recommended_books_indices = np.argsort(combined_similarities)[::-1][:5]  # Top 10 recommendations
    
    return data.iloc[recommended_books_indices]

In [23]:
print(recommend("show me books on networking"))

                                 Title              Authors  \
15  Star Schema The Complete Reference  Christopher Adamson   
14  Star Schema The Complete Reference  Christopher Adamson   
1      Network Management: Know It All        Adrian Farrel   
2      Network Management: Know It All        Adrian Farrel   
3      Network Management: Know It All        Adrian Farrel   

                                          Description                Rating  \
15  The definitive guide to dimensional design for...  Rating not available   
14  The definitive guide to dimensional design for...  Rating not available   
1                           Description not available  Rating not available   
2                           Description not available  Rating not available   
3                           Description not available  Rating not available   

             ISBN  
15  9780071744324  
14  9780071744324  
1   9788131221563  
2   9788131221563  
3   9788131221563  
