# In [None]:
# Install gensim if you haven't already
# !pip install gensim

import gensim
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
import pandas as pd


# Load the book catalogue; the CSV must provide a 'description' column.
data_path = 'books_dataset.csv'
df = pd.read_csv(data_path)

# Tokenize every description into a list of word tokens.
# pandas reads empty cells as NaN (a float), which simple_preprocess cannot
# decode, so rows without a description are dropped and the remaining values
# are coerced to str before tokenizing.
processed_descriptions = [
    simple_preprocess(description)
    for description in df['description'].dropna().astype(str)
]

# Train Word2Vec model
# min_count=1 keeps every token in the vocabulary, even those seen only once.
word2vec_model = Word2Vec(
    sentences=processed_descriptions,
    vector_size=100,
    window=5,
    min_count=1,
    workers=4,
)

# Save the model. Despite the .bin suffix this is written by Word2Vec.save,
# so it has to be read back with Word2Vec.load (see below).
word2vec_model.save('book_word2vec_model.bin')

# Load the model (instead of retraining)
# word2vec_model = Word2Vec.load('book_word2vec_model.bin')

# Example: Finding the vocabulary entries most similar to a given book title
def recommend_similar_books(book_title, num_books=5):
    """Return the model entries closest to ``book_title`` in embedding space.

    The model in this file is trained on tokens produced by
    ``simple_preprocess`` from the description column, so its keys are
    individual words rather than book titles. A title that is not itself a
    key is therefore tokenized the same way, and the tokens the model knows
    are used together as the query.

    Args:
        book_title: Title (or any text) to look up. A value that is already
            a key of the model, or that is not a string, is passed to
            ``most_similar`` unchanged.
        num_books: Maximum number of results to return.

    Returns:
        The list of ``(key, similarity)`` tuples produced by
        ``word2vec_model.wv.most_similar``.

    Raises:
        KeyError: If neither the title nor any of its tokens is present in
            the model vocabulary.
    """
    keyed_vectors = word2vec_model.wv

    # Exact keys and non-string queries keep the original direct lookup.
    if not isinstance(book_title, str) or book_title in keyed_vectors:
        return keyed_vectors.most_similar(book_title, topn=num_books)

    # Multi-word or mixed-case titles never match a key directly, so fall
    # back to the tokens of the title that the model has seen.
    known_tokens = [
        token for token in simple_preprocess(book_title)
        if token in keyed_vectors
    ]
    if not known_tokens:
        raise KeyError(
            f"None of the words in '{book_title}' are present in the model vocabulary"
        )
    return keyed_vectors.most_similar(positive=known_tokens, topn=num_books)

# Example: Getting the embedding vector for a word
def get_embedding_vector(word):
    """Look up ``word`` in the trained model and return its embedding.

    The value is read by indexing ``word2vec_model.wv`` with ``word`` and is
    returned unchanged; any error raised by that lookup propagates to the
    caller.
    """
    return word2vec_model.wv[word]

# Example usage
# NOTE: the model is trained on word tokens taken from the descriptions, so
# the entries printed below are vocabulary words rather than book titles.
book_title = "The Great Gatsby"
try:
    similar_books = recommend_similar_books(book_title)
except KeyError:
    # The lookup raises KeyError when the query is not in the model
    # vocabulary; report that instead of aborting the script.
    similar_books = []

if similar_books:
    print(f"Books most similar to '{book_title}':")
    for book, similarity in similar_books:
        print(f"Book: {book}, Similarity: {similarity:.2f}")
else:
    print(f"No similar entries found for '{book_title}'.")
