In [12]:
import json
from sentence_transformers import SentenceTransformer, util

# Candidate genre labels that text will be scored against.
categories = [
    "Romance",
    "Science Fiction",
    "Dystopian",
    "Thriller",
    "Historical Fiction",
    "Drama",
    "Mystery",
    "Fantasy",
]

# Load the sentence-embedding model by name, then embed the category labels
# once up front so the vectors can be reused instead of being recomputed.
model = SentenceTransformer('all-mpnet-base-v2')
category_embeddings = model.encode(categories)

In [27]:
def category_sim_pairs(categories, similarities):
    """Pair each category with its similarity score, highest score first.

    Args:
        categories: Sequence of category labels.
        similarities: Sequence of scores indexed in parallel with
            ``categories``; only the first ``len(categories)`` entries are read.

    Returns:
        A new list of ``(category, similarity)`` tuples sorted by similarity
        in descending order. Pairs with equal scores keep their original
        relative order.
    """
    def score_of(pair):
        # Sort key: the similarity component of a (category, similarity) pair.
        return pair[1]

    ranked = [(categories[i], similarities[i]) for i in range(len(categories))]
    ranked.sort(key=score_of, reverse=True)
    return ranked

In [None]:
# Load the book records; every entry is read below via its "title" and
# "description" keys.
with open('books.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# description -> vector, done as ONE batched encode() call rather than one call
# per book, so the library can batch the model forward passes.
# The guard keeps an empty books.json a no-op, as it was with per-entry encoding.
# NOTE(review): batched encoding may differ from per-item encoding in the last
# floating-point digits -- confirm this is acceptable for the printed scores.
descriptions = [entry["description"] for entry in data]
description_embeddings = model.encode(descriptions) if descriptions else []

for entry, description_embedding in zip(data, description_embeddings):
    description = entry["description"]
    title = entry["title"]

    # cosine sim (description, category): cos_sim yields one row of scores for
    # the single description vector; take that row as a plain list of floats
    similarities = util.cos_sim(description_embedding, category_embeddings)[0].tolist()
    pairs = category_sim_pairs(categories, similarities)

    print(f"Title: {title}:")
    print(f"  Description: {description}")
    print("  Categories (closest to furthest):")
    for category, similarity in pairs:
        print(f"{category}: {similarity:.4f}")
    print("#"*30)

Title: Pride and Prejudice:
  Description: A classic novel about the manners and matrimonial machinations among the British gentry in the early 19th century.
  Categories (closest to furthest):
Historical Fiction: 0.4853
Romance: 0.2863
Drama: 0.2737
Science Fiction: 0.2226
Dystopian: 0.2207
Thriller: 0.2063
Fantasy: 0.1531
Mystery: 0.1091
Title: 1984:
  Description: A dystopian social science fiction story that explores the dangers of totalitarianism and pervasive government surveillance.
  Categories (closest to furthest):
Dystopian: 0.4533
Historical Fiction: 0.3936
Science Fiction: 0.3577
Thriller: 0.3305
Drama: 0.2296
Romance: 0.1940
Mystery: 0.1890
Fantasy: 0.1253
Title: To Kill a Mockingbird:
  Description: Set in a small Southern town during the Great Depression, this novel tackles racial injustice and the moral growth of its young narrator.
  Categories (closest to furthest):
Historical Fiction: 0.4622
Dystopian: 0.3111
Thriller: 0.2244
Drama: 0.2138
Romance: 0.1927
Science Fi