This notebook computes an "enjoyment prediction" by comparing a user's trope preferences with a book's predicted tropes (which we already get from our logistic regression model).

In [1]:
# All imports live together at the top of the cell: stdlib first, then third-party.
import pickle

import numpy as np
import pandas as pd

# Load the already-trained model and its fitted vectorizer from disk.
# NOTE(security): pickle.load can execute arbitrary code while deserializing, so
# only load .pkl files that were produced by this project / a trusted source.
with open("logreg_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("tfidf_vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

# Trope names are read from the "name" column of the vocabulary CSV.
# pred_enjoyment indexes this list with positions taken from model.predict_proba,
# so the row order here presumably matches the model's output order — TODO confirm.
tropes = pd.read_csv("data/model_trope_vocabulary.csv")
trope_names = tropes["name"].tolist()

In [5]:
def pred_enjoyment(user_input_indices, book_blurb, threshold=0.5, *,
                   top_k=5, clf=None, tfidf=None, labels=None):
    """
    Predict how much a user will enjoy a book based on the favorite tropes they selected.

    Params:
    user_input_indices: iterable of int
        Indices (positions in the trope-name list) of the tropes the user selected.
        Repeated indices are counted once; negative indices count from the end.
    book_blurb: str
        Book's description
    threshold: float
        Minimum probability for a trope to be considered present in the book
    top_k: int (keyword-only)
        How many of the book's highest-probability tropes to report (default 5)
    clf, tfidf, labels: (keyword-only, optional)
        Classifier (needs predict_proba), vectorizer (needs transform) and list of
        trope names to use. Each defaults to the notebook-level `model`,
        `vectorizer` and `trope_names` respectively.

    Returns:
    dict with keys
        'overall_score'              0.4 * overlap_score + 0.6 * weighted_score
        'overlap_score'              fraction of selected tropes at/above threshold
        'weighted_score'             mean predicted probability of selected tropes
        'message'                    recommendation text chosen from overall_score
        'user_trope_matches'         per selected trope: name, probability, present
        'book_top_tropes'            the top_k tropes by probability: name, probability
        'total_user_tropes_found'    number of selected tropes at/above threshold
        'total_user_tropes_selected' number of distinct selected tropes

    Raises:
    IndexError: a selected index is outside the trope list
    ValueError: the model returns a different number of probabilities than there
        are trope names
    """
    # Fall back to the notebook-level objects when nothing is injected.
    if clf is None:
        clf = model
    if tfidf is None:
        tfidf = vectorizer
    if labels is None:
        labels = trope_names
    n_tropes = len(labels)

    # get trope probabilities from model; [0] picks the row for the single blurb
    X = tfidf.transform([book_blurb])
    book_trope_probs = np.asarray(clf.predict_proba(X)[0], dtype=float)
    if book_trope_probs.shape != (n_tropes,):
        raise ValueError(
            f"model returned probabilities of shape {book_trope_probs.shape}, "
            f"expected ({n_tropes},) to match the trope names"
        )

    # Normalize the user's selection: validate range, map negative indices to
    # their positive position, and drop repeats while keeping the user's order.
    # This keeps the per-trope breakdown consistent with the selected/found counts.
    selected = []
    seen = set()
    for raw_idx in user_input_indices:
        if not -n_tropes <= raw_idx < n_tropes:
            raise IndexError(
                f"trope index {raw_idx} is out of bounds for {n_tropes} tropes"
            )
        idx = int(raw_idx) % n_tropes
        if idx not in seen:
            seen.add(idx)
            selected.append(idx)

    # probabilities / presence flags of just the user's favorite tropes
    # (explicit intp dtype so an empty selection still indexes cleanly)
    picked_probs = book_trope_probs[np.array(selected, dtype=np.intp)]
    picked_present = picked_probs >= threshold

    # calculate match scores:
    # overlap  -> how many of the user's favorites are present in the book
    # weighted -> uses actual probabilities instead of binary presence
    overlap = int(np.count_nonzero(picked_present))
    max_overlap = len(selected)
    if max_overlap > 0:
        overlap_score = overlap / max_overlap
        weighted_score = float(picked_probs.mean())
    else:
        # nothing selected: avoid dividing by zero, score everything as 0
        overlap_score = 0.0
        weighted_score = 0.0

    # combined score: balances presence of tropes in the blurb and their probabilities
    combined_score = (overlap_score * 0.4) + (weighted_score * 0.6)

    # breakdown for each trope the user selected
    trope_matches = [
        {
            'trope_name': labels[idx],
            'probability': float(prob),
            'present': bool(is_present),
        }
        for idx, prob, is_present in zip(selected, picked_probs, picked_present)
    ]

    # the book's most likely tropes, highest probability first
    top_trope_indx = np.argsort(book_trope_probs)[::-1][:max(int(top_k), 0)]
    book_top_tropes = [
        {
            'trope_name': labels[i],
            'probability': float(book_trope_probs[i]),
        }
        for i in top_trope_indx
    ]

    # recommendation message based on combined_score:
    if combined_score >= 0.7:
        message = "You would LOVE this book!!"
    elif combined_score >= 0.5:
        message = "You'll probably enjoy this book!"
    elif combined_score >= 0.3:
        message = "This book could either be a hit or miss"
    else:
        message = "Maybe this book just isn't for you..."

    return {
        'overall_score': float(combined_score),
        'overlap_score': float(overlap_score),
        'weighted_score': float(weighted_score),
        'message': message,
        'user_trope_matches': trope_matches,
        'book_top_tropes': book_top_tropes,
        'total_user_tropes_found': overlap,
        'total_user_tropes_selected': max_overlap
    }

In [14]:
# Example use

def test():
    """Run one hard-coded example through pred_enjoyment and print a readable report."""
    # Favorite trope indices for the example user
    # (per the original note, any index from 0-43 can be used here).
    favorite_indices = [8, 10, 1, 2]

    # Sample book description to score against those favorites
    sample_blurb = """
    When a series of coded messages appear at crime scenes across the city, a detective realizes they 
    mimic puzzles from her childhood. To catch the killer, she must revisit a past she’s tried to forget—before he turns his next riddle deadly.
    """

    report = pred_enjoyment(favorite_indices, sample_blurb)

    # headline: overall score followed by the recommendation message
    print(f"Overall Enjoyment Score: {report['overall_score']:.2%}")
    print(f"{report['message']}\n")

    # one line per favorite trope: marker, trope name, predicted probability
    print("Your Favorite Tropes in This Book:")
    for entry in report['user_trope_matches']:
        # plain-text placeholders; a front end can render nicer icons
        if entry['present']:
            marker = "<Check mark>"
        else:
            marker = "X"
        print(f"{marker} {entry['trope_name']}: {entry['probability']:.1%}")

    # fraction of the favorite tropes that were found in the book
    found_count = report['total_user_tropes_found']
    selected_count = report['total_user_tropes_selected']
    print(f"\nFound {found_count}/{selected_count} of your favorite tropes!")

    # report['book_top_tropes'] is also available but intentionally not printed;
    # to show it:
    # print("\nTop Tropes in This Book:")
    # for trope in report['book_top_tropes']:
    #     print(f"{trope['trope_name']}: {trope['probability']:.1%}")

# Run the example above when this code is executed as the main module
# (the printed report that follows this cell is the output of this call).
if __name__ == "__main__":
    test()

Overall Enjoyment Score: 3.08%
Maybe this book just isn't for you...

Your Favorite Tropes in This Book:
X Historical Fiction: 6.3%
X Young Adult / Coming-of-Age: 3.3%
X Contemporary / General Fiction: 6.1%
X Fantasy: 4.8%

Found 0/4 of your favorite tropes!
