In [97]:
!pip install -q sentence-transformers


In [98]:
import re

import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer

In [99]:
# Load the book catalogue from the CSV file next to the notebook.
dataset1 = pd.read_csv("BookDesc.csv")

# Count missing values per column; as the last expression it is shown as the cell output.
dataset1.isna().sum()

title            0
categories      99
description    262
dtype: int64

In [100]:
# Remove rows whose description is missing, a placeholder, or too short.
MIN_DESCRIPTION_CHARS = 15                      # keep only descriptions strictly longer than this
PLACEHOLDER_DESCRIPTIONS = ["nan", "...", "."]  # junk values, compared case-insensitively

# Detect missing values BEFORE the str cast: astype(str) turns NaN into the
# literal "nan", after which notna() can no longer see it.
description_missing = dataset1["description"].isna()
description_clean = dataset1["description"].astype(str).str.strip()

keep_description = (
    ~description_missing
    & (description_clean != "")
    & ~description_clean.str.lower().isin(PLACEHOLDER_DESCRIPTIONS)
    & (description_clean.str.len() > MIN_DESCRIPTION_CHARS)
)

# assign() + copy() build an independent frame, so later cells can add or
# replace columns without writing into a filtered view of the raw data.
dataset1 = dataset1.assign(description=description_clean).loc[keep_description].copy()



In [102]:
# Remove rows whose categories value is missing or blank.

# Detect missing values BEFORE the str cast: astype(str) turns NaN into "nan".
category_missing = dataset1["categories"].isna()
category_clean = dataset1["categories"].astype(str).str.strip()

# "nan" stays in the drop list so a whitespace-padded "nan" string is still removed.
keep_category = ~category_missing & ~category_clean.isin(["", "nan"])

# assign() + copy() build an independent frame instead of assigning a column
# on the result of an earlier boolean filter.
dataset1 = dataset1.assign(categories=category_clean).loc[keep_category].copy()

In [103]:
# Merge the three source columns into one labelled string per book; this
# combined "text" column is what gets embedded and later parsed by search().
title_text = dataset1["title"].astype(str)
category_text = dataset1["categories"].astype(str)
description_text = dataset1["description"].astype(str)

combined_text = (
    "Title: " + title_text
    + ". Category: " + category_text
    + ". Description: " + description_text
)

# Keep only the combined column; the individual fields are dropped.
dataset1 = (
    dataset1
    .assign(text=combined_text)
    .drop(columns=["title", "categories", "description"])
)



In [105]:
# Load the sentence-embedding model, then embed every book's combined text once.
# Row i of `embeddings` corresponds to row position i of dataset1["text"].
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(dataset1["text"].tolist())

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [106]:
# Mood category -> keywords that signal it in a (lower-cased) user query.
# extract_moods() reports a category as soon as one of its keywords is found.
mood_vocab = {
    "low_energy": [
        "tired",
        "exhausted",
        "drained",
        "sleepy",
        "sluggish",
        "burned-out",
        "dead",
        "lazy",
    ],
    "stressed": [
        "stressed",
        "overwhelmed",
        "busy",
        "hectic",
        "frustrated",
        "irritated",
        "annoyed",
        "done",
        "over it",
        "cooked",
    ],
    "foggy": [
        "bored",
        "distracted",
        "confused",
        "meh",
        "lost",
        "spaced-out",
        "numb",
        "awkward",
    ],
    "positive_calm": ["chilling", "relaxed", "peaceful", "content", "grateful"],
    "positive_high": ["excited", "hyped", "motivated", "inspired", "productive", "solid"],
    "negative_emotional": ["sad", "lonely", "upset"],
    "anxious": ["anxious", "nervous", "worried"],
    "angry": ["angry", "hangry"],
    "neutral": ["good", "fine"],
}

# Mood category -> reading pace appended to the query by enrich_query().
pace_map = {
    # low or unsettled moods
    "low_energy": "slow",
    "foggy": "slow",
    "stressed": "moderate",
    "anxious": "slow",
    "negative_emotional": "slow",
    # calm / neutral moods
    "positive_calm": "slow",
    "neutral": "moderate",
    # high-arousal moods
    "positive_high": "fast",
    "angry": "fast",
}

# Mood category -> tone appended to the query by enrich_query().
tone_map = {
    # low or unsettled moods
    "low_energy": "gentle",
    "foggy": "simple",
    "stressed": "comforting",
    "anxious": "reassuring",
    "negative_emotional": "empathetic",
    # calm / neutral moods
    "positive_calm": "reflective",
    "neutral": "balanced",
    # high-arousal moods
    "positive_high": "energetic",
    "angry": "intense",
}


In [108]:
# Lower-case the query so keyword matching does not depend on capitalisation.
def normalize(text: str) -> str:
    """Return ``text`` converted to lower case."""
    lowered = text.lower()
    return lowered

# Go through every mood category and collect those with a keyword in the query.
def extract_moods(query: str, vocab=None) -> set:
    """Return the set of mood categories whose keywords occur in ``query``.

    Keywords are matched as whole words or phrases, not raw substrings, so
    "somehow" no longer triggers "meh" and "abandoned" no longer triggers
    "done". Matching is case-sensitive; callers pass an already lower-cased
    query.

    Args:
        query: The (normalised) user query to scan.
        vocab: Optional mapping of mood category -> list of keywords.
            Defaults to the notebook-level ``mood_vocab``.

    Returns:
        A set of mood category names; empty when no keyword is present.
    """
    if vocab is None:
        vocab = mood_vocab

    found = set()
    for mood, keywords in vocab.items():
        for kw in keywords:
            # Lookarounds instead of \b so keywords that start or end with a
            # non-word character are still bounded correctly.
            pattern = r"(?<!\w)" + re.escape(kw) + r"(?!\w)"
            if re.search(pattern, query):
                found.add(mood)
                break  # one hit is enough for this category
    return found

def resolve_attribute(found_moods: set, mapping: dict):
    """Pick the attribute value for the detected moods.

    The moods are visited in sorted (alphabetical) order, so the result is
    the same on every run when several moods were detected. Iterating the
    set directly would make the choice depend on hash order.

    Args:
        found_moods: Mood category names detected in the query.
        mapping: Mood category -> attribute value (e.g. ``pace_map``).

    Returns:
        The value mapped to the alphabetically first mood that is present in
        ``mapping``, or ``None`` when none of the moods is mapped.
    """
    for mood in sorted(found_moods):
        if mood in mapping:
            return mapping[mood]
    return None

def enrich_query(query: str) -> str:
    """Append mood, pace and tone hints to ``query``.

    The query is lower-cased for keyword detection only; the returned string
    always starts with the query exactly as given. When no mood keyword is
    detected the query is returned unchanged. Otherwise the result is
    ``query + " | " + hints`` where the hints are joined by single spaces.
    """
    moods = extract_moods(normalize(query))

    # Nothing detected: hand the query back untouched.
    if not moods:
        return query

    pace = resolve_attribute(moods, pace_map)
    tone = resolve_attribute(moods, tone_map)

    # The mood list is always present here; pace and tone only when resolved.
    hints = ["mood: " + ", ".join(sorted(moods))]
    if pace:
        hints.append(f"pace: {pace}")
    if tone:
        hints.append(f"tone: {tone}")

    return query + " | " + " ".join(hints)

In [109]:
def search(qry: str, k: int = 3) -> None:
    """Print the ``k`` books most similar to ``qry``.

    Uses the notebook-level ``model``, ``embeddings`` and ``dataset1``.
    ``qry`` is embedded exactly as given, so pass the output of
    ``enrich_query`` if mood enrichment is wanted.

    Args:
        qry: Query text to embed and compare against the book embeddings.
        k: Maximum number of results to print (default 3). Clamped to the
            number of books available.

    Returns:
        None. Results are printed.
    """
    # Embed the argument itself, not the global `enriched_query`.
    query_embedding = model.encode_query(qry)

    # Flatten to one score per book. reshape(-1) (unlike squeeze) keeps a
    # 1-D tensor even when the corpus holds a single book.
    query_similarities = model.similarity(embeddings, query_embedding).reshape(-1)

    # topk raises if asked for more entries than exist.
    top_k = max(0, min(k, query_similarities.numel()))
    scores, indices = torch.topk(query_similarities, k=top_k)

    for score, idx in zip(scores, indices):
        # Embeddings were built from dataset1['text'] in row order, so the
        # tensor position is the positional (iloc) index of the row.
        raw_text = dataset1['text'].iloc[idx.item()]

        # Parse "Title: <t>. Category: <c>. Description: <d>" left to right.
        # partition() splits on the FIRST separator only, so a label that
        # reappears inside a later field does not truncate it.
        _, _, after_title = raw_text.partition("Title: ")
        title_part, _, after_category = after_title.partition(". Category: ")
        category_part, _, description_full = after_category.partition(". Description: ")

        first_sentence = description_full.split(".")[0]

        print(f"Title : {title_part}")
        print(f"Category: {category_part}")
        print(f"Description: {first_sentence}...")
        print(f"Score: {score.item():.4f}")  # similarity score of this match
        print("-" * 30)

In [111]:
# Example request: enrich the first (only) query with mood hints and show the result.
query = ["I am tired, recommend something funny"]
enriched_query = enrich_query(query[0])
print(enriched_query)



I am tired, recommend something funny | mood: low_energy pace: slow tone: gentle


In [112]:
# Print the best-matching books for the enriched example query.
search(enriched_query)

Title : Today I Feel Silly & Other Moods That Make My Day
Category: Juvenile Fiction
Description: Today I feel silly...
Score: 0.4978
------------------------------
Title : I'm Too Young To Be Seventy
Category: Humor
Description: The beloved author of Forever Fifty and Suddenly Sixty tackles the ins and outs of becoming a septuagenarian with wry good humor...
Score: 0.3995
------------------------------
Title : Aches & Pains
Category: Humor
Description: Takes a humorous view of medical care, hospital stays, and convalescence...
Score: 0.3899
------------------------------
