This notebook is used to configure and define the 'trope' vocabulary to be used in the TF-IDF processing.

In [None]:
import pandas as pd

# Trope vocabulary source data, grouped by category.
# Each tuple is (trope_id, name, description, raw_tags); the list built below
# expands every tuple into a full record with include_in_model=True and empty notes.
_TROPES_BY_CATEGORY = {
    # === Genres ===
    "genre": [
        ("action_adventure", "Action & Adventure",
         "Plot-driven stories focused on physical action, danger, and high-stakes journeys.",
         "Action, Adventure"),
        ("contemporary_fiction", "Contemporary / General Fiction",
         "Stories set roughly in the present day with realistic, non-speculative elements.",
         "Contemporary, Urban"),
        ("fantasy", "Fantasy",
         "Stories with magical or supernatural elements, often in a secondary world.",
         "Fantasy, High Fantasy, Low Fantasy, Urban Fantasy, Fairy Tale, Isekai"),
        ("science_fiction", "Science Fiction",
         "Stories centered on speculative technology, science, or futuristic settings.",
         "Sci-Fi, Science Fiction, Space Opera, Cyberpunk, Steampunk"),
        ("dystopian_post_apocalyptic", "Dystopian / Post-Apocalyptic",
         "Stories set in collapsed, oppressive, or radically transformed societies.",
         "Dystopian, Utopia, Apocalypse, Post-Apocalypse, Aftermath"),
        ("horror", "Horror",
         "Stories intended to frighten or unsettle, often featuring threats or the uncanny.",
         "Horror, Slasher, Gothic"),
        ("mystery_crime_thriller", "Mystery / Crime / Thriller",
         "Stories driven by investigation, crime, or suspenseful high-stakes plots.",
         "Mystery, Murder Mystery, Crime, Suspense, Thriller, Psychological Thriller"),
        ("romance", "Romance",
         "Stories where a romantic relationship is the primary focus of the plot and resolution.",
         "Romance"),
        ("historical_fiction", "Historical Fiction",
         "Stories set primarily in a past era, often engaging with historical events or settings.",
         "Historical Fiction, Alternate History, Period Piece, Western"),
        ("paranormal_supernatural", "Paranormal / Supernatural",
         "Stories featuring ghosts, magic, or supernatural phenomena in otherwise recognizable worlds.",
         "Paranormal, Supernatural, Spiritual, Witchcraft, Angels, Demons, Vampires"),
        ("young_adult_coming_of_age", "Young Adult / Coming-of-Age",
         "Stories focused on teens or young adults navigating identity, growth, and transition.",
         "Coming of Age, Young Adult, Teenagers, School, College/University, Family, Friendship"),
        ("literary_slice_of_life", "Literary / Slice of Life / Philosophical",
         "Character-driven stories emphasizing interiority, everyday life, and thematic or philosophical depth.",
         "Drama, Slice of Life, Philosophical, Surreal, Tragedy, Poetry"),
    ],

    # === Story tropes ===
    "story_trope": [
        ("found_family", "Found family",
         "Characters form a chosen, close-knit family over the course of the story.",
         "Found Family, Band of Misfits, Team as Family, Team Bonding, Bromance, Friendship"),
        ("chosen_one_destiny", "Chosen one / destiny",
         "A character is singled out by prophecy or fate to play a special role.",
         "Chosen One, Destiny, Fate, Backstory/Origin Story, Good Versus Evil"),
        ("antihero_morally_grey", "Anti-hero / morally grey lead",
         "The central character has ambiguous morals or operates outside conventional heroism.",
         "Anti-Hero, Morally Grey, Morally Ambiguous Characters, Sympathetic Villain"),
        ("redemption_arc", "Redemption arc",
         "A character seeks to atone for past harm or wrongdoing.",
         "Redemption, Regret, Heroes to Villains, Villains to Heroes"),
        ("revenge_motive", "Revenge",
         "A character is driven by a desire for revenge or retribution.",
         "Revenge, Vengeance, Betrayal"),
        ("political_intrigue", "Political / court intrigue",
         "Plots driven by court politics, power struggles, and schemes.",
         "Politics, Political Intrigue, Corruption, Royalty, Rebels, Revolution, Freedom Fighters"),
        ("survival_war", "Survival / war",
         "Characters struggle to survive war, conflict, or harsh conditions.",
         "War, Military, Survival, Wilderness, Aftermath"),
        ("heist_crime_caper", "Heist / crime caper",
         "Storylines centered on planning and executing a crime or high-stakes mission.",
         "Criminals, Bounty Hunters, Mafia, Spies, Robbery, Heist"),
        ("kidnapping_captivity", "Kidnapping / captivity",
         "A character is kidnapped, held captive, or otherwise trapped.",
         "Kidnapping, Imprisonment, Locked In, Trapped, Rescue Missions"),
        ("school_campus", "School / campus story",
         "Stories centered on school, university, or campus life.",
         "School, College, University, Higher Education, Teenagers, Delinquents"),
        ("workplace_professional", "Workplace / professional setting",
         "Stories focused on characters' jobs or professional environments.",
         "Medical, Hospital, Restaurants, Workplace, Office"),
        ("time_travel_reincarnation", "Time travel / reincarnation",
         "Characters move through time or live multiple lives.",
         "Time Travel, Time Loop, Past Lives, Reincarnation, Resurrection, Rebirth, Peggy Sue"),
        ("afterlife_ghosts", "Afterlife / ghosts",
         "Stories dealing with life after death, spirits, or haunting.",
         "Afterlife, Ghosts, Spirits"),
        ("supernatural_creatures", "Supernatural creatures",
         "Prominent focus on non-human supernatural beings.",
         "Vampires, Demons, Angels, Merpeople, Dragons, Mythical Beings & Creatures, Monsters, Witchcraft"),
        ("secret_identity_double_life", "Secret identity / double life",
         "A character hides their true identity or lives a double life.",
         "Secret Identity, Undercover, Spies, Superheroes"),
        ("road_trip_travel", "Road trip / travel adventure",
         "Plot structured around a journey or extended travel.",
         "Road Trip, Travel, Wilderness"),
        ("next_generation_legacy", "Next generation / legacy story",
         "Focus on children of previous heroes or inheriting a legacy.",
         "Next Gen, Next Generation, Parenthood, Single Parents, Siblings, Orphans, Family"),
    ],

    # === Representation ===
    "representation": [
        ("lgbtq_lead", "LGBTQ+ lead",
         "The primary point-of-view character or central protagonist is LGBTQ+.",
         "Gay Character, Lesbian Character, Bisexual Character, Pansexual Character, Queer Character, LGBTQ Character, LGBTQ Themes"),
        ("trans_nonbinary_lead", "Trans or non-binary lead",
         "The main character is explicitly described as transgender, non-binary, or gender nonconforming.",
         "Trans Character, Transgender Character, Nonbinary Character, Genderqueer Character"),
        ("bipoc_lead", "BIPOC lead",
         "The main character is Black, Indigenous, or a person of color and this identity is foregrounded.",
         "Asian Character, Black Character, Indigenous Character, Latinx Character, Person of Color, BIPOC Character"),
        ("disabled_lead", "Disabled / chronically ill lead",
         "The main character has a physical disability or chronic illness that is explicitly acknowledged.",
         "Disabled Character, Disability, Wheelchair User, Blind Character, Deaf Character, Chronic Illness"),
        ("neurodivergent_lead", "Neurodivergent lead",
         "The main character is autistic, ADHD, dyslexic, or otherwise explicitly described as neurodivergent.",
         "Autistic Character, ADHD, Neurodivergent, Dyslexic Character"),
        ("faith_centered_lead", "Faith-centered lead",
         "Faith, religion, or spiritual practice is central to the main character’s identity or arc.",
         "Jewish Character, Christian Character, Muslim Character, Hindu Character, Buddhist Character, Religion, Spiritual"),
    ],

    # === Content warnings ===
    "content_warning": [
        ("cw_violence_gore", "Violence & gore",
         "Contains significant physical violence, combat, or graphic injury.",
         "Violence, Gore, Blood, War, Fights, Weapons"),
        ("cw_abuse", "Abuse",
         "Depicts emotional, physical, or domestic abuse.",
         "Abuse, Domestic Abuse, Emotional Abuse"),
        ("cw_bullying_harassment", "Bullying & harassment",
         "Includes bullying, harassment, or targeted cruelty toward a character.",
         "Bullying, Harassment, Humiliation"),
        ("cw_self_harm_suicide", "Self-harm & suicide",
         "References or depicts self-harm, suicidal ideation, or suicide attempts.",
         "Self-Harm, Suicide"),
        ("cw_substance_use", "Substance use & addiction",
         "Features problematic use of alcohol or drugs, including addiction.",
         "Drugs, Alcohol, Substance Abuse"),
        ("cw_medical_trauma", "Serious illness & medical trauma",
         "Focuses on serious illness, injury, or distressing medical situations.",
         "Illness, Sick Character, Medical, Hospital, Injury"),
        ("cw_kidnapping_captivity", "Kidnapping & captivity",
         "Involves kidnapping, forced confinement, or captivity.",
         "Kidnapping, Imprisonment, Locked In, Trapped"),
        ("cw_trauma_ptsd", "Trauma & PTSD themes",
         "Centers on traumatic experiences and their psychological aftermath.",
         "Trauma, Nightmares, Mental Health Issues, Depression, Grief, Mourning, Loss"),
    ],
}

# Flatten into one record per trope; key order here fixes the DataFrame column order.
tropes = [
    {
        "trope_id": trope_id,
        "name": name,
        "category": category,
        "description": description,
        "raw_tags": raw_tags,
        "include_in_model": True,
        "notes": "",
    }
    for category, entries in _TROPES_BY_CATEGORY.items()
    for trope_id, name, description, raw_tags in entries
]

# === Column Definitions ===
# trope_id -> Short, machine-friendly identifier in snake_case (e.g., found_family, cw_violence_gore).
# name -> Human-readable label for the trope (e.g., "Found family", "Enemies to lovers").
# category -> Broad bucket that this trope belongs to, e.g. "genre", "story_trope", "romance_trope", "representation", "content_warning".
# description -> 1–2 sentence explanation of what the trope means in plain language.
# raw_tags -> Comma-separated list of the original noisy tags that should map into this canonical trope.
# include_in_model -> Boolean flag: whether to include this trope in the ML model.
# notes -> Free-text notes about the entry (empty for every trope defined above).



In [20]:
# Turn the list of trope records into a DataFrame (one row per trope, one
# column per dict key) and show it as the cell's output.
trope_df = pd.DataFrame(data=tropes)
trope_df

Unnamed: 0,trope_id,name,category,description,raw_tags,include_in_model,notes
0,action_adventure,Action & Adventure,genre,Plot-driven stories focused on physical action...,"Action, Adventure",True,
1,contemporary_fiction,Contemporary / General Fiction,genre,Stories set roughly in the present day with re...,"Contemporary, Urban",True,
2,fantasy,Fantasy,genre,"Stories with magical or supernatural elements,...","Fantasy, High Fantasy, Low Fantasy, Urban Fant...",True,
3,science_fiction,Science Fiction,genre,"Stories centered on speculative technology, sc...","Sci-Fi, Science Fiction, Space Opera, Cyberpun...",True,
4,dystopian_post_apocalyptic,Dystopian / Post-Apocalyptic,genre,"Stories set in collapsed, oppressive, or radic...","Dystopian, Utopia, Apocalypse, Post-Apocalypse...",True,
5,horror,Horror,genre,"Stories intended to frighten or unsettle, ofte...","Horror, Slasher, Gothic",True,
6,mystery_crime_thriller,Mystery / Crime / Thriller,genre,"Stories driven by investigation, crime, or sus...","Mystery, Murder Mystery, Crime, Suspense, Thri...",True,
7,romance,Romance,genre,Stories where a romantic relationship is the p...,Romance,True,
8,historical_fiction,Historical Fiction,genre,"Stories set primarily in a past era, often eng...","Historical Fiction, Alternate History, Period ...",True,
9,paranormal_supernatural,Paranormal / Supernatural,genre,"Stories featuring ghosts, magic, or supernatur...","Paranormal, Supernatural, Spiritual, Witchcraf...",True,


In [21]:
# Export the vocabulary as a CSV sheet in the current working directory;
# index=False keeps the row index out of the file.
trope_df.to_csv(path_or_buf="trope_vocabulary.csv", index=False)

In [None]:
# 1. Load the base sheet for the trope vocabulary plus the book datasets.
import pandas as pd

# --- trope vocabulary ---
# From here on `tropes` is a DataFrame (it was a list of dicts in the first cell).
# NOTE(review): the export cell above writes "trope_vocabulary.csv" to the working
# directory, while this reads from "../data/" — confirm the file is copied there.
tropes = pd.read_csv(filepath_or_buffer="../data/trope_vocabulary.csv")

# --- books with genres / subjects, used to build all_genre_tokens ---
# Only the single tag column is read from each dataset.
all_genres_goodreads = pd.read_csv(
    filepath_or_buffer="../data/clean_best_books.csv",
    usecols=["genres"],
)
all_subjects_ia = pd.read_csv(
    filepath_or_buffer="../data/ia_clean_dataset.csv",
    usecols=["subject_list"],
)

In [58]:
# Expanded phrase lists that replace the raw tags, to make tropes easier to detect in blurbs.
#
# Keys MUST equal a `trope_id` from the trope vocabulary exactly: the cell that
# applies these rules only copies a list when its key matches a vocabulary row.
# Every pattern needs its own trailing comma — Python silently concatenates
# adjacent string literals, which merges two patterns into one unusable phrase.
blurb_match_rules = {
    # === GENRE / META-GENRE BUCKETS ===
    "action_adventure": [
        "journey",
        "adventure",
        "epic tale",
        "adventures",
        "quest",
        "saga",
    ],
    "contemporary_fiction": [
        "contemporary novel",
        "set in the present day",
        "modern-day story",
        "modern setting",
        "realistic fiction",
        "quiet character study",
        # NOTE(review): padded with spaces, presumably to match the whole word only —
        # confirm the downstream matcher does not strip whitespace from tags.
        " fiction ",
    ],
    "fantasy": [
        "fantasy world",
        "magical kingdom",
        "epic fantasy",
        "high fantasy",
        "urban fantasy",
        "dark fantasy",
        "wielding magic",
        "ancient prophecy",
        "quest to save",
        "sorcerer",
        "wizard",
        "sorcery",
        "magic",
        "empires",
        "curse",
    ],
    "science_fiction": [
        "science fiction",
        "superhero",
        "sci-fi",
        "distant future",
        "space station",
        "space opera",
        "alien invasion",
        "alien",
        "dystopian future",
        "cyberpunk city",
        "advanced technology",
        "parasite",
        "cure",
        "virus",
        "galaxy",
        "near-future",
        "sentient",
        "solar system",
    ],
    # Key fixed: was "dystopian_post-apocalyptic" (hyphen), which matched no trope_id.
    "dystopian_post_apocalyptic": [
        "dystopian future",
        "nuclear disaster",
        "dystopian world",
        "totalitarian regime",
        "post-apocalyptic",
        "after the apocalypse",
        "ruined world",
        "dystopia",
        "global catastrophe",
        "surviving the end of the world",
    ],
    "horror": [
        "horror novel",
        "terrifying",
        "unsettling",
        "haunted house",
        "chilling",
        "nightmarish",
        "unspeakable horror",
        "creeping dread",
    ],
    "mystery_crime_thriller": [
        "murder mystery",
        "whodunit",
        "detective novel",
        "private investigator",
        "serial killer",
        "crime thriller",
        "psychological thriller",
        "shocking twist",
    ],
    "romance": [
        "slow-burn romance",
        "enemies to lovers",
        "wlw romance",
        "mlm romance",
        "second chance romance",
        "heartwarming love story",
        "forbidden romance",
        "romantic comedy",
        "swoony romance",
        "grumpy x sunshine",
        "love story",
        "falls in love",
        "falling in love",
        "mate",
    ],
    "historical_fiction": [  # TODO: expand this
        "historical novel",
        "set in",
        "war-torn",
        "in the aftermath of the war",
        "period drama",
        "based on true events",
        "rich historical detail",
        "confederate",
        "civil war era",  # comma restored: was fused into "civil war erakingqueen"
        "king",
        "queen",
        "monarchy",
        "alternate history",
        "the ton",
    ],
    "paranormal_supernatural": [
        "paranormal romance",
        "supernatural forces",
        "ghostly presence",
        "haunted by spirits",
        "otherworldly",
        "sell her soul",
        "sell his soul",
        "sell their soul",
        "the veil between worlds",
    ],
    "young_adult_coming_of_age": [
        "young adult novel",
        "coming-of-age story",
        "high school drama",
        "high school",  # comma restored: was fused into "high schoolsenior year"
        "senior year",
        "first love",
        "finding her place",
        "finding his place",
        "figuring out who she is",
        "figuring out who he is",
    ],
    "literary_slice_of_life": [
        "literary novel",
        "lyrical prose",
        # NOTE(review): contains a comma, so it will read as two tags once the
        # list is joined into a comma-separated raw_tags string — confirm intent.
        "quiet, introspective",
        "character-driven",
        "slice of life",
        "meditation on",
        "examines grief and memory",
    ],

    # === STORY / PLOT TROPES ===
    "found_family": [
        "found family",
        "makeshift family",
        "chosen family",
        "band of misfits",
        "ragtag group of misfits",
        "unlikely allies become family",
        "tight-knit group",
        "the only family they have left",
    ],
    "chosen_one_destiny": [
        "Chosen One",
        "chosen one",
        "prophesied hero",
        "destined to save the world",
        "marked by destiny",
        "marked by fate",
        "ancient prophecy",
        "fate of the world rests",
    ],
    "antihero_morally_grey": [
        "anti-hero",
        "antihero",
        "morally grey",
        "morally gray",
        "morally ambiguous",
        "reluctant hero",
        "reluctant villain",
        "sympathetic villain",
        "does the wrong things for the right reasons",
        "walks the line between good and evil",
    ],
    "redemption_arc": [
        "redemption arc",
        "seeking redemption",
        "seeking forgiveness",
        "trying to atone",
        "atone for past mistakes",
        "haunted by his past",
        "haunted by her past",
        "second chance at redemption",
        "trying to make amends",
    ],
    "revenge_motive": [
        "revenge tale",
        "sworn to avenge",
        "out for revenge",
        "out for blood",
        "driven by vengeance",
        "hell-bent on revenge",
        "hunting down the man who",
        "after the person who ruined",
        "cannot forgive what was done",
    ],
    "political_intrigue": [
        "political intrigue",
        "court politics",
        "palace intrigue",
        "scheming nobles",
        "plots and betrayals",
        "backroom deals",
        "power struggle",
        "fight for the throne",
        "within the royal court",
    ],
    "survival_war": [
        "on the front lines",
        "behind enemy lines",
        "in the trenches",
        "war-torn",
        "soldiers",  # spelling fixed (was "soilders")
        "Navy",
        "caught in the crossfire",
        "struggling to survive the war",
        "survival in a war zone",
        "refugees fleeing the war",
    ],
    "heist_crime_caper": [
        "high-stakes heist",
        "one last job",
        "pull off the perfect heist",
        "crew of thieves",
        "elaborate scheme",
        "criminal mastermind",
        "carefully planned robbery",
        "caper gone wrong",
    ],
    "kidnapping_captivity": [
        "kidnapped",
        "abducted",
        "taken hostage",
        "held captive",
        "prisoner in",
        "locked in a basement",
        "cannot escape",
    ],
    "school_campus": [
        "boarding school",
        "elite academy",
        "prestigious academy",
        "campus novel",
        "college campus",
        "university life",
        "roommates and exams",
        "dorm life",
        "school",
        "academy",
    ],
    "workplace_professional": [
        "corporate ladder",
        "office politics",
        "high-powered executive",
        "law firm",
        "hospital drama",
        "medical resident",
        "coworkers to lovers",
        " newsroom ",  # with spaces so it's less noisy
    ],
    "time_travel_reincarnation": [
        "time travel",
        "time-travel",
        "time traveler",
        "time traveller",
        "time loop",
        "relives the same day",
        "reliving the same day",
        "wakes up in the past",
        "reincarnation",
        "reborn with memories intact",
        "second life",
        "alternate timeline",
        "multiple timelines",
        "sent back in time",
    ],
    "afterlife_ghosts": [
        "afterlife",
        "lingers between life and death",
        "cross over to the other side",
        "ghostly presence",
        "haunted by ghosts",
        "spirit that refuses to move on",
    ],
    "supernatural_creatures": [
        "vampire prince",
        "ancient vampire",
        "werewolf pack",
        "werewolf",  # spelling fixed (was "werefwolf")
        "vampyre",
        "vampire",
        "fae",
        "fairy",
        "alpha werewolf",
        "shifter pack",
        "dragon shifter",
        "mermaid princess",
        "witch",
        "immortal",
        "giant",
        "zombie",
        "zombies",
        "mummy",
        "mummies",
        "giantess",
        "demon",
        "angel",
        "sea witch",
        "powerful witch",
        "coven of witches",
        "fallen angel",
        "celestial beings",
    ],
    "secret_identity_double_life": [
        "secret identity",
        "leads a double life",
        "spymaster",
        "by day he is",
        "spy",
        "spy agency",
        "spies",
        "agent",
        "by day she is",  # spelling fixed (was "by dat she is")
        "by night he is",
        "by night she is",
        "masked vigilante",
        "superhero alter ego",
        "spy living under an alias",
        "hiding who she really is",
        "hiding who he really is",
    ],
    "road_trip_travel": [
        "road trip",
        "cross-country journey",
        "cross country journey",
        "hits the road",
        "travels across the country",
        "backpacking trip",
        "long drive together",
        "on the open road",
    ],
    "next_generation_legacy": [
        "next generation",
        "inheritance",
        "picks up where her mother left off",
        "continues her father's legacy",
        "legacy of his family",
        "family legacy",
        "multi-generational saga",
        "their parents' secrets",
    ],

    # === REPRESENTATION TROPES ===
    "lgbtq_lead": [
        "gay protagonist",
        "gay man",
        "gay woman",
        "lesbian heroine",
        "lesbian",
        "sapphic",
        "bisexual protagonist",
        "bi man",
        "bi woman",
        "pansexual",
        "queer love story",
        "queer romance",
        "lgbtq+ lead",
        "lgbtq+ romance",
    ],
    "trans_nonbinary_lead": [
        "trans man",
        "trans woman",
        "transgender man",
        "transgender woman",
        "trans protagonist",
        "nonbinary character",
        "non-binary character",
        "genderqueer",
        "genderfluid",
        "discovering their gender identity",
        "transitioning",
    ],
    "bipoc_lead": [
        "Black protagonist",
        "Black woman",
        "Black man",
        "African Americans",
        "African American woman",
        "African American man",
        "Asian American heroine",
        "Asian American hero",
        "Latina protagonist",
        "Latino protagonist",
        "Latinx family",
        "Indian boy",
        "Indian girl",
        "Indian",
        "Indigenous girl",
        "Indigenous boy",
        "Native American community",
        "Pacific Islander",
        "story of a young woman of color",
        "story of a young man of color",
    ],
    "disabled_lead": [
        "wheelchair user",
        "wheelchair-bound",  # spelling fixed (was "weelchair-bound")
        "bum leg",
        "living with chronic pain",
        "chronic illness",
        "visually impaired",
        "legally blind",
        "hard of hearing",
        "hearing aid",
        "prosthetic leg",
        "mobility aid",
        "disabled protagonist",
    ],
    "neurodivergent_lead": [
        "autistic girl",
        "autistic boy",
        "on the spectrum",
        "diagnosed with autism",
        "adhd",
        "attention deficit",
        "struggles with focus",
        "hyperfocus",
        "dyslexic",
        "reading is a struggle",
        "neurodivergent",
    ],
    "faith_centered_lead": [
        "Christian faith",
        "Christian romance",
        "trusting God",
        "Muslim",
        "Jewish",
        "guided by faith",
        "inspirational romance",
        "inspirational novel",
        "church community",
        "wrestling with her faith",
        "wrestling with his faith",
    ],

    # === CONTENT WARNING BUCKETS ===
    "cw_violence_gore": [
        "graphic violence",
        "blood and gore",
        "bloody battle",
        "gruesome scenes",
        "brutal fight",
        "violent attack",
        "tortured for information",
        "torture chamber",
        "war-torn city",
        "shooting",  # comma restored: was fused into "shootingschool shooting"
        "school shooting",
        "explosion",
    ],
    "cw_abuse": [
        "abusive relationship",
        "domestic abuse",
        "domestic violence",
        "emotionally abusive partner",
        "physically abusive",
        "controlling partner",
        "manipulative partner",
        "gaslighting",
        "slaves",
        "slavery",
        "human trafficking",
        "raped",
        "rape",
    ],
    "cw_bullying_harassment": [
        "relentless bullying",
        "bullied at school",
        "bullied by her classmates",
        "bullying from his teammates",
        "harassed at work",
        "sexual harassment",
        "online harassment",
    ],
    "cw_self_harm_suicide": [
        "self-harm",
        "self harm",
        "suicidal thoughts",
        "suicidal ideation",
        "attempts suicide",
        "suicide attempt",
        "tries to end her life",
        "tries to end his life",
    ],
    "cw_substance_use": [
        "struggles with addiction",
        "battling addiction",
        "substance abuse",
        "alcoholism",
        "drinking problem",
        "hooked on pills",
        "drug addiction",
    ],
    "cw_medical_trauma": [
        "terminal diagnosis",
        "life-threatening illness",
        "fight against cancer",
        "hospital bed",
        "intensive care unit",
        "near-fatal accident",
        "medical trauma",
    ],
    "cw_kidnapping_captivity": [
        "kidnapped",
        "abducted",
        "taken hostage",
        "held against her will",
        "held against his will",
        "locked in a room",
        "forced captivity",
        "snatched",
    ],
    "cw_trauma_ptsd": [
        "haunted by the past",
        "cannot escape her memories",
        "cannot escape his memories",
        "flashbacks to the war",
        "relives the trauma",
        "post-traumatic stress",
        "ptsd",
        "survivor of abuse",
    ],
}


In [None]:
# Helper: collapse a list of match patterns into one comma-separated string.
def patterns_to_raw_tags(patterns):
    """Join the strings in `patterns` with ", " and return the result.

    An empty iterable yields an empty string.
    """
    separator = ", "
    return separator.join(patterns)

# Overwrite raw_tags for every vocabulary row whose trope_id has an entry in
# blurb_match_rules. Rule keys that match no row are collected and reported
# instead of being skipped silently, so a misspelled key cannot go unnoticed.
unmatched_rule_ids = []
for trope_id, patterns in blurb_match_rules.items():
    mask = tropes["trope_id"] == trope_id
    if mask.any():
        # .loc with a boolean mask updates the matching row(s) in place
        tropes.loc[mask, "raw_tags"] = patterns_to_raw_tags(patterns)
    else:
        unmatched_rule_ids.append(trope_id)

if unmatched_rule_ids:
    print(
        "WARNING: blurb_match_rules keys with no matching trope_id "
        f"(raw_tags left unchanged): {unmatched_rule_ids}"
    )

In [None]:
# Inspect every trope's id, category, and (updated) raw_tags
tropes.loc[:, ["trope_id", "category", "raw_tags"]]

Unnamed: 0,trope_id,category,raw_tags
0,action_adventure,genre,"journey, adventure, epic tale, adventures, que..."
1,contemporary_fiction,genre,"contemporary novel, set in the present day, mo..."
2,fantasy,genre,"fantasy world, magical kingdom, epic fantasy, ..."
3,science_fiction,genre,"science fiction, superhero, sci-fi, distant fu..."
4,dystopian_post_apocalyptic,genre,"dystopian future, dystopian world, totalitaria..."
5,horror,genre,"horror novel, terrifying, unsettling, haunted ..."
6,mystery_crime_thriller,genre,"murder mystery, whodunit, detective novel, pri..."
7,romance,genre,"slow-burn romance, enemies to lovers, wlw roma..."
8,historical_fiction,genre,"historical novel, set in, war-torn, in the aft..."
9,paranormal_supernatural,genre,"paranormal romance, supernatural forces, ghost..."


In [61]:
# Spot-check: first five genre rows with their id, name, and raw_tags
tropes.loc[tropes["category"].eq("genre"), ["trope_id", "name", "raw_tags"]].head()

Unnamed: 0,trope_id,name,raw_tags
0,action_adventure,Action & Adventure,"journey, adventure, epic tale, adventures, que..."
1,contemporary_fiction,Contemporary / General Fiction,"contemporary novel, set in the present day, mo..."
2,fantasy,Fantasy,"fantasy world, magical kingdom, epic fantasy, ..."
3,science_fiction,Science Fiction,"science fiction, superhero, sci-fi, distant fu..."
4,dystopian_post_apocalyptic,Dystopian / Post-Apocalyptic,"dystopian future, dystopian world, totalitaria..."


In [63]:
# Drop any column whose name starts with "Unnamed", then export the updated
# vocabulary as a CSV sheet (without the row index).
tropes = tropes.loc[:, ~tropes.columns.str.match(r"Unnamed")]
tropes.to_csv(path_or_buf="updated_model_trope_vocabulary.csv", index=False)