In [1]:
from helpers import database
import pandas as pd
import re
import plotly.express as px

In [2]:
# Load the reviews through the project helper. The "english" argument is
# presumably a language filter (the preview below shows language == "english")
# — TODO confirm against helpers.database.get_reviews.
reviews = database.get_reviews("english")
# Preview the first rows to sanity-check the columns used later in the notebook.
reviews.head()

Unnamed: 0,id,recommendation_id,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played,language,...,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,primarily_steam_deck
0,215760124,,76561199565605732,0,4,325,325,264,1768155336,english,...,1768151687,True,0,0,0.5,0,True,False,False,False
1,215756568,,76561198026330868,39,10,4201,2559,4150,1768154369,english,...,1768148999,True,0,0,0.5,0,True,False,False,False
2,215744888,,76561198066200885,221,44,5543,0,5543,1739371844,english,...,1768140031,True,0,0,0.5,0,True,False,False,False
3,215742083,,76561198880718555,0,8,3239,2734,3147,1768155154,english,...,1768137616,True,0,0,0.5,0,True,False,False,False
4,215740976,,76561199486130374,14,3,200,200,200,1768055486,english,...,1768136629,True,0,0,0.5,0,False,False,False,False


In [3]:
# Row count of the loaded review table; used later as the denominator for
# each boss's share of mentions.
total_reviews = reviews.shape[0]
total_reviews

12775

In [5]:
# Canonical boss name -> list of spellings searched for in the review text.
# Entries tagged "ambiguous" share their name with a story character, so a hit
# is not necessarily about the boss fight itself.
bosses_aliases = {
    # Main boss
    "Materia Guardian": ["Materia Guardian"],
    "Midgardsormr": ["Midgardsormr"],
    "Rude": ["Rude"],  # ambiguous: boss fight vs. character mention
    "Elena": ["Elena"],  # ambiguous: boss fight vs. character mention
    "Mythril Golem": ["Mythril Golem"],
    "Terror of the Deep": ["Terror of the Deep"],
    "Roche": ["Roche"],  # ambiguous: boss fight vs. character mention
    "Jenova Emergent": ["Jenova Emergent"],
    "Grasptropod": ["Grasptropod"],
    "Custom Valkyrie": ["Custom Valkyrie"],
    "Gigatrice": ["Gigatrice"],
    "Dyne": ["Dyne"],
    "Anuran Suppressor": ["Anuran Suppressor"],
    "Specimen H1024": ["Specimen H1024", "H1024"],
    "Crimson Mare Mk. II": ["Crimson Mare Mk. II", "Mk2", "MkII"],
    "Gi Nattak": ["Gi Nattak"],
    "Diabolic Variant": ["Diabolic Variant"],
    "Yin and Yang": ["Yin and Yang", "Yin Yang"],
    "Forgotten Specimen": ["Forgotten Specimen"],
    "Galian Beast": ["Galian Beast"],
    "Abzu": ["Abzu"],
    "Rufus": ["Rufus Shinra", "Rufus"],  # ambiguous: boss fight vs. character mention
    "Red Dragon": ["Red Dragon"],
    "Ironclad": ["Ironclad"],
    "Reno": ["Reno"],  # ambiguous: boss fight vs. character mention
    "Tseng": ["Tseng"],  # ambiguous: boss fight vs. character mention
    "Moss Grown Adamantoise": ["Moss Grown Adamantoise"],
    "Demon Gate": ["Demon Gate"],
    "Jenova Lifeclinger": ["Jenova Lifeclinger"],
    "Sephiroth Reborn": ["Sephiroth Reborn"],

    # Combat Simulator Bosses
    "Titan": ["Titan"],
    "Phoenix": ["Phoenix"],
    "Alexander": ["Alexander"],
    "Kujata": ["Kujata"],
    "Bahamut Arisen": ["Bahamut Arisen"],
    "Odin": ["Odin"],

    # World Intel Bosses
    "Quetzalcoatl": ["Quetzalcoatl"],
    "Mindflayer": ["Mindflayer"],
    "Tonberry King": ["Tonberry King"],
    "Great Malboro": ["Great Malboro"],
    "Jabberwock": ["Jabberwock"],
    "King Zu": ["King Zu"],
    "Gilgamesh": ["Gilgamesh"],

    # Side Quest Bosses
    "White Terror": ["White Terror"],
    "Hell Rider II": ["Hell Rider II"],
    "Sandstorm Drake": ["Sandstorm Drake"],
    "White Mousse": ["White Mousse"],
    "Dark Claw": ["Dark Claw"],
    "Gigantoad": ["Gigantoad"],
    "Gorgon Mane": ["Gorgon Mane"],
    "Levridon": ["Levridon"],
    "Mastodons": ["Mastodons"],
    "Irasceros": ["Irasceros"],
    "Queen Bee": ["Queen Bee"],
    "Dranabarga": ["Dranabarga"],
    "Gigaworm": ["Gigaworm"],
    "Joker": ["Joker"],

    # Minigame Bosses
    "Test 0": ["Test 0"],
}

In [11]:
def build_pattern(aliases):
    """Compile a case-insensitive regex that matches any alias as a whole phrase.

    Each alias is split on whitespace; its tokens are regex-escaped and joined
    with ``\\s+`` so any run of whitespace (including newlines) between words
    is accepted. A match must not be directly preceded or followed by a word
    character, so "Rude" does not match inside "prude".

    Args:
        aliases: Iterable of alias strings. Blank or whitespace-only aliases
            are ignored.

    Returns:
        A compiled ``re.Pattern``. When no usable alias is supplied the
        pattern never matches anything, including the empty string.
    """
    parts = []
    for alias in aliases:
        # str.split() with no argument already discards surrounding whitespace.
        tokens = alias.split()
        if not tokens:
            # A blank alias would otherwise compile to a bare boundary
            # assertion, which matches almost every non-empty text.
            continue
        phrase = r"\s+".join(re.escape(t) for t in tokens)
        # Lookarounds behave like \b for aliases that start/end with a word
        # character, and still work when an alias starts/ends with punctuation.
        parts.append(rf"(?<!\w){phrase}(?!\w)")
    # "(?!)" can never match; "$^" would match "" and "\n".
    pattern = "|".join(parts) if parts else r"(?!)"
    return re.compile(pattern, flags=re.IGNORECASE)

# Canonical boss name -> compiled pattern covering all of that boss's aliases.
# Keys and values of the same dict iterate in the same order, so zip pairs
# each boss with the pattern built from its own alias list.
bosses_regex = dict(
    zip(bosses_aliases.keys(), map(build_pattern, bosses_aliases.values()))
)


In [13]:
# Build one summary record per boss:
#   mentions   - number of reviews whose text matches the boss pattern
#   share      - mentions divided by the total number of reviews
#   positivity - mean of "voted_up" over the matching reviews (None if no match)
records = []
for boss_name, pattern in bosses_regex.items():
    # Missing review texts count as "no mention".
    hit_mask = reviews["review"].str.contains(pattern, na=False, regex=True)
    n_hits = int(hit_mask.sum())

    if n_hits > 0:
        mean_vote = reviews.loc[hit_mask, "voted_up"].mean()
    else:
        mean_vote = None

    records.append(dict(
        boss=boss_name,
        mentions=n_hits,
        # Guard against an empty review table.
        share=(n_hits / total_reviews) if total_reviews else 0.0,
        positivity=None if pd.isna(mean_vote) else float(mean_vote),
    ))

boss_df = pd.DataFrame(records)
boss_df.head()


Unnamed: 0,boss,mentions,share,positivity
0,Materia Guardian,0,0.0,
1,Midgardsormr,3,0.000235,0.666667
2,Rude,7,0.000548,0.857143
3,Elena,11,0.000861,0.727273
4,Mythril Golem,1,7.8e-05,1.0


In [14]:
# Keep only bosses mentioned in at least `min_mentions` reviews.
min_mentions = 1
boss_df = boss_df.loc[boss_df["mentions"].ge(min_mentions)].copy()
boss_df

Unnamed: 0,boss,mentions,share,positivity
1,Midgardsormr,3,0.000235,0.666667
2,Rude,7,0.000548,0.857143
3,Elena,11,0.000861,0.727273
4,Mythril Golem,1,7.8e-05,1.0
6,Roche,13,0.001018,0.769231
11,Dyne,23,0.0018,0.347826
15,Gi Nattak,5,0.000391,1.0
19,Galian Beast,1,7.8e-05,1.0
21,Rufus,34,0.002661,0.529412
22,Red Dragon,6,0.00047,0.5


In [15]:
# Order bosses from least to most positive and renumber the rows.
# sort_values defaults already give ascending order with NaN placed last.
boss_df = (
    boss_df
    .sort_values(by="positivity")
    .reset_index(drop=True)
)
boss_df


Unnamed: 0,boss,mentions,share,positivity
0,Tseng,1,7.8e-05,0.0
1,Joker,2,0.000157,0.0
2,Alexander,3,0.000235,0.333333
3,Dyne,23,0.0018,0.347826
4,White Terror,2,0.000157,0.5
5,Red Dragon,6,0.00047,0.5
6,Rufus,34,0.002661,0.529412
7,Midgardsormr,3,0.000235,0.666667
8,Gilgamesh,18,0.001409,0.666667
9,Mindflayer,3,0.000235,0.666667


In [16]:
# Horizontal bar per boss: bar length and colour encode positivity (mean of
# "voted_up" over the reviews mentioning the boss); the bar text shows the
# number of mentioning reviews.

# Build the axis caption from the active threshold so it always agrees with
# the filter applied to boss_df instead of a hard-coded number.
mention_noun = "mention" if min_mentions == 1 else "mentions"
boss_axis_label = f"Boss (at least {min_mentions} {mention_noun})"

fig = px.bar(
    boss_df,
    x="positivity",
    y="boss",
    orientation="h",
    text="mentions",
    labels={
        "positivity": "Sentiment",
        "boss": boss_axis_label,
        "mentions": "Mentions"},
    color="positivity",
    color_continuous_scale=[(0, "red"), (1, "green")],
    # Pin the colour scale to the full 0-1 range; by default it is stretched
    # to the data's min/max, so "red" would not necessarily mean 0%.
    range_color=[0, 1],
    range_x=[0, 1],
    title="Boss sentiment & mentions"
)

# "auto" keeps the label inside when it fits and moves it outside otherwise,
# so bosses with 0% positivity (zero-length bar) still show their count.
fig.update_traces(texttemplate="%{text:,}", textposition="auto")
# Scale the figure height with the number of bars; colour duplicates the x axis,
# so the colour bar is hidden.
fig.update_layout(height=max(500, 28 * len(boss_df)), coloraxis_showscale=False)
fig.update_xaxes(tickformat=".0%")
fig.update_traces(hovertemplate="<b>%{y}</b><br>Positivity: %{x:.1%}<br>Mentions: %{text:,}<extra></extra>")
fig.show()

In [17]:
# Re-order by number of mentions (ascending, NaN last by default), renumber
# the rows, and add a percentage-formatted copy of positivity for bar labels.
boss_df = (
    boss_df
    .sort_values(by="mentions")
    .reset_index(drop=True)
    .assign(positivity_label=lambda frame: frame["positivity"].map("{:.1%}".format))
)
boss_df

Unnamed: 0,boss,mentions,share,positivity,positivity_label
0,Tseng,1,7.8e-05,0.0,0.0%
1,Kujata,1,7.8e-05,1.0,100.0%
2,Galian Beast,1,7.8e-05,1.0,100.0%
3,Test 0,1,7.8e-05,1.0,100.0%
4,Jenova Lifeclinger,1,7.8e-05,1.0,100.0%
5,Mythril Golem,1,7.8e-05,1.0,100.0%
6,White Terror,2,0.000157,0.5,50.0%
7,Joker,2,0.000157,0.0,0.0%
8,Bahamut Arisen,2,0.000157,1.0,100.0%
9,King Zu,3,0.000235,1.0,100.0%


In [19]:
# Horizontal bar per boss: bar length is the number of mentioning reviews,
# colour encodes positivity, and the bar text shows positivity as a percentage.

# Build the axis caption from the active threshold so it always agrees with
# the filter applied to boss_df instead of a hard-coded number.
mention_noun = "mention" if min_mentions == 1 else "mentions"
boss_axis_label = f"Boss (at least {min_mentions} {mention_noun})"

fig = px.bar(
    boss_df,
    x="mentions",
    y="boss",
    orientation="h",
    text="positivity_label",
    labels={
        "mentions": "Mentions",
        "boss": boss_axis_label,
        "positivity": "Sentiment"},
    color="positivity",
    color_continuous_scale=[(0, "red"), (1, "green")],
    # Pin the colour scale to the full 0-1 range; by default it is stretched
    # to the data's min/max, so "red" would not necessarily mean 0%.
    range_color=[0, 1],
    title="Boss mentions & sentiment"
)
fig.update_traces(textposition="inside")
# Scale the figure height with the number of bars and hide the colour bar.
fig.update_layout(
    height=max(500, 28 * len(boss_df)),
    coloraxis_showscale=False)
# Raw positivity is attached as customdata so the hover text can format it itself.
fig.update_traces(hovertemplate="<b>%{y}</b><br>Mentions: %{x:,}<br>Positivity: %{customdata:.1%}<extra></extra>", customdata=boss_df["positivity"])
fig.show()