In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import re

# Parse the saved patch-notes page into a BeautifulSoup tree.
with open("Diablo_IV_Patch_Notes.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

# One dict per bullet; converted to a DataFrame in a single step after the loop.
data = []

# Every <div class="panel"> is treated as one patch release.
panels = soup.find_all("div", class_="panel")

for panel in panels:

    # The panel title carries the version and build identifiers.
    title_div = panel.find("div", class_="panel-title")
    if not title_div:
        continue  # not a patch panel: nothing to tag the bullets with

    title_text = title_div.get_text(strip=True)

    # Version: first dotted triple in the title (e.g., 2.5.3).
    version_match = re.search(r"\d+\.\d+\.\d+", title_text)
    version = version_match.group(0) if version_match else "UNKNOWN"

    # Build number: digits after "Build", with an optional leading "#".
    build_match = re.search(r"Build\s+#?(\d+)", title_text)
    build = build_match.group(1) if build_match else "UNKNOWN"

    # The panel body holds the actual patch notes.
    body = panel.find("div", class_="panel-body")
    if not body:
        continue

    # Every <li> inside the body is one patch-note bullet.
    # NOTE(review): find_all() searches recursively, so if the notes contain
    # nested lists a parent bullet's text would include its children's text and
    # the children would be emitted again as their own rows -- confirm against
    # the HTML.
    bullets = body.find_all("li")

    # The positional index is not needed, so iterate over the tags directly.
    for li in bullets:
        text = li.get_text(separator=" ", strip=True)
        if text:  # skip bullets that contain no text
            data.append({
                "version": version,
                "build": build,
                "bullet_text": text
            })

df_bullets = pd.DataFrame(data)

print("Total bullets extracted:", len(df_bullets))
df_bullets.head()

Total bullets extracted: 863


Unnamed: 0,version,build,bullet_text
0,2.5.3,70356,Fixed an issue where the Executioner Monster A...
1,2.5.3,70356,Fixed an issue where certain Silent Chests in ...
2,2.5.3,70356,Fixed an issue where Zagraal in the Dark Citad...
3,2.5.3,70356,Fixed an issue where some Tower bosses had sig...
4,2.5.3,70356,Fixed an issue where an error would occur when...


In [3]:
# Number of extracted bullets per patch version.
df_bullets["version"].value_counts()

version
2.5.0    736
2.5.2     80
2.5.1     36
2.5.3     11
Name: count, dtype: int64

In [4]:
# Same per-version bullet count as the previous cell (identical expression and output).
df_bullets["version"].value_counts()

version
2.5.0    736
2.5.2     80
2.5.1     36
2.5.3     11
Name: count, dtype: int64

In [5]:
# Peek at the first three bullets of every version.
df_bullets.groupby("version").head(3)

Unnamed: 0,version,build,bullet_text
0,2.5.3,70356,Fixed an issue where the Executioner Monster A...
1,2.5.3,70356,Fixed an issue where certain Silent Chests in ...
2,2.5.3,70356,Fixed an issue where Zagraal in the Dark Citad...
11,2.5.2,70156,Tower now unlocks Tiers the same way the Pit d...
12,2.5.2,70156,Monster populations have been tuned to be fair...
13,2.5.2,70156,Bosses have been adjusted to remove unwanted b...
91,2.5.1,69864,Updated descriptions and tooltips for Heavenly...
92,2.5.1,69864,The Season Rank objective for clearing Pit Tie...
93,2.5.1,69864,Various UI consistency and clarity improvement...
127,2.5.0,69713,Upgrading an affix to a Greater Affix can now ...


In [6]:
# First ten bullets tagged with version 2.5.3.
df_bullets[df_bullets["version"] == "2.5.3"].head(10)

Unnamed: 0,version,build,bullet_text
0,2.5.3,70356,Fixed an issue where the Executioner Monster A...
1,2.5.3,70356,Fixed an issue where certain Silent Chests in ...
2,2.5.3,70356,Fixed an issue where Zagraal in the Dark Citad...
3,2.5.3,70356,Fixed an issue where some Tower bosses had sig...
4,2.5.3,70356,Fixed an issue where an error would occur when...
5,2.5.3,70356,Fixed an issue where resetting a piece of mast...
6,2.5.3,70356,Fixed an issue where the reward for defeating ...
7,2.5.3,70356,Fixed an issue where other Divine Gifts could ...
8,2.5.3,70356,Fixed an issue where some cosmetic items would...
9,2.5.3,70356,Fixed various instances of Placeholder assets ...


In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import re

# Second pass over the same HTML file: same version/build/bullet extraction,
# plus an attempt to attach a release date to every bullet.
with open("Diablo_IV_Patch_Notes.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

# One dict per bullet; converted to a DataFrame after the loop.
data = []

panels = soup.find_all("div", class_="panel")

for panel in panels:
    
    # Extract version/build
    title_div = panel.find("div", class_="panel-title")
    if not title_div:
        continue
    
    title_text = title_div.get_text(strip=True)
    
    # First dotted triple in the title, e.g. 2.5.3
    version_match = re.search(r"\d+\.\d+\.\d+", title_text)
    version = version_match.group(0) if version_match else "UNKNOWN"
    
    # Digits after "Build", with an optional leading "#"
    build_match = re.search(r"Build\s+#?(\d+)", title_text)
    build = build_match.group(1) if build_match else "UNKNOWN"
    
    # Extract panel body
    body = panel.find("div", class_="panel-body")
    if not body:
        continue
    
    # Extract date (look for Month Day, Year format)
    # NOTE: the search runs over the panel *body* text. The output recorded for
    # this cell shows "UNKNOWN" in the date column for every row, i.e. the
    # pattern never matched there; the prettified panel printed later in the
    # notebook shows the date inside the panel-title text instead
    # ("... (All Platforms)—January 28, 2026").
    body_text = body.get_text(separator=" ", strip=True)
    date_match = re.search(
        r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}",
        body_text
    )
    date = date_match.group(0) if date_match else "UNKNOWN"
    
    # Every <li> inside the body is treated as one bullet.
    bullets = body.find_all("li")
    
    for li in bullets:
        text = li.get_text(separator=" ", strip=True)
        if text:  # skip bullets that contain no text
            data.append({
                "version": version,
                "build": build,
                "date": date,
                "bullet_text": text
            })

# Rebinds df_bullets, replacing the frame built by the earlier cell.
df_bullets = pd.DataFrame(data)

print("Total bullets extracted:", len(df_bullets))
df_bullets.head()

Total bullets extracted: 863


Unnamed: 0,version,build,date,bullet_text
0,2.5.3,70356,UNKNOWN,Fixed an issue where the Executioner Monster A...
1,2.5.3,70356,UNKNOWN,Fixed an issue where certain Silent Chests in ...
2,2.5.3,70356,UNKNOWN,Fixed an issue where Zagraal in the Dark Citad...
3,2.5.3,70356,UNKNOWN,Fixed an issue where some Tower bosses had sig...
4,2.5.3,70356,UNKNOWN,Fixed an issue where an error would occur when...


In [9]:
# Distinct (version, date) pairs: shows which date was captured for each patch.
df_bullets[["version", "date"]].drop_duplicates()

Unnamed: 0,version,date
0,2.5.3,UNKNOWN
11,2.5.2,UNKNOWN
91,2.5.1,UNKNOWN
127,2.5.0,UNKNOWN


In [10]:
# Distinct (version, build, date) combinations, ordered by the version string.
df_bullets[["version", "build", "date"]].drop_duplicates().sort_values("version")

Unnamed: 0,version,build,date
127,2.5.0,69713,UNKNOWN
91,2.5.1,69864,UNKNOWN
11,2.5.2,70156,UNKNOWN
0,2.5.3,70356,UNKNOWN


In [11]:
# Print the first 3000 characters of the first panel's pretty-printed HTML to inspect its markup.
print(panels[0].prettify()[:3000])

<div class="panel">
 <div class="panel-heading">
  <div class="panel-title">
   <a data-parent="#accordion" data-toggle="collapse" href="https://news.blizzard.com/en-us/article/24244466/diablo-iv-patch-notes#2.5.3">
    2.5.3 Build #70356 (All Platforms)—January 28, 2026
   </a>
  </div>
 </div>
 <div aria-expanded="false" class="panel-collapse collapse" id="2.5.3" style="">
  <div class="panel-body">
   <!-- Patch Notes -->
   <h1 style="text-align: center; color: rgb(139, 0, 0);">
    Bug Fixes
   </h1>
   <ul>
    <li>
     Fixed an issue where the Executioner Monster Affix sound effect played continuously.
    </li>
   </ul>
   <blockquote>
    <p>
     <strong style="color: rgb(236, 112, 99);">
      Developer’s Note:
     </strong>
     <em>
      This affix will be re-enabled with the release of this patch.
     </em>
    </p>
   </blockquote>
   <ul>
    <li>
     Fixed an issue where certain Silent Chests in Nahantu did not count towards the Season Rank objective Test of Luck.

In [12]:
# Same inspection as the previous cell, with the cut-off raised to 4000 characters.
print(panels[0].prettify()[:4000])

<div class="panel">
 <div class="panel-heading">
  <div class="panel-title">
   <a data-parent="#accordion" data-toggle="collapse" href="https://news.blizzard.com/en-us/article/24244466/diablo-iv-patch-notes#2.5.3">
    2.5.3 Build #70356 (All Platforms)—January 28, 2026
   </a>
  </div>
 </div>
 <div aria-expanded="false" class="panel-collapse collapse" id="2.5.3" style="">
  <div class="panel-body">
   <!-- Patch Notes -->
   <h1 style="text-align: center; color: rgb(139, 0, 0);">
    Bug Fixes
   </h1>
   <ul>
    <li>
     Fixed an issue where the Executioner Monster Affix sound effect played continuously.
    </li>
   </ul>
   <blockquote>
    <p>
     <strong style="color: rgb(236, 112, 99);">
      Developer’s Note:
     </strong>
     <em>
      This affix will be re-enabled with the release of this patch.
     </em>
    </p>
   </blockquote>
   <ul>
    <li>
     Fixed an issue where certain Silent Chests in Nahantu did not count towards the Season Rank objective Test of Luck.

In [13]:
from bs4 import BeautifulSoup
import pandas as pd
import re

# Load HTML
# Builds df_bullets with one row per patch-note bullet, tagged with the
# version, build and release date parsed from the owning panel's title.
with open("Diablo_IV_Patch_Notes.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

# One dict per bullet; converted to a DataFrame after the loop.
data = []

# Each patch is inside a div.panel
panels = soup.find_all("div", class_="panel")

for panel in panels:

    # --- Extract Title Block ---
    title_div = panel.find("div", class_="panel-title")
    if not title_div:
        continue  # without a title there is nothing to tag the bullets with

    title_text = title_div.get_text(strip=True)

    # --- Extract Version ---
    # First dotted triple in the title, e.g. 2.5.3
    version_match = re.search(r"\d+\.\d+\.\d+", title_text)
    version = version_match.group(0) if version_match else "UNKNOWN"

    # --- Extract Build ---
    # Digits after "Build", with an optional leading "#"
    build_match = re.search(r"Build\s+#?(\d+)", title_text)
    build = build_match.group(1) if build_match else "UNKNOWN"

    # --- Extract Date (embedded in title text) ---
    # Matches "Month D, YYYY" with a full English month name; kept as the raw
    # matched string (not converted to a datetime).
    date_match = re.search(
        r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}",
        title_text
    )
    date = date_match.group(0) if date_match else "UNKNOWN"

    # --- Extract Patch Body ---
    body = panel.find("div", class_="panel-body")
    if not body:
        continue

    # Extract true bullet points
    # NOTE(review): find_all() searches recursively, so nested <li> elements
    # (if the notes contain any) would appear both inside their parent's text
    # and as rows of their own -- confirm against the HTML.
    bullets = body.find_all("li")

    for li in bullets:
        text = li.get_text(separator=" ", strip=True)

        if text:  # skip bullets that contain no text
            data.append({
                "version": version,
                "build": build,
                "date": date,
                "bullet_text": text
            })

# Create DataFrame
df_bullets = pd.DataFrame(data)

print("Total bullets extracted:", len(df_bullets))

# Verify structure
# One row per distinct (version, build, date). The version column holds
# strings, so sort_values orders it lexicographically (e.g. "2.10.0" would
# sort before "2.5.0").
display(
    df_bullets[["version", "build", "date"]]
    .drop_duplicates()
    .sort_values("version")
)

Total bullets extracted: 863


Unnamed: 0,version,build,date
127,2.5.0,69713,"December 11, 2025"
91,2.5.1,69864,"December 18, 2025"
11,2.5.2,70156,"January 12, 2026"
0,2.5.3,70356,"January 28, 2026"


In [14]:
import re

# -----------------------------
# 1. BUGFIX DETECTION
# -----------------------------
def detect_bugfix(text):
    """Return True when a patch-note bullet reads like a bug fix.

    A bullet counts as a bug fix when it either

    * opens with the word "Fixed" (covers "Fixed an issue ...",
      "Fixed various instances ..." and similar openers), or
    * contains one of the explicit bug-fix phrases listed below anywhere
      in its text.

    Matching is case-insensitive.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    bool
        True if the bullet is recognised as a bug fix, otherwise False.
    """
    text_lower = text.lower()

    # Bullets such as "Fixed various instances of Placeholder assets ..." are
    # bug fixes but contain none of the "... an issue" phrases, so a leading
    # "Fixed" is accepted on its own.
    if re.match(r"\s*fixed\s", text_lower):
        return True

    bugfix_signals = (
        "fixed an issue",
        "resolved an issue",
        "corrected an issue",
        "addressed an issue",
        "fixed an amount",
    )
    return any(signal in text_lower for signal in bugfix_signals)


# -----------------------------
# 2. UI IMPROVEMENT DETECTION
# -----------------------------
def detect_ui(text):
    """Return True when a patch-note bullet mentions a user-interface topic.

    The check is case-insensitive. The abbreviation "ui" must appear as a
    whole word; every other keyword matches as a plain substring, so
    plurals and inflections such as "tooltips" or "displayed" still count.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    bool
        True if any UI keyword is found, otherwise False.
    """
    text_lower = text.lower()

    # A bare substring test for "ui" also fires inside unrelated words
    # ("druid", "build", "requires", "equipped"), so it is anchored on word
    # boundaries instead.
    if re.search(r"\bui\b", text_lower):
        return True

    ui_keywords = (
        "tooltip",
        "screen reader",
        "display",
        "visual indicator",
        "menu",
        "map",
        "icon",
        "clarity",
        "text updated",
        "description updated",
    )
    return any(word in text_lower for word in ui_keywords)


# -----------------------------
# 3. QUALITY OF LIFE DETECTION
# -----------------------------
def detect_qol(text):
    """Return True when a bullet contains a known quality-of-life phrase.

    The bullet is lower-cased and scanned for each phrase in turn; the
    first hit ends the search.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    bool
        True if any quality-of-life phrase occurs in the text, else False.
    """
    haystack = text.lower()

    for marker in (
        "now spawns closer",
        "removed the delay",
        "no longer required",
        "unlock earlier",
        "unlock at level",
        "respawn closer",
        "added audio cue",
        "improved accessibility",
        "no longer need",
        "portal now",
        "will never respawn",
    ):
        if marker in haystack:
            return True

    return False


# -----------------------------
# 4. NUMERIC DIRECTION DETECTION
# -----------------------------
def detect_numeric_direction(text):
    """Infer "Buff" or "Nerf" from a "from X to Y" numeric change.

    Every "from <number> to <number>" pair in the text is inspected in
    order; the first pair whose two values differ decides the result. Both
    numbers may carry a trailing "%" and may use "," as a thousands
    separator (e.g. "1,000"), which is stripped before comparison.

    The direction is purely numeric: a larger second value is reported as
    "Buff", a smaller one as "Nerf".
    NOTE(review): for stats where lower is better for the player (costs,
    cooldowns) that label is presumably inverted -- confirm whether callers
    rely on this.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    str or None
        "Buff", "Nerf", or None when no pair with differing values is found.
    """
    text_lower = text.lower()

    # A number is either comma-grouped thousands ("12,500", "1,000.5") or a
    # plain integer/decimal ("10", "2.5"). The grouped form is tried first so
    # that "1,000" is not rejected at the comma.
    number = r"(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+\.?\d*)"
    pattern = r"from\s+" + number + r"%?\s+to\s+" + number + r"%?"

    for before, after in re.findall(pattern, text_lower):
        before = float(before.replace(",", ""))
        after = float(after.replace(",", ""))

        if after > before:
            return "Buff"
        if after < before:
            return "Nerf"
        # Equal values carry no direction; keep looking at later pairs.

    return None


# -----------------------------
# 5. DIRECTIONAL KEYWORD LOGIC
# -----------------------------
def detect_directional_keywords(text):
    """Infer "Buff" or "Nerf" from directional wording in a bullet.

    Rules are evaluated in a fixed order and the first one that applies
    wins:

    1. cost wording ("cost reduced" -> Buff, "cost increased" -> Nerf)
    2. drop-rate wording ("rarer" -> Nerf, "drop rate increased" /
       "more items now drop" -> Buff)
    3. monster/boss tuning, where a stronger enemy is a Nerf and a weaker
       enemy is a Buff
    4. generic wording ("increased" -> Buff, "reduced" -> Nerf,
       "lowered" -> Buff, "removed" -> Nerf)

    All checks are case-insensitive substring tests.

    NOTE(review): in the generic rules "lowered" maps to Buff while
    "reduced" maps to Nerf, although the monster rules treat the two words
    as synonyms -- confirm this asymmetry is intended.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    str or None
        "Buff", "Nerf", or None when no rule applies.
    """
    lowered_text = text.lower()

    def mentions(*phrases):
        # True when at least one phrase occurs in the lower-cased bullet.
        return any(phrase in lowered_text for phrase in phrases)

    # Cost and drop-rate rules, highest priority first.
    specific_rules = (
        (("cost reduced",), "Buff"),
        (("cost increased",), "Nerf"),
        (("rarer",), "Nerf"),
        (("drop rate increased", "more items now drop"), "Buff"),
    )
    for phrases, verdict in specific_rules:
        if mentions(*phrases):
            return verdict

    # Enemy tuning: direction is flipped relative to the generic rules.
    if mentions("monster", "boss"):
        if mentions("increased", "tuned up"):
            return "Nerf"
        if mentions("reduced", "tuned down", "lowered"):
            return "Buff"

    # Generic wording, checked in this exact order.
    generic_rules = (
        ("increased", "Buff"),
        ("reduced", "Nerf"),
        ("lowered", "Buff"),
        ("removed", "Nerf"),
    )
    for word, verdict in generic_rules:
        if word in lowered_text:
            return verdict

    return None


# -----------------------------
# MAIN CLASSIFIER (HIERARCHICAL)
# -----------------------------
def classify_bullet(text):
    """Assign a single category label to a patch-note bullet.

    Detectors run in a fixed priority order and the first one that fires
    determines the label:

    1. bug fix          -> "Bugfix"
    2. UI change        -> "UI_Improvement"
    3. quality of life  -> "Quality_of_Life"
    4. numeric "from X to Y" direction -> "Buff" / "Nerf"
    5. directional keywords            -> "Buff" / "Nerf"
    6. nothing matched  -> "Neutral"

    NOTE(review): the numeric rule runs before the keyword rule, so a bullet
    like "cost reduced from 30 to 25" is labelled by the numeric comparison
    ("Nerf") before the keyword rule for "cost reduced" ("Buff") is
    consulted -- confirm this ordering is intended.

    Parameters
    ----------
    text : str
        Text of a single patch-note bullet.

    Returns
    -------
    str
        One of "Bugfix", "UI_Improvement", "Quality_of_Life", "Buff",
        "Nerf" or "Neutral".
    """
    # Yes/no detectors, each paired with the label it produces.
    category_detectors = (
        ("Bugfix", detect_bugfix),
        ("UI_Improvement", detect_ui),
        ("Quality_of_Life", detect_qol),
    )
    for label, detector in category_detectors:
        if detector(text):
            return label

    # Direction detectors return the label themselves (or None).
    for direction_rule in (detect_numeric_direction, detect_directional_keywords):
        direction = direction_rule(text)
        if direction:
            return direction

    # Fallback when no detector fired.
    return "Neutral"