In [2]:
from bs4 import BeautifulSoup
import json

# Saved copy of the Diablo IV patch-notes page that the rest of the notebook parses.
file_path = r"C:\Users\renai\OneDrive\College\Info 519 NLP and Deep Learning\Group Project\Project Files\Diablo_IV_Patch_Notes.html"

# Read the whole document as UTF-8 text first, then hand the markup to the parser.
with open(file_path, mode="r", encoding="utf-8") as html_file:
    html_text = html_file.read()
soup = BeautifulSoup(html_text, "html.parser")

print("HTML loaded successfully")

# Every <div class="panel"> is treated as one patch entry by the cells below.
panels = soup.find_all("div", class_="panel")
print("Number of patch panels found:", len(panels))

HTML loaded successfully
Number of patch panels found: 4


In [3]:
import re

# One {"header", "content"} record per patch panel, in document order.
patch_data = []

for panel in panels:

    # The first anchor in the panel holds the header line (version + build + date);
    # fall back to "Unknown" when the panel has no anchor.
    title_link = panel.find("a")
    if title_link:
        header_text = title_link.get_text(strip=True)
    else:
        header_text = "Unknown"

    # The patch notes themselves live in the panel body; text nodes are
    # stripped and joined with newlines. A missing body yields an empty string.
    notes_div = panel.find("div", class_="panel-body")
    if notes_div:
        body_text = notes_div.get_text(separator="\n", strip=True)
    else:
        body_text = ""

    record = {"header": header_text, "content": body_text}
    patch_data.append(record)

print("Extracted patches:", len(patch_data))
print("\nFirst patch header:\n", patch_data[0]["header"])

Extracted patches: 4

First patch header:
 2.5.3 Build #70356 (All Platforms)—January 28, 2026


In [5]:
def _first_group(pattern, text):
    """Return capture group 1 of the first match of `pattern` in `text`, or None if there is no match."""
    found = re.search(pattern, text)
    return found.group(1) if found else None


# Split each raw header into version / build / date fields; a field whose
# pattern does not match is stored as None. The body text is carried over as-is.
structured_patches = []

for patch in patch_data:
    header = patch["header"]

    # Everything after the em dash is taken as the release date (whitespace-trimmed).
    date = _first_group(r"—(.+)", header)
    if date is not None:
        date = date.strip()

    structured_patches.append({
        # Dotted three-part number, e.g. "2.5.3"
        "version": _first_group(r"(\d+\.\d+\.\d+)", header),
        # Digits following "Build #"
        "build": _first_group(r"Build\s+#(\d+)", header),
        "date": date,
        "content": patch["content"],
    })

print(structured_patches[0])

{'version': '2.5.3', 'build': '70356', 'date': 'January 28, 2026', 'content': "Bug Fixes\nFixed an issue where the Executioner Monster Affix sound effect played continuously.\nDeveloper’s Note:\nThis affix will be re-enabled with the release of this patch.\nFixed an issue where certain Silent Chests in Nahantu did not count towards the Season Rank objective Test of Luck.\nFixed an issue where Zagraal in the Dark Citadel didn't drop loot.\nFixed an issue where some Tower bosses had significantly more health than others.\nFixed an issue where an error would occur when trying to view the profile of a leaderboard entry and the player had a private profile.\nFixed an issue where resetting a piece of masterworked gear did not unlock the Recycled Works challenge.\nFixed an issue where the reward for defeating all the Lesser Evils at once was only given to the player who opened the chest in Local Co-op play.\nFixed an issue where other Divine Gifts could stop gaining experience if another Divi

In [6]:
# Persist the parsed patches as pretty-printed JSON in the working directory.
# ensure_ascii=False writes characters such as ’ and — as real UTF-8 text
# (the file is opened as UTF-8) instead of \uXXXX escape sequences, so the
# saved corpus stays human-readable; json.load returns the same data either way.
with open("diablo_iv_patches_structured.json", "w", encoding="utf-8") as f:
    json.dump(structured_patches, f, indent=4, ensure_ascii=False)

print("Saved successfully.")

Saved successfully.


In [7]:
import pandas as pd

# Build the analysis frame in a single chain:
#   - parse the textual release date into a datetime column,
#   - add size metrics for each patch body (whitespace-separated words, characters),
#   - order the rows chronologically (the original row index is kept).
df = (
    pd.DataFrame(structured_patches)
    .assign(
        date=lambda frame: pd.to_datetime(frame["date"]),
        word_count=lambda frame: frame["content"].apply(lambda text: len(text.split())),
        char_count=lambda frame: frame["content"].apply(len),
    )
    .sort_values("date")
)

df

Unnamed: 0,version,build,date,content,word_count,char_count
3,2.5.0,69713,2025-12-11,Note:\nAdditional changes made since the PTR’s...,10650,66650
2,2.5.1,69864,2025-12-18,Game Updates\nBase Game\nUpdated descriptions ...,661,4061
1,2.5.2,70156,2026-01-12,Game Updates\nBase Game\nThe Tower and Leaderb...,1116,6674
0,2.5.3,70356,2026-01-28,Bug Fixes\nFixed an issue where the Executione...,194,1157


In [8]:
def classify_patch(word_count, major_threshold=5000, mid_threshold=1000):
    """Bucket a patch by the size of its notes.

    Parameters
    ----------
    word_count : int
        Number of words in the patch notes.
    major_threshold : int, optional
        Word counts strictly greater than this are "Major Update" (default 5000).
    mid_threshold : int, optional
        Word counts strictly greater than this, but not above
        ``major_threshold``, are "Mid Update" (default 1000).

    Returns
    -------
    str
        "Major Update", "Mid Update", or "Minor / Hotfix".
    """
    # Both comparisons are strict: a count exactly equal to a threshold
    # falls into the lower bucket.
    if word_count > major_threshold:
        return "Major Update"
    if word_count > mid_threshold:
        return "Mid Update"
    return "Minor / Hotfix"

# Label every patch with its size bucket, derived from the word count.
df["patch_type"] = df["word_count"].map(classify_patch)

# Show only the columns needed to read the classification.
df.loc[:, ["version", "date", "word_count", "patch_type"]]

Unnamed: 0,version,date,word_count,patch_type
3,2.5.0,2025-12-11,10650,Major Update
2,2.5.1,2025-12-18,661,Minor / Hotfix
1,2.5.2,2026-01-12,1116,Mid Update
0,2.5.3,2026-01-28,194,Minor / Hotfix
