In [None]:
# -----------------------------------------------------------
# Import Required Libraries
# -----------------------------------------------------------

import re              # For pattern matching
import pandas as pd    # For structured data handling

In [2]:
# -----------------------------------------------------------
# Define Parsing Function
# -----------------------------------------------------------

def parse_patch_notes(filepath):
    """Parse a Diablo IV patch-notes text file into a pandas DataFrame.

    The file is read as UTF-8, blank lines are dropped and every remaining
    line is stripped. Lines are then classified, in this order, as:

    * a patch header (``<x.y.z> Build #<n> ... <em dash> <date>``), which
      updates the patch/build/date context for the lines that follow;
    * a known section title (e.g. ``Bug Fixes``), which updates the section
      context;
    * a ``Previous: ...`` line immediately followed by a ``Now: ...`` line,
      which are merged into one ``"comparison"`` record;
    * anything else, which becomes a ``"single"`` record holding the line.

    Args:
        filepath: Path of the patch-notes text file (anything ``open``
            accepts).

    Returns:
        A DataFrame with the columns ``patch``, ``build``, ``date``,
        ``section``, ``change_type``, ``previous``, ``now`` and
        ``full_text``. The columns are present even when the file yields
        no records. Context columns are ``None`` for lines that appear
        before the first header / section title.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    columns = [
        "patch", "build", "date", "section",
        "change_type", "previous", "now", "full_text",
    ]

    # Built once per call instead of once per line; set membership is O(1).
    section_headers = frozenset({
        "Bug Fixes", "Game Updates", "Balance Update",
        "Base Game", "Expansion", "Accessibility",
        "Skills", "Passives", "Items",
        "Legendary Aspects", "Paragon",
        "Tempering", "Miscellaneous",
    })

    previous_label = "Previous:"
    now_label = "Now:"

    # The separator before the date is an em dash (U+2014). The second
    # alternative is the three-character sequence an em dash turns into
    # when UTF-8 bytes are mis-decoded as cp1252; it is kept so files that
    # already contain that garbled text still parse.
    patch_pattern = re.compile(
        r"(\d+\.\d+\.\d+)\s+Build\s+#(\d+).*"
        r"(?:\u2014|\u00e2\u20ac\u201d)\s*(.+)"
    )

    with open(filepath, "r", encoding="utf-8") as f:
        text = f.read()

    lines = [line.strip() for line in text.splitlines() if line.strip()]

    records = []

    current_patch = None
    current_build = None
    current_date = None
    current_section = None

    # An explicit index is used because a comparison consumes two lines.
    i = 0
    total = len(lines)

    while i < total:
        line = lines[i]

        # A) Patch header: refresh the patch/build/date context.
        header = patch_pattern.match(line)
        if header:
            current_patch = header.group(1)
            current_build = header.group(2)
            current_date = header.group(3).strip()
            i += 1
            continue

        # B) Section title: refresh the section context.
        if line in section_headers:
            current_section = line
            i += 1
            continue

        # C) "Previous:" directly followed by "Now:" -> one comparison row.
        #    The labels must match exactly (including the colon) so that
        #    ordinary sentences such as "Previously ..." are not merged.
        next_line = lines[i + 1] if i + 1 < total else ""
        if line.startswith(previous_label) and next_line.startswith(now_label):
            # Slice off only the leading label; the same text appearing
            # later in the line is part of the content and is kept.
            previous_text = line[len(previous_label):].strip()
            now_text = next_line[len(now_label):].strip()

            records.append({
                "patch": current_patch,
                "build": current_build,
                "date": current_date,
                "section": current_section,
                "change_type": "comparison",
                "previous": previous_text,
                "now": now_text,
                "full_text": f"Changed from {previous_text} to {now_text}",
            })
            i += 2
            continue

        # D) Everything else (including an unpaired "Previous:" line) is
        #    kept verbatim as a single entry.
        records.append({
            "patch": current_patch,
            "build": current_build,
            "date": current_date,
            "section": current_section,
            "change_type": "single",
            "previous": None,
            "now": None,
            "full_text": line,
        })
        i += 1

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=columns)