# 1. Installations and Imports

In [None]:
import torch
import os
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
import tiktoken
from dotenv import load_dotenv
import time
import ast
import re
import warnings
warnings.filterwarnings('ignore')

# LangChain Import
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Finetuned Model Import
from transformers import BertTokenizer, BertForSequenceClassification
from src.model import PatentSentenceClassifier

# Load OpenAI API key
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# 2. Utils

This section defines the main functions that are utilized throughout the remainder of the notebook.

In [39]:
def count_tokens(text):
    """Return how many tokens `text` encodes to with tiktoken's "cl100k_base" encoding."""
    token_ids = tiktoken.get_encoding("cl100k_base").encode(text)
    return len(token_ids)

def calculate_rouge_scores_precision(text1, text2, rouge_scorer):
    """
    Compute rounded ROUGE precision values for a pair of texts.

    Parameters:
    - text1: Passed to the scorer as first argument (typically the generated or processed text)
    - text2: Passed to the scorer as second argument (typically the original reference text)
    - rouge_scorer: Callable (e.g. an initialized ROUGEScore object) that returns a mapping
      whose values expose `.item()`

    Returns:
    - Dictionary with the rouge1/3/5/7/9/L precision values, each rounded to 3 decimals
    """
    raw_scores = rouge_scorer(text1, text2)

    # Only the precision component of each ROUGE variant is reported
    precision_keys = [f'rouge{variant}_precision' for variant in ('1', '3', '5', '7', '9', 'L')]

    return {key: round(raw_scores[key].item(), 3) for key in precision_keys}

def prompt_chatgpt(input_text, input_context, prompt, model="gpt-4o", temperature=0, top_p=1):
    """
    Fill a prompt template and send it to an OpenAI chat model.

    Parameters:
    - input_text: Value substituted for the `{input_text}` placeholder
    - input_context: Value for the `{input_context}` placeholder; it is only
      passed to the template when truthy
    - prompt: Template string turned into a PromptTemplate
    - model, temperature, top_p: Forwarded to ChatOpenAI

    Returns:
    - Tuple `(output_string, formatted_prompt)`: the model reply with surrounding
      whitespace stripped, and the prompt text after placeholder substitution
    """
    template = PromptTemplate.from_template(prompt)

    # Chat model with a single retry and a 1000-token cap on the reply
    chat_model = ChatOpenAI(
        model=model,
        temperature=temperature,
        top_p=top_p,
        max_retries=1,
        max_tokens=1000,
    )

    # template -> model -> plain string
    pipeline = template | chat_model | StrOutputParser()

    # Template variables; the context is optional
    variables = {"input_text": input_text}
    if input_context:
        variables["input_context"] = input_context

    # The rendered prompt is returned so callers can log exactly what was sent
    rendered_prompt = template.format(**variables)

    reply = pipeline.invoke(variables).strip()

    return reply, rendered_prompt


def classify_text(model, input_text, device='cpu'):
    """
    Classify a text as FUN, STR, MIX or OTH.

    Parameters:
    - model: Callable classifier exposing a `tokenizer` attribute; its output must
      provide `.logits` (assumed to have one row per input text — confirm against the model class)
    - input_text: Text to classify
    - device: Device the tokenized tensors are moved to before inference

    Returns:
    - Tuple `(pred_class, probs)`: the predicted label and the list of softmax
      probabilities of the first row, each rounded to 2 decimals
    """
    # Class index -> label
    int_to_label = {0: 'FUN', 1: 'STR', 2: 'MIX', 3: 'OTH'}

    # Tokenize with the tokenizer carried by the model, truncating to 512 tokens
    encoded = model.tokenizer(input_text, truncation=True, padding=True, max_length=512, return_tensors='pt')

    # Tensors must live on the requested device
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference without gradient tracking
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=1)[0]
        best_idx = torch.argmax(probabilities).item()
        label = int_to_label[best_idx]

    rounded = [round(p, 2) for p in probabilities.tolist()]
    return label, rounded


def create_hierarchy(text):
    """
    Parse '>'-indented text into a DataFrame describing its hierarchy.

    Each line is expected to start with one '>' per nesting level, a single '>'
    marking a root-level item. Surrounding whitespace on a line is ignored, and a
    line without any leading '>' is treated as a root-level item (its first
    character is kept). Lines with no content are skipped.

    Arguments:
        text (str): A multiline string where each line begins with one or more '>' characters to indicate hierarchy.

    Returns:
        pd.DataFrame: A DataFrame with the following columns (present even when
        no line is parsed):
            - 'index': Hierarchical index (e.g., '1', '1.1', '1.1.1')
            - 'text': The textual content of the line
            - 'parent_indices': List of parent index strings
            - 'parents': List of parent content strings (only parents that were
              actually seen, so it can be shorter than 'parent_indices' when a
              line skips a level)
    """
    columns = ["index", "text", "parent_indices", "parents"]
    counters = []
    index_sentence_dict = {}
    rows = []

    for raw_line in text.strip().splitlines():
        # Ignore indentation/trailing blanks so they cannot distort the level
        line = raw_line.strip()

        # Drop the root marker only when it is really there; slicing
        # unconditionally would eat the first character of unmarked lines
        if line.startswith('>'):
            line = line[1:]

        # The remaining leading '>' characters give the nesting level
        stripped = line.lstrip('>')
        level = len(line) - len(stripped)
        content = stripped.strip()
        if not content:
            continue

        # Adjust counters for current level
        if len(counters) <= level:
            # Going deeper: open new level(s) starting at 1
            counters += [1] * (level + 1 - len(counters))
        else:
            # Same level or back up: discard deeper counters, advance this one
            counters = counters[:level + 1]
            counters[level] += 1

        # Build Index
        index = ".".join(map(str, counters[:level + 1]))
        index_sentence_dict[index] = content

        # Ancestors are the prefixes of the current counter path
        parent_indices = [".".join(map(str, counters[:i])) for i in range(1, level + 1)]
        parent_contents = [index_sentence_dict[pidx] for pidx in parent_indices if pidx in index_sentence_dict]

        rows.append({
            "index": index,
            "text": content,
            "parent_indices": parent_indices,
            "parents": parent_contents
        })

    return pd.DataFrame(rows, columns=columns)


def append_result_to_list(results, index, input_text, context=None, rephrasing_prompt=None,
                         rephrased_text=None, splitting_prompt=None, sentence=None,
                         pred_class=None, probs=None, rouge_scores=None, error=None):
    """
    Build one result record and append it to `results`.

    Parameters:
    - results: The list to append results to (modified in place)
    - index: Index level of the hierarchy
    - input_text: Original claim text (stored under the 'text' key)
    - context: Parent claim context if available
    - rephrasing_prompt: Prompt used for rephrasing
    - rephrased_text: Text after rephrasing
    - splitting_prompt: Prompt used for splitting
    - sentence: Current sentence being processed
    - pred_class: Predicted classification
    - probs: Classification probabilities
    - rouge_scores: Dict of ROUGE scores (keys: rouge1_precision, rouge3_precision, etc.);
      merged into the record only when no error is given
    - error: Error message if processing failed (stored under the 'errors' key)

    Returns:
    - The same `results` list, with the new record appended
    """
    rouge_columns = (
        'rouge1_precision',
        'rouge3_precision',
        'rouge5_precision',
        'rouge7_precision',
        'rouge9_precision',
        'rougeL_precision',
    )

    # Key order defines the column order of the DataFrame built from the records
    entry = {
        'index': index,
        'text': input_text,
        'context': context,
        'rephrasing_prompt': rephrasing_prompt,
        'rephrased_text': rephrased_text,
        'splitting_prompt': splitting_prompt,
        'sentence': sentence,
        'pred_class': pred_class,
        'probs': probs,
        **dict.fromkeys(rouge_columns),  # ROUGE columns default to None
        'errors': error,
    }

    # Scores are only meaningful for successfully processed sentences
    if rouge_scores and not error:
        entry.update(rouge_scores)

    results.append(entry)
    return results

# 3. Prompts Definition

This section defines the prompts that are utilized throughout the remainder of the notebook.

In [None]:
# =========================================================================================
# Prompt to indent claim
# Template with a single placeholder, {input_text}; it is passed as the `prompt`
# argument of prompt_chatgpt, which turns it into a PromptTemplate.
# The model is asked to mark each logical block with leading ">" characters,
# which is the format create_hierarchy parses.
indenting_prompt = """Your task is to format the following patent claim by indenting each logical block of information.
Use  ">" characters to indent the beginning of each block. 

\"{input_text}\"
"""
# Display the template for inspection (placeholders are not filled here)
print(indenting_prompt)

# =========================================================================================
# Prompt to rephrase a text using its context
# Template with two placeholders: {input_context} and {input_text}.
# Note that the template ends right after "Output:" (no trailing newline).
rephrasing_with_context_prompt = """Your task is to rephrase the given text into Subject-Verb-Object (SVO) structure.
Avoid using pronouns. Instead, repeat the original subject explicitly where needed.

Use the provided context (if any) to resolve references and pronouns in the main text.

Context Format: Supplementary information providing background for the main text.
Input Format: The main text that is to be rephrased.

Context: \"{input_context}\"
Input: \"{input_text}\"
Output:""" 

# =========================================================================================
# Prompt to split a text into sub-sentences using its context
# Template with two placeholders: {input_context} and {input_text}.
# The requested output format is a list literal of double-quoted strings.
splitting_with_context_prompt = """Your task is to split the given text into sub-sentences, ensuring that:
1. Each sub-sentence must contain only one predicate.
2. Avoid using pronouns. Instead, repeat the original subject explicitly where needed.
3. Do not split inline lists; treat item lists as a single unit.

Use the provided context (if any) to resolve references and pronouns in the main text.

Context Format: Supplementary information providing background for the main text.
Input Format: The main text that is to be split.
Output Format: A list of sub-sentences enclosed in double quotes, separated by commas (e.g., ["sub-sentence1", "sub-sentence2", "sub-sentence3"]).

Context: \"{input_context}\"
Input: \"{input_text}\"
Output:
"""
# Display the template for inspection (placeholders are not filled here)
print(splitting_with_context_prompt)


Your task is to format the following patent claim by indenting each logical block of information.
Use  ">" characters to indent the beginning of each block. 

"{input_text}"

Your task is to split the given text into sub-sentences, ensuring that:
1. Each sub-sentence must contain only one predicate.
2. Avoid using pronouns. Instead, repeat the original subject explicitly where needed.
3. Do not split inline lists; treat item lists as a single unit.

Use the provided context (if any) to resolve references and pronouns in the main text.

Context Format: Supplementary information providing background for the main text.
Input Format: The main text that is to be split.
Output Format: A list of sub-sentences enclosed in double quotes, separated by commas (e.g., ["sub-sentence1", "sub-sentence2", "sub-sentence3"]).

Context: "{input_context}"
Input: "{input_text}"
Output:



# 4. Load Classification Model

In this section, we load the classification model used to categorize claim sentences into four classes: FUN (Functional), STR (Structural), MIX (Mixed), and OTH (Other).

Currently commented out, it serves as a placeholder for integrating the custom classifier you intend to develop.

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Checkpoint selection: (fine-tuned checkpoint name, identifier of its base model).
# The last assignment wins; comment it out to switch to the previous option.
checkpoint_name, model_name = 'bert-large-uncased_train_10_4', "bert-large-uncased"
checkpoint_name, model_name = 'bert-for-patents_train_10_4', "anferico/bert-for-patents"
checkpoint_path = f"/home/fantoni/patent-sentence-classification/models/finetuning/{checkpoint_name}.ckpt"

# Tokenizer of the base model
bert_tokenizer = BertTokenizer.from_pretrained(model_name)
print('\nBase Tokenizer loaded succesfully.')

# Base model with a 4-way classification head
base_model = BertForSequenceClassification.from_pretrained(model_name, num_labels=4)
print('\nBase model loaded succesfully.')

# Restore the fine-tuned classifier from the checkpoint, wrapping the base model and tokenizer
model = PatentSentenceClassifier.load_from_checkpoint(
    checkpoint_path, model=base_model, tokenizer=bert_tokenizer
)

# Inference mode, on the selected device
model.eval()
model.to(device)
print(f"\nFinetuned model loaded succesfully. Using: '{checkpoint_name}'")

# Expose the tokenizer carried by the fine-tuned model
tokenizer = model.tokenizer

# 5. Import Claim

In this section, we load a .txt file containing a single input claim to be processed by the patent simplification pipeline. Notably, the pipeline is designed to operate on one claim at a time.

In [51]:
# Path of the claim to process: exactly one `filepath` assignment should be active.
# Patents
filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/US8695121B2_A42B3.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/US11133720B2_H02K3.txt"

# Pavanello Patents
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/IT-201900008253-A1_B65G1-023.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/US-10733341-B1_G06F30-30.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/WO-2017216367-A1_C08J5-0405.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/WO-2019021161-A1_F16D65-12.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/WO-2019243958-A1_F16D55-288.txt"
#filepath ="/home/fantoni/patent-sentence-classification/data/claim_simplification/WO-2020058819-A1_B6078-1706.txt"
#filepath = "/home/fantoni/patent-sentence-classification/data/claim_simplification/WO-2022144719-A1_B6078-261.txt"

# Read the claim with an explicit encoding so the result does not depend on the
# platform's default locale (the HTML report of this notebook is written as UTF-8 too).
# NOTE(review): assumes the claim files are UTF-8 encoded — confirm for files from other sources.
with open(filepath, "r", encoding="utf-8") as file:
    input_text = file.read()
print(input_text)

# Set file name (base name without extension), reused to name the result files
filename = os.path.splitext(os.path.basename(filepath))[0]

1. A helmet system for removing condensation from a user's field of vision, comprising: a helmet shell having an anterior section, a posterior section, and a venting passage, wherein the helmet shell defines an internal cavity that is in fluid communication with a front portion of the venting passage, and wherein the internal cavity is configured to receive the user's head; a visor coupled with the helmet shell, wherein at least part of the visor defines part of the internal cavity; a humidity sensor positioned within the internal cavity of the helmet shell; and a ventilation system comprising: a base coupled with the helmet shell, wherein the base has a first venting aperture in fluid communication with a rear portion of the venting passage, a base cover coupled with the base, wherein the base cover has a second venting aperture, an air movement assembly disposed between the base and the base cover, wherein the air movement assembly provides fluid communication between the first venti

# 6. Patent Claim Simplification Pipeline

## 6.1 Extract Hierarchy

This is the initial step of the patent simplification pipeline, where a prompt is used to insert indentation and reveal the hierarchical structure of the claim. The prompt is specifically designed to identify indentation levels that reflect the logical relationships and dependencies between different segments of the text.

In [None]:
# Ask the LLM to mark the claim structure with '>' indentation.
# No context is supplied; the formatted prompt (second return value) is not needed here.
output_string, _ = prompt_chatgpt(
    input_text=input_text,
    input_context=None,
    prompt=indenting_prompt,
    model='gpt-3.5-turbo',
)
print(output_string)

# Turn the indented text into a table: one row per block with its hierarchical
# index and the indices/contents of its parents.
df = create_hierarchy(output_string)
df

>1. A helmet system for removing condensation from a user's field of vision, comprising:
>>a helmet shell having an anterior section, a posterior section, and a venting passage, wherein the helmet shell defines an internal cavity that is in fluid communication with a front portion of the venting passage, and wherein the internal cavity is configured to receive the user's head;
>>a visor coupled with the helmet shell, wherein at least part of the visor defines part of the internal cavity;
>>a humidity sensor positioned within the internal cavity of the helmet shell; and
>>a ventilation system comprising:
>>>a base coupled with the helmet shell, wherein the base has a first venting aperture in fluid communication with a rear portion of the venting passage,
>>>a base cover coupled with the base, wherein the base cover has a second venting aperture,
>>>an air movement assembly disposed between the base and the base cover, wherein the air movement assembly provides fluid communication betwe

Unnamed: 0,index,text,parent_indices,parents
0,1,1. A helmet system for removing condensation f...,[],[]
1,1.1,"a helmet shell having an anterior section, a p...",[1],[1. A helmet system for removing condensation ...
2,1.2,"a visor coupled with the helmet shell, wherein...",[1],[1. A helmet system for removing condensation ...
3,1.3,a humidity sensor positioned within the intern...,[1],[1. A helmet system for removing condensation ...
4,1.4,a ventilation system comprising:,[1],[1. A helmet system for removing condensation ...
5,1.4.1,"a base coupled with the helmet shell, wherein ...","[1, 1.4]",[1. A helmet system for removing condensation ...
6,1.4.2,"a base cover coupled with the base, wherein th...","[1, 1.4]",[1. A helmet system for removing condensation ...
7,1.4.3,an air movement assembly disposed between the ...,"[1, 1.4]",[1. A helmet system for removing condensation ...
8,1.4.4,"a switch,","[1, 1.4]",[1. A helmet system for removing condensation ...
9,1.4.5,"a power source, and","[1, 1.4]",[1. A helmet system for removing condensation ...


## 6.2 Rephrasing, Splitting, and Classification

In this section, we perform three main steps:
1. Rephrasing 
2. Splitting  
3. Classification

These steps are applied conditionally based on the structure of the input:

- **Case 1**: For short texts (at most 20 words) ending with a colon (e.g., *"the camera comprises:"*), we perform only classification (3).  
  Rephrasing in this context can introduce errors in the Bill of Materials (BoM) hierarchy.

- **Case 2**: For texts that have a parent sentence or are long (20 words or more), we apply all three steps (1), (2) and (3).

- **Case 3**: For short sentences without a parent node (i.e., root-level sentences), we perform only classification (3).

**Note**: These steps are executed all at once to avoid running multiple prompts for the same sentence, thereby improving efficiency and reducing API cost.

**Note**: Results may differ between ChatGPT (web interface) and the OpenAI API when using the same prompt.  
See the related discussion: [OpenAI Community Thread](https://community.openai.com/t/different-results-same-prompt-on-openai-api-vs-chatgpt/1062995)


In [None]:
import pandas as pd
import re
import ast
from tqdm import tqdm
from torchmetrics.text.rouge import ROUGEScore

# Initialize ROUGE scorer with various n-gram options
# reference: https://medium.com/nlplanet/two-minutes-nlp-learn-the-rouge-metric-by-examples-f179cc285499
rouge_scorer = ROUGEScore(rouge_keys=('rouge1', 'rouge3', 'rouge5', 'rouge7', 'rouge9', 'rougeL'))

# Model configuration
CHATGPT_MODEL = 'gpt-4o' # 'gpt-3.5-turbo'
TEMPERATURE = 0
TOP_P = 1

# Number of closest parent claims passed to the LLM as context
n_parents = 1

# One record per classified sentence (or per failed row), filled by append_result_to_list
results = []

# Main processing loop: one iteration per row of the hierarchy DataFrame
for i, row in tqdm(df.iterrows(), total=len(df), desc="Processing Claim Texts"):
    # Read the row fields before the try block so that the error handler below
    # always reports the current row and never values left over from a previous iteration
    input_text = row['text']
    index = row['index']

    try:
        word_count = len(re.findall(r'\b\w+\b', input_text))

        # Case 1: Short text ending with colon - classification only
        # Rephrasing such sentences using contextual information can cause errors in the BOM hierarchy,
        # so they are directly classified without rephrasing.
        is_short_header = input_text.endswith(':') and word_count <= 20

        # Case 2: Text with parents or long text - rephrasing, splitting and classification
        if not is_short_header and (row['parents'] or word_count >= 20):

            # 1. Get context from parent claims if available
            input_context = ' '.join(row['parents'][-n_parents:]) if row['parents'] else ' '

            # 2. Rephrase text using context
            rephrased_text, rephrasing_prompt = prompt_chatgpt(
                input_text, input_context, rephrasing_with_context_prompt, CHATGPT_MODEL, TEMPERATURE, TOP_P)

            # 3. Split text into sub-sentences
            split_text, splitting_prompt = prompt_chatgpt(
                rephrased_text, input_context, splitting_with_context_prompt, CHATGPT_MODEL, TEMPERATURE, TOP_P)

            # Validate output format
            if not split_text:
                raise ValueError("Output is empty.")

            try:
                sub_sentences = ast.literal_eval(split_text)
            except (SyntaxError, ValueError) as e:
                raise ValueError(f"Output not in list format: {e}") from e

            # literal_eval also accepts non-list literals: a bare string would be
            # iterated character by character below, so reject anything that is
            # not a list of strings
            if not isinstance(sub_sentences, (list, tuple)) or not all(isinstance(s, str) for s in sub_sentences):
                raise ValueError(f"Output not in list format: expected a list of strings, got {split_text!r}")

            # 4. Process each sub-sentence
            for sent in sub_sentences:
                # Classify sub-sentence
                pred_class, probs = classify_text(model, sent, device)

                # Calculate rouge scores of the sub-sentence against the original text
                rouge_scores = calculate_rouge_scores_precision(sent, input_text, rouge_scorer)

                append_result_to_list(
                    results,
                    index,
                    input_text,
                    context=input_context,
                    rephrasing_prompt=rephrasing_prompt,
                    rephrased_text=rephrased_text,
                    splitting_prompt=splitting_prompt,
                    sentence=sent,
                    pred_class=pred_class,
                    probs=probs,
                    rouge_scores=rouge_scores
                )

        # Case 1 (short text ending with colon) and
        # Case 3 (short sentences without parents, that is short root sentences) - classification only
        else:
            # Perform Classification on the unmodified text
            pred_class, probs = classify_text(model, input_text, device)

            # Calculate rouge scores (text compared with itself, since it is not rewritten)
            rouge_scores = calculate_rouge_scores_precision(input_text, input_text, rouge_scorer)

            append_result_to_list(
                results,
                index,
                input_text,
                sentence=input_text,
                pred_class=pred_class,
                probs=probs,
                rouge_scores=rouge_scores
            )

    # Handle errors: log them and keep a record of the failed row, then continue with the next row
    except Exception as e:
        print(f"Error processing claim {index}: {str(e)}")
        append_result_to_list(
            results,
            index,
            input_text,
            error=str(e)
        )

# Create DataFrame from results and save to Excel
output_path = f"/home/fantoni/patent-sentence-classification/results/claim_simplification/{filename}_{CHATGPT_MODEL}.xlsx"
df_results = pd.DataFrame(results)
df_results.to_excel(output_path, index=False)
print(f"Results saved to {output_path}")

## 6.3 Visualization

In this section, we generate an HTML document to enhance the visualization of both the hierarchical structure and the classification results. This interactive output helps to better interpret and validate the results of the simplification pipeline.

In [None]:
import html  # standard library; used to escape text inserted into the HTML report

# Sample Excel file path
excel_path = "/home/fantoni/patent-sentence-classification/results/claim_simplification/WO-2020058819-A1_B6078-1706_gpt-4o.xlsx"
#excel_path = "/home/fantoni/patent-sentence-classification/results/claim_simplification/WO-2019021161-A1_F16D65-12_gpt-4o.xlsx"
#excel_path = "/home/fantoni/patent-sentence-classification/results/claim_simplification/WO-2019243958-A1_F16D55-288_gpt-4o.xlsx"
#excel_path = "/home/fantoni/patent-sentence-classification/results/claim_simplification/IT-201900008253-A1_B65G1-023_gpt-4o.xlsx"
#excel_path = "/home/fantoni/patent-sentence-classification/results/claim_simplification/US8695121B2_A42B3_gpt-4o.xlsx"

# Set file name (base name of the Excel file without extension)
filename = os.path.splitext(os.path.basename(excel_path))[0]

# Get original text: the claim file name is the Excel name without its last
# '_'-separated part (the model suffix). The stem is computed outside the f-string
# because reusing the same quote type inside an f-string expression is a syntax
# error before Python 3.12.
claim_stem = "_".join(filename.split('_')[:-1])
txt_path = f"/home/fantoni/patent-sentence-classification/data/claim_simplification/{claim_stem}.txt"
# NOTE(review): assumes the claim files are UTF-8 encoded — confirm for files from other sources.
with open(txt_path, "r", encoding="utf-8") as file:
    original_claim = file.read()

# Read the Excel file; 'index' is kept as string so that values such as '1.10' are not parsed as numbers
df = pd.read_excel(excel_path, dtype={"index": str})

# Create level of indentation: number of dot-separated components of the index
df['level'] = df['index'].apply(lambda x: len(str(x).split('.')))

# Define background colors for classes
class_colors = {
    'FUN': '#ffcccc',   # reddish
    'STR': '#cce5ff',   # blueish
    'MIX': '#ccffcc',   # greenish
    'OTH': '#e0e0e0'    # greyish
}

# Create color legend HTML (colors mirror `class_colors`)
legend_html = """
<div class="section">
    <summary>Legend</summary>
    <ul style="list-style-type: none; padding-left: 0;">
        <li style="background-color:#ffcccc; padding:4px; margin:2px 0; display: inline-block; width: 100px;">FUN</li>
        <li style="background-color:#cce5ff; padding:4px; margin:2px 0; display: inline-block; width: 100px;">STR</li>
        <li style="background-color:#ccffcc; padding:4px; margin:2px 0; display: inline-block; width: 100px;">MIX</li>
        <li style="background-color:#e0e0e0; padding:4px; margin:2px 0; display: inline-block; width: 100px;">OTH</li>
    </ul>
</div>
"""

# Create html for original text; the claim is escaped so that characters such as
# '<', '>' or '&' are displayed literally instead of being parsed as markup
original_claim_html = f"<ul><li>{html.escape(original_claim)}</li></ul>"

# Create html for simplified text
grouped = df.dropna(subset=["sentence"]).groupby("level") # Group by 'level' for collapsible sentence sections

# HTML fragments are collected in a list and joined once at the end
sentence_parts = []
for level, group in grouped:
    # Deeper levels are shifted further to the right
    indent = int(level) * 40
    sentence_parts.append(f"<details style='margin-left:{indent}px'>")
    sentence_parts.append("<summary></summary><ul style='list-style-type:none;'>")

    # Track the last seen index at this level
    last_index = None
    for _, row in group.iterrows():

        current_index = row['index']

        # Add a separator line when moving on to the sentences of another claim block
        if last_index is not None and current_index != last_index:
            sentence_parts.append("<hr style='border: none; border-top: 1px solid #888; margin: 10px 0;'>")
        last_index = current_index

        # Unknown or missing classes fall back to a white background
        bg_color = class_colors.get(row['pred_class'], '#ffffff')
        sentence_parts.append(
            f"<li style='background-color:{bg_color}; padding:4px; margin:2px 0;'>"
            f"{html.escape(str(row['sentence']))}</li>"
        )

    sentence_parts.append("</ul></details>")

sentence_html = "".join(sentence_parts)

# Combine everything into final HTML; the charset is declared because the file is written as UTF-8
html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Claim Visualization {html.escape(filename)}</title>
    <style>
        .section {{
            margin-bottom: 20px;
        }}
        summary {{
            font-weight: bold;
            cursor: pointer;
        }}
        body {{
            font-family: Arial, sans-serif;
            line-height: 1.6;
            margin: 20px;
        }}
    </style>
</head>
<body>
    <div class="section">
        <details open>
            <summary>Original Text</summary>
            {original_claim_html}
        </details>
    </div>
    {legend_html}
    <div class="section">
        <details open>
            <summary>Simplified Text</summary>
            {sentence_html}
        </details>
    </div>
</body>
</html>
"""

# Save the HTML to a file
html_file_path = f"/home/fantoni/patent-sentence-classification/results/claim_simplification/{filename}.html"
with open(html_file_path, "w", encoding="utf-8") as f:
    f.write(html_content)

print(f"HTML saved to {html_file_path}")