In [2]:
import os
import sys

# --- Configuration ---
TARGET_DIRECTORY = '.'  # Directory to scan; '.' is the current working directory when the script is run
IFRAME_HEIGHT = '600px' # Value written into the iframe's height attribute
IFRAME_WIDTH = '100%'   # Value written into the iframe's width attribute
IFRAME_STYLE = 'border: 1px solid #ccc;' # Value written into the iframe's inline style attribute
# --- End Configuration ---

# Literal markers that delimit the Giscus script block. Only the opening-tag
# prefix and the closing tag are searched for (as exact substrings); whatever
# attributes appear between them are not inspected.
GISCUS_SCRIPT_START = '<script src="https://giscus.app/client.js"'
GISCUS_SCRIPT_END = '</script>'

def find_giscus_script_block(content):
    """Locate the Giscus script block inside ``content``.

    Returns a ``(start, end)`` pair where ``start`` is the offset of the
    opening marker and ``end`` is the offset just past the first closing
    marker found at or after ``start``. The pair is ``(-1, -1)`` when the
    opening marker is absent and ``(start, -1)`` when the opening marker is
    present but no closing marker follows it.
    """
    try:
        opening_at = content.index(GISCUS_SCRIPT_START)
    except ValueError:
        # No opening marker anywhere in the text.
        return -1, -1

    # Only a closing marker located after the opening one counts.
    closing_at = content.find(GISCUS_SCRIPT_END, opening_at)
    block_end = -1 if closing_at == -1 else closing_at + len(GISCUS_SCRIPT_END)
    return opening_at, block_end

def _build_iframe_tag(html_filename, base_name):
    """Return the iframe markup (with trailing blank line) embedding ``html_filename``."""
    iframe_title = base_name.replace('_', ' ')  # Simple title generation
    return (
        f'<iframe src="{html_filename}" '
        f'width="{IFRAME_WIDTH}" height="{IFRAME_HEIGHT}" '
        f'style="{IFRAME_STYLE}" '
        f'title="{iframe_title} Interactive Content">\n'
        f'    您的浏览器不支持 iframe，无法加载交互式内容。\n'
        f'    请 <a href="{html_filename}" target="_blank">点击这里在新窗口中查看</a>。\n'
        f'</iframe>\n\n'  # Extra newline for spacing before Giscus
    )


def _iframe_already_present(prefix, html_filename, iframe_tag):
    """Return True if an iframe for ``html_filename`` sits just before the Giscus block."""
    # The generated tag is itself longer than 200 characters, so a fixed
    # 200-character window could never contain its opening '<iframe src=...'
    # and the check would never recognise a previous run's output. Size the
    # window from the tag and keep 200 characters of slack for blank lines or
    # slightly different hand-written / older variants of the tag.
    search_window = prefix[-(len(iframe_tag) + 200):]
    return f'src="{html_filename}"' in search_window and '<iframe' in search_window


def process_directory(directory):
    """Insert an iframe ahead of the Giscus script block in each Markdown file.

    For every ``*.md`` entry directly inside ``directory`` (no recursion) that
    has a sibling ``<name>.html`` file and contains a complete Giscus script
    block, an ``<iframe>`` embedding that HTML file is written immediately
    before the script block. A file that already has such an iframe just
    before the block is left untouched, so running the function again does not
    insert duplicates. Progress messages and a final summary are printed to
    stdout.

    Args:
        directory: Path of the directory to scan.

    Returns:
        None.

    Raises:
        OSError: If ``directory`` itself cannot be listed. Failures on
            individual files are reported and counted instead of raised.
    """
    print(f"Scanning directory: {os.path.abspath(directory)}")
    found_md_files = 0
    processed_files = 0
    skipped_no_html = 0
    skipped_no_giscus = 0
    skipped_already_present = 0
    failed_files = 0

    # os.listdir returns entries in arbitrary order; sort so that the log and
    # the order of modifications are the same on every run and platform.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".md"):
            continue

        found_md_files += 1
        md_filepath = os.path.join(directory, filename)
        base_name, _ = os.path.splitext(filename)
        html_filename = base_name + ".html"
        html_filepath = os.path.join(directory, html_filename)

        print(f"\nProcessing: {filename}")

        # 1. Check if corresponding HTML file exists
        if not os.path.isfile(html_filepath):
            print(f"  [SKIP] Corresponding HTML file '{html_filename}' not found.")
            skipped_no_html += 1
            continue

        # 2. Read Markdown file content
        try:
            with open(md_filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeError) as e:
            print(f"  [ERROR] Could not read file {filename}: {e}")
            failed_files += 1
            continue

        # 3. Find the Giscus script block
        giscus_start_index, giscus_end_index = find_giscus_script_block(content)
        if giscus_start_index == -1 or giscus_end_index == -1:
            print(f"  [SKIP] Giscus script block not found or incomplete in {filename}.")
            skipped_no_giscus += 1
            continue

        # 4. Construct the iframe tag
        iframe_tag = _build_iframe_tag(html_filename, base_name)

        # 5. Skip files that already carry the iframe right before Giscus
        prefix = content[:giscus_start_index]
        if _iframe_already_present(prefix, html_filename, iframe_tag):
            print(f"  [SKIP] Iframe for '{html_filename}' seems to be already present before Giscus script.")
            skipped_already_present += 1
            continue

        # 6. Insert the iframe tag, making sure it starts on its own line
        if prefix and not prefix.endswith('\n'):
            prefix += '\n'
        new_content = prefix + iframe_tag + content[giscus_start_index:]

        # 7. Write the modified content back
        try:
            with open(md_filepath, 'w', encoding='utf-8') as f:
                f.write(new_content)
        except (OSError, UnicodeError) as e:
            print(f"  [ERROR] Could not write updated file {filename}: {e}")
            failed_files += 1
        else:
            print(f"  [SUCCESS] Inserted iframe for '{html_filename}' into {filename}.")
            processed_files += 1

    print("\n--- Processing Summary ---")
    print(f"Found Markdown files: {found_md_files}")
    print(f"Successfully processed: {processed_files}")
    print(f"Skipped (No corresponding HTML): {skipped_no_html}")
    print(f"Skipped (Giscus script not found): {skipped_no_giscus}")
    print(f"Skipped (Iframe already present): {skipped_already_present}")
    print(f"Failed (read/write error): {failed_files}")
    print("--------------------------")

# --- Run the script ---
# Process TARGET_DIRECTORY only when this code runs as the main program
# (not when it is imported as a module).
if __name__ == "__main__":
    process_directory(TARGET_DIRECTORY)


Scanning directory: c:\Users\Inuyasha\Desktop\AIDIY\docs\RLHF_Pages

Processing: DPO.md
  [SUCCESS] Inserted iframe for 'DPO.html' into DPO.md.

Processing: DPO_Exercise.md
  [SUCCESS] Inserted iframe for 'DPO_Exercise.html' into DPO_Exercise.md.

Processing: DPO_Math.md
  [SUCCESS] Inserted iframe for 'DPO_Math.html' into DPO_Math.md.

Processing: DPO_Problem.md
  [SUCCESS] Inserted iframe for 'DPO_Problem.html' into DPO_Problem.md.

Processing: FPO.md
  [SUCCESS] Inserted iframe for 'FPO.html' into FPO.md.

Processing: GRPO.md
  [SUCCESS] Inserted iframe for 'GRPO.html' into GRPO.md.

Processing: KTO.md
  [SUCCESS] Inserted iframe for 'KTO.html' into KTO.md.

Processing: MCTS_DPO.md
  [SUCCESS] Inserted iframe for 'MCTS_DPO.html' into MCTS_DPO.md.

Processing: NPO.md
  [SUCCESS] Inserted iframe for 'NPO.html' into NPO.md.

Processing: PPO.md
  [SUCCESS] Inserted iframe for 'PPO.html' into PPO.md.

Processing: PPO_Loss.md
  [SUCCESS] Inserted iframe for 'PPO_Loss.html' into PPO_Loss.m