这个脚本能让你把【boluo的原始log】批量变成【高可读性的markdown、html和pdf文件】

最后的产物
- 丑得很朴素，但是能看
- 扉页包括log标题、PL和GM名单
- 保留了boluo的markdown格式渲染
- 自动读取角色名（只有“描述动作”记录的角色无法读取）并给出染色建议
  - 需要手动查看并修改`PCs`里的角色名和颜色
- 可以自定义GM与PL的消息颜色
- 可以产出【单列】或【PC名与RP分开的双列】log
- 可以兼容boluo的“描述动作”消息
- 可以增加boluo频道名为章节名的超链接目录

你需要：从boluo以txt形式导出频道数据，记得
- 过滤游戏外消息
- 只导出基本的名字和内容
把它们放在同一个文件夹里，并让文件名的排序与你希望在最终log里呈现的顺序一致就行了（脚本按文件名排序，章节名取文件名中最后一个“_”之后的部分）。

你还需要：安装一些python的库，并修改下面的参数

In [70]:
# Name of the log (shown on the title page and in the HTML <title>)
# log_name = "残响"
# log_name = "柯比特先生"
log_name = "嗜极体"


# Example presets, selected by log_name. Every preset defines:
#   GM         - game-master name mapped to its color and player (PL)
#   PCs        - character names mapped to their color and player (PL)
#   root       - working directory, usually where the raw boluo output is stored
#   txt_folder - folder (under root) holding the exported .txt files
# Black is recommended for the GM.
# Not sure which color to pick? See https://htmlcolorcodes.com/color-names/
if log_name == "残响":
    GM = {"鄫": {'color': 'black', 'PL': '椅子'}}
    PCs = {"乔尔·塔图姆": {'color': "Teal", 'PL': 'Advil'},
           "利兰·温菲尔德/Feldzug": {'color': "Indigo", 'PL': 'Schlacht'}}
    root = ""
    txt_folder = "raw"

elif log_name == "柯比特先生":
    GM = {"跋伽": {'color': 'black', 'PL': '椅子'}}
    PCs = {"欧内斯特·米勒尔·沃尔": {'color': "Maroon", 'PL': '41'},
           "塞润妮缇·热那亚": {'color': "Indigo", 'PL': 'Schlacht'}}
    root = ""
    txt_folder = "raw"

elif log_name == "嗜极体":
    GM = {"HR": {'color': 'black', 'PL': '巫洁鸾'}}
    PCs = {"艾玻·艾弗雷德/Ishmael": {'color': "Maroon", 'PL': '-'},
           "黑斯廷斯/Seduce": {'color': "Indigo", 'PL': '-'},
           "哈维·范德米尔/Rust": {'color': "PaleVioletRed", 'PL': '-'},
           "菲尼斯·费雪/Island": {'color': "Teal", 'PL': '-'},
           "莱奥·奥斯特洛夫斯基/医生": {'color': "OliveDrab", 'PL': '-'},
           "伯恩莱特·米斯贡斯特/Mannstein": {'color': "Goldenrod", 'PL': '-'}}
    root = ""
    txt_folder = "raw"

else:
    # Fail here with a clear message instead of a NameError further down
    raise ValueError(
        f"No preset defined for log_name={log_name!r}; "
        "add a branch that sets GM, PCs, root and txt_folder"
    )


# Handles
auto_color = False  # color the characters with randomly generated colors
add_toc = True
double_col = True  # in html and pdf only
remove_md_html = False  # keep only the pdf -- strongly recommended, the other outputs do not look good anyway

# Name output log file paths (stored under root)
md_path = 'log.md'
html_path = 'log.html'
pdf_path = 'log.pdf'

最后，安装下面用到的 Python 库之后就可以运行了（例如：`pip install markdown beautifulsoup4 lxml nest_asyncio pyppeteer zhon`）

In [59]:
import os
import markdown
from bs4 import BeautifulSoup
import html
import nest_asyncio
import asyncio
import re
from pyppeteer import launch
from difflib import SequenceMatcher
import random
from collections import defaultdict
import string
from zhon.hanzi import punctuation
from pprint import pprint

In [60]:
# Work from the log's root folder. An empty root means "stay in the current
# directory"; os.chdir("") would raise FileNotFoundError.
if root:
    os.chdir(root)

################################# MARKDOWN #################################

# Collect every .txt export in txt_folder, ordered by file name.
# Make sure the txt files are named so that this order is the one you want.
txt_paths = sorted(txt_folder + "/" + f for f in os.listdir(txt_folder) if f.endswith('.txt'))

if len(txt_paths) <= 2:
    add_toc = False
    print("Inadequate txt files, TOC closed automatically")

# Speaker names found in "<name>" prefixes (may contain duplicates here)
PCnames = []

# Open the output .md file for writing
with open(md_path, 'w', encoding='utf-8') as md_file:

    # Each .txt file becomes one chapter
    for txt_path in txt_paths:

        # Chapter title = file name without folder and extension, keeping only
        # what follows the last "_" (the whole stem if there is no "_").
        # Working on the base name keeps the folder path out of the title.
        file_stem = os.path.splitext(os.path.basename(txt_path))[0]
        chapter_title = file_stem.rsplit('_', 1)[-1]

        # Write the chapter title as a markdown header
        md_file.write(f"## {chapter_title}\n")

        # Read with an explicit encoding so the result does not depend on the
        # platform default; utf-8-sig also drops a leading BOM, which would
        # otherwise hide the "<" of the first line.
        # NOTE(review): assumes the boluo export is UTF-8 -- confirm.
        with open(txt_path, 'r', encoding='utf-8-sig') as txt_file:
            lines = txt_file.readlines()

            for i, line in enumerate(lines):
                line = line.strip()

                # Lines starting with "* " lose that marker
                if line.startswith("* "):
                    line = line[2:]

                # Record the speaker name from a leading "<name>"
                if line.startswith("<"):
                    match = re.search(r"^<(.*?)>", line)
                    if match:
                        PCnames.append(match.group(1))

                md_file.write(f"{line}\n")

                # Put a blank line before the next "<name>" / "* " line so that
                # it starts a new markdown paragraph
                if i < len(lines) - 1 and lines[i + 1].startswith(("<", "* ")):
                    md_file.write("\n")
        
if add_toc:  # If Table of Contents (TOC) is to be added

    def generate_toc(markdown_text):
        """Return a markdown bullet list linking to every header in *markdown_text*.

        A header is any line starting with one or more '#' followed by
        whitespace. Each entry is indented by two spaces per header level
        beyond the first, and links to an anchor made from the lower-cased
        header text with spaces replaced by dashes.
        """
        toc_lines = []

        # Each match is a (hashes, title) pair
        headers = re.findall(r'^(#+)\s+(.*)', markdown_text, re.MULTILINE)

        for hashes, text in headers:
            level = len(hashes)
            anchor = text.lower().replace(' ', '-')
            toc_lines.append(f"{'  ' * (level - 1)}- [{text}](#{anchor})")

        return "\n".join(toc_lines)

    # Open the .md file to read its content
    with open(md_path, 'r', encoding='utf-8-sig') as md_file:
        md_content = md_file.read()

    # Generate the Table of Contents
    toc = generate_toc(md_content)

    # Insert the Table of Contents at the beginning of the markdown content
    md_with_toc = f"# 目录\n\n{toc}\n\n{md_content}"

    # Write back with the same explicit encoding the file was created with;
    # the platform default may not be able to encode the log text
    with open(md_path, "w", encoding='utf-8') as file:
        file.write(md_with_toc)

# Drop duplicate names while keeping first-appearance order, so the order
# (and anything derived from it) is the same on every run
PCnames = list(dict.fromkeys(PCnames))

In [None]:
################################# COLORIZE #################################

# Reduce a name to its bare characters so that variants can be compared
def normalize_name(name):
    """Return *name* without ASCII/Chinese punctuation and without whitespace."""
    strip_table = str.maketrans('', '', string.punctuation + punctuation)
    without_punctuation = name.translate(strip_table)
    return re.sub(r'\s+', '', without_punctuation)

# Pick a random color for automatic name coloring
def generate_random_color():
    """Return a random HTML color code of the form ``#rrggbb`` (lowercase hex)."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return "#" + format(rgb_value, "06x")

# Map every collected name to its normalized form
normalized_names = {}
for raw_name in PCnames:
    normalized_names[raw_name] = normalize_name(raw_name)

# clusters: lists of (original name, normalized name) pairs judged to be the
# same character; used_colors: colors already handed out
clusters = []
used_colors = []


def _belongs_to(candidate_norm, members):
    """Tell whether a normalized name should join the cluster *members*.

    True when the name contains, or is contained in, any member's normalized
    name, or when it is more than 70% similar to the cluster's first member.
    """
    for _, member_norm in members:
        if candidate_norm in member_norm or member_norm in candidate_norm:
            return True
    return SequenceMatcher(None, candidate_norm, members[0][1]).ratio() > 0.7


# Put each name into the first cluster it matches, or open a new cluster
for name, norm_name in normalized_names.items():
    home = next((members for members in clusters if _belongs_to(norm_name, members)), None)
    if home is None:
        clusters.append([(name, norm_name)])
    else:
        home.append((name, norm_name))

# Give every cluster one color that no earlier cluster received
result = {}
for members in clusters:
    while True:
        color = generate_random_color()
        if color not in used_colors:
            break
    used_colors.append(color)
    result.update({member_name: {'color': color, 'PL': '-'} for member_name, _ in members})

# Collect the names that share each color
color_groups = defaultdict(list)
for name in result:
    color_groups[result[name]['color']].append(name)

# Recommended mapping, listed color group by color group
recPCs = {}
for color, names in color_groups.items():
    recPCs.update((grouped_name, result[grouped_name]) for grouped_name in names)

if auto_color:
    PCs = recPCs
    print("Please change PL names and colors in `PCs` manually")

print("Recommended coloring:")
pprint(recPCs)

Recommended coloring:
{'ATF': {'PL': '-', 'color': '#c5022d'},
 'Adam2': {'PL': '-', 'color': '#4dacb6'},
 'Adam3': {'PL': '-', 'color': '#4dacb6'},
 'Adam7': {'PL': '-', 'color': '#4dacb6'},
 'Adam8': {'PL': '-', 'color': '#4dacb6'},
 'Eve9': {'PL': '-', 'color': '#52c6d9'},
 'HL': {'PL': '-', 'color': '#eacbce'},
 'HR': {'PL': '-', 'color': '#e7cf7b'},
 'HR 巫洁鸾': {'PL': '-', 'color': '#e7cf7b'},
 'MOTH': {'PL': '-', 'color': '#6d7fff'},
 'Seduce': {'PL': '-', 'color': '#6ae988'},
 '【Friend】': {'PL': '-', 'color': '#587a1d'},
 '【Ishmael】': {'PL': '-', 'color': '#e8e44c'},
 '【Island】': {'PL': '-', 'color': '#fb401d'},
 '【Lion】': {'PL': '-', 'color': '#851484'},
 '【Mannstein】': {'PL': '-', 'color': '#dfed5c'},
 '【Rust】': {'PL': '-', 'color': '#07adc3'},
 '【Seduce】': {'PL': '-', 'color': '#6ae988'},
 '乔治·盖茨': {'PL': '-', 'color': '#e9f8ab'},
 '二维码内容': {'PL': '-', 'color': '#ea72db'},
 '伊丽莎白': {'PL': '-', 'color': '#681e0e'},
 '休·肖恩': {'PL': '-', 'color': '#1079eb'},
 '伯恩莱特·米斯贡斯特': {'PL':

Please change PL names in `PCs` manually


In [65]:
################################# HTML #################################

# Look up the color configured in PCs for a speaker name
def apply_color(text):
    """Return ``(color, markup)`` for *text*.

    When *text* is exactly one of the names in ``PCs``, the markup is the
    text wrapped in a span carrying that name's color. Otherwise the color is
    "black" and the text is returned unchanged.
    """
    if text not in PCs:
        return "black", text

    color = PCs[text]['color']
    return color, f'<span style="color: {color};">{text}</span>'

if double_col:
    print("Double-Column selected")

# Load the markdown log produced earlier
with open(md_path, mode='r', encoding='utf-8-sig') as md_file:
    md_content = md_file.read()

# Encode angle brackets as entities before the Markdown conversion
for bracket, entity in (("<", "&lt;"), (">", "&gt;")):
    md_content = md_content.replace(bracket, entity)

# Convert the Markdown content to HTML
html_content = markdown.markdown(md_content)

# Turn every newline of the generated HTML into a <br> ...
html_lines = html_content.split('\n')
html_content = '<br>'.join(html_lines)

# ... except directly between a closing tag and the next <p>
html_content = html_content.replace("><br><p>", "><p>")
# Start of the output document: <head> with the page title and all CSS, then
# the opening <body>; the log content is appended to this string afterwards.
# The log name is HTML-escaped, and no comment is placed inside <title>
# because a comment there is treated as part of the title text.
new_html = """
<!DOCTYPE HTML>
<html lang="zh"> <!-- Specifies the document type and language (Chinese) -->
<head>
    <meta charset="UTF-8"> <!-- Sets the character encoding to UTF-8 -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0"> <!-- Sets viewport settings for responsive design -->
    <!-- The document title below is filled in from the 'log_name' variable -->
    <title>""" + html.escape(log_name) + """</title>
    
    <style>
        /* Styles for page-break behavior when displaying/printing */
        @media all {
            .page-break {
                display: none; /* Hides the page-break div when viewed on screen */
            }
        }

        @media print {
            .page-break {
                display: block; /* Shows the page-break div when printing */
                page-break-before: always; /* Forces page break before the page-break div */
                height: 0; /* Ensures no extra space is added for the page break */
            }
        }

        /* General body styling */
        body {
            font-family: STSong; /* Sets the font to STSong (Chinese font family) */
            line-height: 200%; /* Increases line spacing for readability */
        }

        /* Flexbox container to create a responsive layout with left and right margins */
        .container {
            display: flex; /* Enables flexbox layout for children elements */
            margin-left: 20px;  /* Adds left margin */
            margin-right: 20px; /* Adds right margin */
        }

        /* Styling for columns within the flex container */
        .column {
            padding: 5px; /* Adds padding inside each column */
            position: relative; /* Positions the columns relative to each other */
        }

        /* Styling for a single column element */
        .single-column {
            margin-top: 10px; /* Adds space above the column */
            margin-bottom: 10px; /* Adds space below the column */
            margin-right: 15px; /* Adds margin to the right of the column */
            margin-left: 15px;  /* Adds margin to the left of the column */
        }

        /* Adds a vertical line between columns, except for the last one */
        .column:not(:last-child) {
            border-right: 1px solid #000; /* Adds a black vertical line */
            margin-right: 10px; /* Adds margin to the right of the column */
            margin-left: 10px;  /* Adds margin to the left of the column */
        }

        /* Styles for h2 elements, typically headings */
        h2 {
            padding: 0; /* Removes the default padding around h2 */
        }

        /* Styling for short breaks to adjust spacing */
        .short-break {
            margin: 4px 0; /* Adds margin around the short break */
            padding: 0;    /* Removes padding */
        }

        /* Table of contents container styles */
        .toc-container {
            margin: 0 auto; /* Centers the TOC container horizontally */
            margin-top: 50px; /* Adds space above the TOC */
        }
        .toc-container h2 {
            text-align: center; /* Centers the title of the TOC */
            font-size: 2em; /* Increases font size of the title */
            margin-top: 50px; /* Adds margin above the title */
            margin-bottom: 50px; /* Adds margin below the title */
        }
        .toc-container ul {
            margin-left: 20px; /* Adds left margin for the list */
            list-style-type: none; /* Removes bullet points from the list */
            padding: 0; /* Removes padding */
        }
        .toc-container li {
            margin: 10px 0; /* Adds margin between each item in the TOC */
            display: flex; 
            justify-content: space-between; /* Aligns content within each item */
            padding-bottom: 5px; /* Adds space below each item */
            margin-left: 10px; /* Adds left margin */
        }
        .toc-container .chapter-title {
            font-size: 1.1em; /* Increases font size slightly */
            text-decoration: none; /* Removes underline from links */
            color: black; /* Sets the text color to black */
        }

        /* Info container styling */
        .info-container {
            text-align: right; /* Aligns the text to the right */
            padding: 15px; /* Adds padding inside the container */
            max-width: 600px; /* Limits the width of the container */
            margin: 0 auto; /* Centers the container horizontally */
            margin-top: 200px; /* Adds space above the container */
            margin-bottom: 300px; /* Adds space below the container */
        }
        .info-container h1 {
            text-align: center; /* Centers the header text */
            font-size: 3em; /* Increases font size */
            margin-top: 50px; /* Adds margin above the header */
            margin-bottom: 50px; /* Adds margin below the header */
        }

        /* Info item styling */
        .info-item {
            display: flex; /* Enables flexbox layout for items */
            gap: 20px; /* Adds spacing between child elements */
            justify-content: space-between; /* Aligns content to be spaced apart */
        }

        /* Label styling */
        .label {
            padding: 10px; /* Adds padding inside the label */
            margin-right: 10px; /* Adds space to the right of the label */
        }

        /* Value styling */
        .value {
            margin-left: 10px; /* Adds space to the left of the value */
            padding: 10px; /* Adds padding inside the value */
        }

    </style>
</head>

<body>
<!-- The content of the body section will go here -->
"""

# Parse the Markdown-generated HTML so its paragraphs and headers can be walked
soup = BeautifulSoup(html_content, 'lxml')

# Counts chapter headers; the first chapter gets no page break before it
section_number = 1

# Color of the right-hand column in double-column mode. It carries over from
# the most recent speaker, so a paragraph without a "<name>" prefix keeps that
# speaker's color. Starting from black means the first paragraph never reads
# an undefined or stale value.
color = "black"

# Loop through all <p> and <h2> tags to process content
for tag in soup.find_all(['p', 'h2']):

    # Skip the Table of Contents (TOC) title
    if tag.get_text() == "目录":
        continue

    # Chapter header: centered, preceded by a page break except for the first
    if tag.name == "h2":

        # get_text() returns decoded text, so escape it again before it is
        # pasted into the HTML string
        new_html += f"""
        {'<div class="page-break"></div><br><br>' if section_number != 1 else ''}
        <h2 style='text-align: center'>{html.escape(tag.get_text())}</h2>
        <br><br>
        """

        section_number += 1

    # Process and color main log lines
    else:
        # Inner HTML of the paragraph; "<" and ">" coming from the log are
        # still encoded as &lt; and &gt; at this point
        p_text = tag.decode_contents()

        # Split into two columns and apply color formatting
        if double_col:

            for name, dic in PCs.items():

                # Paragraph starting with "name " (no angle brackets): render
                # it as one full-width line in that character's color
                if p_text.startswith(name + ' '):
                    color = dic['color']
                    p_text = p_text.replace(name + ' ', name, 1)
                    content = f'<span style="color: {color};">{p_text}</span>' + '<br>'
                    new_html += f"""
                    <div class="container" >
                    <div class="single-column" style="width: 100%">
                    {content}
                    </div>
                    </div>
                    """
                    # Stop at the first match so that overlapping names cannot
                    # emit the same paragraph twice
                    break

            else:
                # Speaker name(s) go to the left column, the text to the right
                left_column = ''
                right_column = ''

                # Peel off "&lt;name&gt;" tags only while they lead the
                # paragraph; angle brackets later in the text stay part of the
                # message, so nothing in front of them is dropped
                while p_text.startswith('&lt;') and '&gt;' in p_text:
                    end_idx = p_text.find('&gt;')
                    color, left_content = apply_color(p_text[4:end_idx])  # Apply color
                    left_column += left_content + '<br>'  # Add content to left column
                    p_text = p_text[end_idx+4:]  # Remove processed part from original text

                # Remaining content is added to the right column
                right_column += f'<span style="color: {color};">{p_text}</span>'

                # Add HTML for two-column layout
                new_html += f"""
                <div class="container" >
                    <div class="column" style="width: 25%;">{left_column}</div>
                    <div class="column" style="width: 75%;">{right_column}</div>
                </div>
                """

        # Color the text for a single column layout
        else:

            for name, dic in PCs.items():

                # Check if the line starts with "<name>" or with "name "
                if p_text.startswith(f"&lt;{name}&gt;") or p_text.startswith(f"{name} "):
                    if not p_text.startswith(f"&lt;{name}&gt;"):
                        p_text = p_text.replace(f"{name} ", name, 1)
                    new_html += f'<p><span style="color:{dic["color"]};">{p_text}</span></p>'
                    break
            else:
                # No configured name matched: emit the paragraph uncolored
                new_html += f"<p><span>{p_text}</span></p>"


# Wrap up the HTML structure
new_html += '</body></html>'

# Re-parse the assembled document so elements can be inserted into it
soup = BeautifulSoup(new_html, 'lxml')

# Add table of contents if required
if add_toc:

    # Create the TOC container with its "目录" heading. The heading is only
    # attached to the container here; the container joins the document after
    # the loop below, so the heading is not listed as a chapter.
    toc_container = soup.new_tag("div", **{"class": "toc-container"})
    toc_heading = soup.new_tag("h2")
    toc_heading.string = "目录"
    toc_container.append(toc_heading)

    # Create the unordered list for the TOC
    toc_list = soup.new_tag("ul")
    toc_list['style'] = "list-style-type: none; text-align: center;"

    # Give every <h2> in the document an id and add a TOC entry linking to it
    for i, h2_tag in enumerate(soup.find_all('h2'), start=1):
        chapter_id = f"chapter-{i}"
        h2_tag['id'] = chapter_id

        toc_item = soup.new_tag("li")

        # Create the link to the chapter
        toc_link = soup.new_tag("a", href=f"#{chapter_id}", **{"class": "chapter-title"})
        toc_link.string = h2_tag.get_text()
        toc_item.append(toc_link)

        toc_list.append(toc_item)

    # Add the TOC list to the container
    toc_container.append(toc_list)

    # Insert the TOC at the top of the body section
    soup.body.insert(0, toc_container)

    # Insert a page break right after the TOC
    page_break_div = soup.new_tag("div", **{"class": "page-break"})
    toc_container.insert_after(page_break_div)


# Title-page block: starts with the log name as its heading; one row per
# GM / character is appended to it by add_player_info
info_container = soup.new_tag("div")
info_container["class"] = "info-container"
info_heading = soup.new_tag("h1")
info_heading.string = log_name
info_container.append(info_heading)

def add_player_info(name, dic, label_suffix=""):
    """Append one row to ``info_container``.

    The row holds two spans, both in the color given by ``dic['color']``:
    *name* followed by *label_suffix*, and the player name ``dic['PL']``.
    """
    captions = (f"{name}{label_suffix}", dic['PL'])
    tint = f"color: {dic['color']};"

    row = soup.new_tag("div")
    row["class"] = "info-item"

    # First span is the label, second span is the player value
    for caption in captions:
        cell = soup.new_tag("span")
        cell.string = caption
        cell['style'] = tint
        row.append(cell)

    info_container.append(row)

# Add GM information
for name, dic in GM.items():
    add_player_info(name, dic, "/Game Master")

# Add PCs information
for name, dic in PCs.items():
    add_player_info(name, dic)

# Put the info container at the very top of the body
soup.body.insert(0, info_container)

# Insert a page break right after it
page_break_div = soup.new_tag("div", **{"class": "page-break"})
info_container.insert_after(page_break_div)

# Mark every line break as a styled short break. This is done on the parsed
# tree because the serialized document writes the tag as "<br/>", so a text
# replacement of "<br>" would not match anything.
for br_tag in soup.find_all("br"):
    br_tag["class"] = "short-break"

# Finalize the modified HTML
new_html = str(soup)

# Write the final HTML content to a new file
with open(html_path, 'w', encoding='utf-8') as f:
    f.write(new_html)

print(f"Markdown file has been converted to {html_path}")

################################# PDF #################################

# Apply nest_asyncio to allow running asyncio within Jupyter or nested event loops
nest_asyncio.apply()

async def generate_pdf_from_html(html_content, pdf_path, html_path, root):
    """Render the HTML file at *html_path* to a PDF written to *pdf_path*.

    Args:
        html_content: Not used; kept so existing calls keep working.
        pdf_path: Where the PDF is written.
        html_path: The HTML file to load, relative to *root* unless absolute.
        root: Folder containing the HTML file. When it is empty or relative,
            the current working directory is used instead.
    """
    from pathlib import Path

    # Build a proper file:// URL. as_uri() handles Windows drive letters and
    # percent-encodes non-ASCII characters, and it needs an absolute path.
    base_dir = Path(root) if root and Path(root).is_absolute() else Path.cwd()
    page_url = (base_dir / html_path).resolve().as_uri()

    # Launch the browser
    browser = await launch()
    try:
        # Open a new page and wait until the HTML document has been parsed
        page = await browser.newPage()
        await page.goto(page_url, {'waitUntil': 'domcontentloaded'})

        # Generate the PDF from the loaded page
        # NOTE(review): 'displayHeaderFooter' with a 'footerTemplate' was tried
        # here earlier for page numbers and left disabled.
        await page.pdf({'path': pdf_path,
                        'margin': {'top': 50, 'bottom': 50, 'right': 50, 'left': 50}})
    finally:
        # Close the browser even when loading or printing fails
        await browser.close()

# Load the generated HTML text; it is passed along as the first argument below
with open(html_path, mode='r', encoding='utf-8-sig') as html_file:
    html_content = html_file.read()

# Run the PDF coroutine to completion on the current event loop
event_loop = asyncio.get_event_loop()
event_loop.run_until_complete(
    generate_pdf_from_html(html_content, pdf_path, html_path, root)
)

# Report the result
print(f"HTML file has been converted to {pdf_path}")

if add_toc:
    print("Table of Contents selected")

# Optionally keep only the PDF by deleting the intermediate files
if remove_md_html:
    for intermediate in (md_path, html_path):
        os.remove(intermediate)
    print(f"{md_path} and {html_path} have been deleted")

Double-Column selected
Markdown file has been converted to log.html
HTML file has been converted to log.pdf
Table of Contents selected


# To-do
- [ ] boluo自动爬虫下载
- [ ] 夜间模式
- [ ] 人物卡自动录入
- [ ] 输入JSON以实现更好的角色名读取（从描述动作里也可以读）
- [ ] 兼容图片

以上大概都不会有，能用就行