目前的状况
长度限制的问题尝试用canvas规避，或者IDE原子化处理
Tailwind CSS 确实能在很大程度上产出美观的页面，但最好先在一个已经跑通的 demo 网站上直接验证 Vite 或 React 的前端逻辑

算上依赖组件，总字符量从 6000 变成了 4600 + 2200（合计约 6800），实际上并没有减少——应该存在一种轻量且收益最大化的处理方式，但靠人工逐个处理就很不值得了

把仅出现 1 次的类组合也纳入模板匹配，代价似乎不小：总容量增加了不少

In [5]:
# 第一个单元格：导入必要的库
import re
import json
import os
import pandas as pd
from bs4 import BeautifulSoup
from collections import Counter
from IPython.display import display, HTML
import ipywidgets as widgets

# Predefined color names recognised in Tailwind utility classes; 'primary' must be included.
common_colors = [
    'primary', 'secondary', 'accent',  # theme colors come first
    'gray', 'red', 'blue', 'green', 'yellow', 'indigo', 'purple',
    'pink', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald',
    'rose', 'fuchsia', 'sky', 'violet', 'black', 'white'
]

# Earlier variant that required a numeric shade (kept for reference):
# re_color_pattern = re.compile(r'((?:dark:|hover:|focus:|active:)*)?(text|bg|border|ring|from|to|via)-(' + '|'.join(common_colors) + r')-(\d+)(?:/(\d+))?')
# Capture groups: (1) chained state prefixes, (2) property, (3) color name,
# (4) optional numeric shade, (5) optional opacity after '/'.
re_color_pattern = re.compile(r'((?:dark:|hover:|focus:|active:)*)?(text|bg|border|ring|from|to|via)-(' + '|'.join(common_colors) + r')(?:-(\d+))?(?:/(\d+))?')


# 修正版：加载JS映射函数
def load_js_mappings(js_file):
    """Load the name mappings already stored in the Tailwind mapping JS file.

    The file is expected to contain two object literals, ``twClasses`` and
    ``twTemplates``, whose entries look like ``'name': 'class string',``.

    Args:
        js_file: Path of the JS file to read.

    Returns:
        A ``(class_mappings, template_mappings)`` tuple. Both dicts are keyed
        by the class string / template string and map to the short name
        (i.e. the reverse of how the entries are written in the JS file).
        Both are empty when the file does not exist.
    """
    class_mappings = {}     # class string -> name
    template_mappings = {}  # template string -> name

    if not os.path.exists(js_file):
        print(f"⚠️ JS文件不存在: {js_file}")
    else:
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()

        # One `'name': 'value'` entry inside either object literal.
        entry_pattern = re.compile(r"'([^']+)':\s*'([^']+)'")

        # Class strings never contain braces, so the body ends at the first '}'.
        class_match = re.search(r'twClasses\s*=\s*\{([^}]*)\}', content, re.DOTALL)
        if class_match:
            for entry in entry_pattern.finditer(class_match.group(1)):
                name, classes = entry.groups()
                class_mappings[classes] = name

        # Templates contain '{color-N}' placeholders, so the body must run up
        # to the closing '};' instead of stopping at the first '}'.
        template_match = re.search(r'twTemplates\s*=\s*\{([\s\S]*?)\};', content, re.DOTALL)
        if template_match:
            for entry in entry_pattern.finditer(template_match.group(1)):
                name, template = entry.groups()
                template_mappings[template] = name
        else:
            print("⚠️ 未找到模板定义或格式不匹配")

    print(f"加载了 {len(class_mappings)} 个类映射和 {len(template_mappings)} 个模板映射")
    return class_mappings, template_mappings


In [None]:

def analyze_html(html_file):
    """Collect every distinct ``class`` attribute value used in an HTML file.

    Args:
        html_file: Path of the HTML file to parse (read as UTF-8).

    Returns:
        A ``(df, similar_classes, all_variants)`` tuple where ``df`` has one
        row per distinct class combination with the columns '类组合'
        (the class string), '出现次数' (occurrences), '长度' (string length)
        and '上下文' (up to two short context descriptions), and the other two
        values are whatever ``identify_color_variants(df)`` returns.
    """
    with open(html_file, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'lxml')

    class_combinations = Counter()
    class_contexts = {}

    for element in soup.find_all(True):
        class_list = element.attrs.get('class')
        if not class_list:
            continue
        class_combo = ' '.join(class_list)
        if not class_combo:
            continue

        class_combinations[class_combo] += 1
        contexts = class_contexts.setdefault(class_combo, [])

        # Describe where the combination is used: tag, nearest ancestor id, text snippet.
        tag = element.name
        parent_with_id = element.find_parent(id=True)
        section = parent_with_id.get('id', '') if parent_with_id else ''
        text = element.get_text()[:30].strip()

        context = f"<{tag}>" + (f" #{section}" if section else "") + (f" \"{text}...\"" if text else "")
        if context not in contexts:
            contexts.append(context)

    data = [
        {
            '类组合': combo,
            '出现次数': count,
            '长度': len(combo),
            '上下文': ' | '.join(class_contexts[combo][:2]),  # only the first two contexts
        }
        for combo, count in class_combinations.items()
    ]

    # Explicit columns keep the frame usable (sortable/filterable) even when
    # the document contains no class attributes at all.
    df = pd.DataFrame(data, columns=['类组合', '出现次数', '长度', '上下文'])

    # Detect color templates and the variants that belong to them.
    similar_classes, all_variants = identify_color_variants(df)

    return df, similar_classes, all_variants

def _parameterize_colors(class_combo, color_pattern):
    """Replace each color reference in ``class_combo`` with a ``{color-N}`` placeholder.

    Returns ``(template, color_map)``. ``color_map`` maps
    ``"<prefix><property>-<color>"`` to a dict holding the color name, the
    assigned parameter name and the list of matches that used it.
    """
    color_map = {}

    def substitute(match):
        prefix, property_type, color, shade, opacity = match.groups()
        prefix = prefix or ""  # the prefix group is optional

        # Prefix + property + color identifies a parameter, so e.g. 'bg-gray'
        # and 'dark:bg-gray' receive different parameters.
        base_key = f"{prefix}{property_type}-{color}"
        entry = color_map.get(base_key)
        if entry is None:
            entry = {
                'color': color,
                'param': f"color-{len(color_map) + 1}",
                'matches': []
            }
            color_map[base_key] = entry

        entry['matches'].append({
            'full': match.group(0),
            'prefix': prefix,
            'property': property_type,
            'shade': shade,
            'opacity': opacity
        })

        # Only the color name becomes a parameter; shade and opacity are kept verbatim.
        replacement = f"{prefix}{property_type}-{{{entry['param']}}}"
        if shade:
            replacement += f"-{shade}"
        if opacity:
            replacement += f"/{opacity}"
        return replacement

    # Substituting by match position (instead of str.replace on the matched
    # text) guarantees that every occurrence is rewritten exactly once and
    # that the result does not depend on the order of the classes.
    template = color_pattern.sub(substitute, class_combo)
    return template, color_map


def identify_color_variants(df, color_pattern=None):
    """Group class combinations that differ only in the color names they use.

    Args:
        df: DataFrame with the columns '类组合', '出现次数', '长度' and '上下文'.
        color_pattern: Compiled regex with the five groups
            (prefix, property, color, shade, opacity). Defaults to the
            module-level ``re_color_pattern``.

    Returns:
        A ``(groups, all_variants)`` tuple. ``groups`` maps each template
        string to a dict with 'variants' (list of original/count/context
        dicts), 'colors' (set of color names), 'color_map' (from the first
        variant seen) and 'total_count'. ``all_variants`` maps every class
        combination that contains a color to its template. Combinations
        without any color reference are ignored.
    """
    if color_pattern is None:
        color_pattern = re_color_pattern

    class_groups = {}
    all_variants = {}

    # Nothing to sort or scan (also covers a frame created without columns).
    if df.empty:
        return class_groups, all_variants

    # Longest and most frequent combinations first.
    sorted_df = df.sort_values(by=['长度', '出现次数'], ascending=[False, False])

    for _, row in sorted_df.iterrows():
        class_combo = row['类组合']

        # Skip combinations that were already assigned to a template.
        if class_combo in all_variants:
            continue

        template, color_map = _parameterize_colors(class_combo, color_pattern)
        if not color_map:
            continue  # no color reference in this combination

        group = class_groups.get(template)
        if group is None:
            group = {
                'variants': [],
                'colors': set(),
                'color_map': color_map,
                'total_count': 0
            }
            class_groups[template] = group

        group['variants'].append({
            'original': class_combo,
            'count': row['出现次数'],
            'context': row['上下文']
        })
        group['total_count'] += row['出现次数']
        group['colors'].update(entry['color'] for entry in color_map.values())

        all_variants[class_combo] = template

    # Every template is kept, including those with a single variant used once.
    return dict(class_groups), all_variants

# Step 1 UI: input widgets, trigger button and output area for the HTML analysis.
html_file_input = widgets.Text(value='index.html', description='HTML文件:')
# Thresholds used by on_analyze_clicked to filter the displayed table (combined with OR).
min_count = widgets.IntText(value=2, description='最小次数:', layout=widgets.Layout(width='150px'))
min_length = widgets.IntText(value=30, description='最小长度:', layout=widgets.Layout(width='150px'))
analyze_button = widgets.Button(description="分析HTML")
analysis_output = widgets.Output()

# Render the controls in the notebook cell, in this order.
display(HTML("<h3>步骤1: 分析HTML文件</h3>"))
display(widgets.HBox([html_file_input, min_count, min_length]))
display(analyze_button)
display(analysis_output)

@analysis_output.capture()
def on_analyze_clicked(b):
    """Button handler: analyze the selected HTML file and print a report.

    Stores the results in the module globals ``global_df``,
    ``global_similar_classes`` and ``global_all_variants`` so that later
    notebook cells can use them.

    Args:
        b: The clicked button (unused).
    """
    global global_df, global_similar_classes, global_all_variants

    html_file = html_file_input.value
    if not os.path.exists(html_file):
        print(f"错误: 文件 {html_file} 不存在")
        return

    print(f"正在分析 {html_file}...")
    df, similar_classes, all_variants = analyze_html(html_file)

    # Keep the results for the other cells.
    global_df = df
    global_similar_classes = similar_classes
    global_all_variants = all_variants

    if df.empty:
        print("未在HTML中找到任何class属性")
        return

    # A combination is listed when it is frequent enough OR long enough.
    min_count_value = min_count.value
    min_length_value = min_length.value
    filtered_df = df[(df['出现次数'] >= min_count_value) | (df['长度'] >= min_length_value)]
    filtered_df = filtered_df.sort_values(by='出现次数', ascending=False)

    print(f"找到 {len(df)} 个类组合，其中 {len(filtered_df)} 个满足筛选条件 (出现次数 >= {min_count_value} 或 长度 >= {min_length_value})")

    if not filtered_df.empty:
        print("\n高频/长类组合:")
        # Cells are escaped: the context column contains text such as "<div>"
        # that must be shown literally instead of being rendered as markup.
        display(HTML(filtered_df.head(30).to_html()))

    if similar_classes:
        print(f"\n找到 {len(similar_classes)} 个颜色变体模板:")
        # Every template is listed (up to three example variants each).
        for number, (template, data) in enumerate(similar_classes.items(), start=1):
            colors = ", ".join(sorted(data['colors']))
            variants_info = [
                f"{variant['original']} (出现{variant['count']}次)"
                for variant in data['variants'][:3]
            ]

            print(f"\n模板 {number}: 总共{len(data['variants'])}个变体，总使用{data['total_count']}次")
            print(f"颜色参数: {colors}")
            print(f"变体示例: {variants_info[0]}")
            for extra in variants_info[1:]:
                print(f"          {extra}")
            print(f"模板: {template}")

        # Diagnostic for one specific, hard-coded class combination.
        search_class = "flex-shrink-0 h-10 w-10 theme-avatar bg-primary-100 dark:bg-primary-900 mr-4"
        if search_class in all_variants:
            template = all_variants[search_class]
            template_data = similar_classes[template]
            print("\n您查询的类组合已被识别为模板的变体:")
            print(f"所属模板: {template}")
            print(f"模板包含颜色: {', '.join(sorted(template_data['colors']))}")
            print(f"模板变体总数: {len(template_data['variants'])}")
        else:
            print("\n您查询的类组合没有被识别为颜色变体")
            # Explain why a combination that mentions a color was not recognised.
            if "primary" in search_class:
                print("原因可能是:")
                if search_class not in df['类组合'].values:
                    print("- 此类组合不存在于HTML文件中")
                else:
                    row = df[df['类组合'] == search_class].iloc[0]
                    if row['出现次数'] == 1:
                        print("- 此类组合只出现了1次，且没有其他颜色变体")
                    else:
                        print("- 可能是识别算法未能正确匹配颜色模式")

                # Manual detection with simplified patterns.
                color_patterns = [
                    re.compile(r'(text|bg|border|ring)-(' + 'primary' + r')-(\d+)'),
                    re.compile(r'dark:(text|bg|border|ring)-(' + 'primary' + r')-(\d+)')
                ]
                for pattern in color_patterns:
                    matches = list(pattern.finditer(search_class))
                    if matches:
                        print(f"- 检测到颜色引用: {[m.group(0) for m in matches]}")


analyze_button.on_click(on_analyze_clicked)

In [None]:

# Ad-hoc inspection cell: run the analysis directly on index.html.
df, similar_classes, all_variants = analyze_html('index.html')
# df
similar_classes  # shown as the cell result; index with a template string to inspect one group
# Scratch snippets used while debugging the color pattern:
# df[df['类组合'].str.contains('md:hidden')]
# check_str = df['类组合'].iloc[2]
# check_str = "sticky top-0 z-50 bg-white/80 dark:bg-gray-900/80 backdrop-blur-md shadow-sm theme-border-b"
# check_str = "md:hidden p-2 rounded-md hover:bg-gray-100 dark:hover:bg-gray-800 theme-btn"

# color_pattern = re.compile(r'((?:dark:|hover:|focus:|active:)*)?(text|bg|border|ring|from|to|via)-(' + '|'.join(common_colors) + r')(?:-(\d+))?(?:/(\d+))?')

# matches = list(color_pattern.finditer(check_str))
# matches



In [None]:
# 第三个单元格：生成名称建议（修订版 - 排除颜色变体）
def suggest_name(class_combo, context, existing_names=None, template_mode=False):
    """Propose a short semantic name for a class combination.

    The name is assembled from (in order) a well-known section id found in
    ``context``, an element type derived from the tag found in ``context``,
    and - only for untyped, 'container' or 'text' elements - the single most
    important style feature of the classes.

    Args:
        class_combo: Space-separated class string.
        context: Context description such as ``'<div> #header "text..."'``.
        existing_names: Names already taken; a numeric suffix is appended
            until the result is not among them.
        template_mode: When true the name is prefixed with ``'t-'``.

    Returns:
        The suggested name; ``'ui-component'`` based when nothing could be derived.
    """
    taken = set() if existing_names is None else existing_names
    tokens = class_combo.split()

    def first_group(pattern):
        found = re.search(pattern, context)
        return found.group(1) if found else ""

    def has(fragment):
        return any(fragment in token for token in tokens)

    section = first_group(r'#([a-zA-Z0-9_-]+)')
    tag = first_group(r'<([a-zA-Z0-9_-]+)>')

    # Element type: fixed per tag, except for <div> which depends on its layout classes.
    type_by_tag = {
        'a': 'btn', 'button': 'btn',
        'h1': 'heading', 'h2': 'heading', 'h3': 'heading',
        'h4': 'heading', 'h5': 'heading', 'h6': 'heading',
        'p': 'text',
        'span': 'label',
        'img': 'image',
        'ul': 'list', 'ol': 'list',
        'li': 'list-item',
        'input': 'input',
        'textarea': 'textarea',
    }
    if tag == 'div':
        if has('flex'):
            element_type = 'flex-container'
        elif has('grid'):
            element_type = 'grid'
        else:
            element_type = 'container'
    else:
        element_type = type_by_tag.get(tag, "")

    # Style features, detected by substring in any class.
    feature_markers = (
        ('text-', 'text'),
        ('bg-', 'bg'),
        ('dark:', 'themed'),
        ('hover:', 'interactive'),
        ('rounded', 'rounded'),
        ('shadow', 'shadow'),
        ('border', 'bordered'),
    )
    style_features = [label for fragment, label in feature_markers if has(fragment)]
    if has('theme-'):
        # The first class that starts with 'theme-' (or '' when none does).
        style_features.append(next((token for token in tokens if token.startswith('theme-')), ''))

    name_parts = []

    # Section / position.
    if section in ('header', 'footer', 'gallery', 'about', 'banner', 'main'):
        name_parts.append(section)

    # Element type.
    if element_type:
        name_parts.append(element_type)

    # At most one style feature, chosen by priority.
    if style_features and element_type in ('', 'container', 'text'):
        priority = ('theme-avatar', 'interactive', 'themed', 'shadow', 'rounded', 'bordered', 'bg')
        chosen = next((feature for feature in priority if feature in style_features), None)
        if chosen is not None:
            name_parts.append(chosen.split('-')[-1])

    base_name = '-'.join(name_parts) if name_parts else 'ui-component'
    if template_mode:
        base_name = 't-' + base_name

    # Append -1, -2, ... until the name is unused.
    final_name = base_name
    suffix = 0
    while final_name in taken:
        suffix += 1
        final_name = f"{base_name}-{suffix}"

    return final_name


# Step 2 UI: JS mapping file input, trigger button and output area for name suggestions.
js_file_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
suggest_button = widgets.Button(description="生成名称建议")
suggestions_output = widgets.Output()

# Render the controls in the notebook cell, in this order.
display(HTML("<h3>步骤2: 生成名称建议</h3>"))
display(js_file_input)
display(suggest_button)
display(suggestions_output)

@suggestions_output.capture()
def on_suggest_clicked(b):
    """Button handler: generate name suggestions for templates and plain class combinations.

    Requires the analysis results stored in ``global_df`` (and optionally
    ``global_similar_classes`` / ``global_all_variants``). Stores its results
    in ``global_regular_suggestions`` and ``global_template_suggestions``.

    Args:
        b: The clicked button (unused).
    """
    global global_regular_suggestions, global_template_suggestions

    # The analysis must have been run first.
    if 'global_df' not in globals():
        print("请先运行HTML分析")
        return

    js_file = js_file_input.value

    # Load the mappings that already exist.
    existing_class_mappings, existing_template_mappings = load_js_mappings(js_file)
    print(f"已加载 {len(existing_class_mappings)} 个类映射和 {len(existing_template_mappings)} 个模板映射")

    df = global_df.copy()
    if df.empty:
        print("没有可分析的类组合")
        return

    similar_classes = globals().get('global_similar_classes') or {}
    all_variants = globals().get('global_all_variants') or {}

    # Candidates: occurrences >= 2 or length > 30.
    filtered_df = df[(df['出现次数'] >= 2) | (df['长度'] > 30)]
    filtered_df = filtered_df.sort_values(by='出现次数', ascending=False)

    # 1. Suggestions for color-variant templates (names must not clash with existing template names).
    used_names = set(existing_template_mappings.values())
    template_suggestions = {}
    for template, data in similar_classes.items():
        if template in existing_template_mappings:
            continue  # already mapped

        # The first variant supplies the classes and the context for naming.
        first_variant = data['variants'][0]
        name = suggest_name(first_variant['original'], first_variant['context'], used_names, template_mode=True)
        template_suggestions[template] = name
        used_names.add(name)

    # 2. Suggestions for plain class combinations, skipping combinations that
    #    are already mapped or that belong to a color-variant template.
    variant_classes = set(all_variants)
    used_names = set(existing_class_mappings.values())
    regular_suggestions = {}
    suggestion_rows = {}  # class combination -> its DataFrame row, reused for the report below
    for _, row in filtered_df.iterrows():
        class_combo = row['类组合']
        if class_combo in existing_class_mappings or class_combo in variant_classes:
            continue

        name = suggest_name(class_combo, row['上下文'], used_names)
        regular_suggestions[class_combo] = name
        suggestion_rows[class_combo] = row
        used_names.add(name)

    # Keep the suggestions for the editing cell.
    global_regular_suggestions = regular_suggestions
    global_template_suggestions = template_suggestions

    # Report: template suggestions.
    if template_suggestions:
        print(f"为 {len(template_suggestions)} 个颜色变体模板生成了名称建议:")

        template_data = []
        for template, name in template_suggestions.items():
            data = similar_classes[template]
            example = data['variants'][0]['original']
            template_data.append({
                '模板': template,
                '变体数量': len(data['variants']),
                '总出现次数': data['total_count'],
                '建议名称': name,
                '颜色参数': ", ".join(sorted(data['colors'])),
                '示例': example[:50] + ('...' if len(example) > 50 else '')
            })

        template_df = pd.DataFrame(template_data)
        template_df = template_df.sort_values(by='总出现次数', ascending=False)

        display(HTML(template_df.to_html()))
    else:
        print("没有需要处理的颜色变体模板")

    # Report: plain class suggestions.
    if regular_suggestions:
        print(f"\n为 {len(regular_suggestions)} 个常规类组合生成了名称建议 (已排除颜色变体):")

        suggestion_data = []
        for class_combo, name in regular_suggestions.items():
            row = suggestion_rows[class_combo]
            suggestion_data.append({
                '类组合': class_combo,
                '出现次数': row['出现次数'],
                '长度': len(class_combo),
                '建议名称': name,
                '名称长度': len(name),
                '长度比较': '⚠️ 名称更长' if (len(name)+3) > len(class_combo) else '✓ 名称更短',
                '上下文': row['上下文']
            })

        suggestion_df = pd.DataFrame(suggestion_data)
        suggestion_df = suggestion_df.sort_values(by='出现次数', ascending=False)

        # Cells are escaped so that contexts such as "<div>" are shown literally.
        display(HTML(suggestion_df.to_html()))
    else:
        print("\n没有需要处理的常规类组合")

    # Report: how many candidates were skipped because they are color variants.
    if all_variants:
        excluded_count = len(variant_classes & set(filtered_df['类组合']))
        print(f"\n已排除 {excluded_count} 个已识别为颜色变体的类组合")


suggest_button.on_click(on_suggest_clicked)

In [None]:
# 第四个单元格：编辑和保存名称（修订版）
def create_editable_interface():
    """Render one editable text field per suggested name, plus a save button.

    Reads the suggestions from ``global_template_suggestions`` and
    ``global_regular_suggestions``. The save button validates that the edited
    names are unique, merges them with the mappings already present in the JS
    file named by ``js_file_input`` and writes the file through
    ``save_mappings_to_js``.

    Returns:
        None. Prints a hint and returns early when no suggestions exist yet.
    """
    from html import escape  # local import: only this UI builder needs HTML escaping

    template_suggestions = globals().get('global_template_suggestions') or {}
    regular_suggestions = globals().get('global_regular_suggestions') or {}
    if not template_suggestions and not regular_suggestions:
        print("请先生成名称建议")
        return

    def clip(text, limit):
        """Cut ``text`` to ``limit`` characters, marking a cut with '...'."""
        return text[:limit] + ("..." if len(text) > limit else "")

    # 1. Color-variant templates.
    template_widgets = {}
    if template_suggestions:
        display(HTML("<h3>编辑颜色变体模板名称</h3>"))
        display(HTML("<p>颜色变体模板会自动处理多种颜色变体，并应用适当的颜色参数。</p>"))

        template_data = []
        for template, name in template_suggestions.items():
            data = global_similar_classes[template]
            colors = ", ".join(sorted(data['colors']))
            template_data.append((template, name, colors, data['total_count'], data['variants']))

        # Most used templates first.
        template_data.sort(key=lambda item: item[3], reverse=True)

        for number, (template, suggested_name, colors, total_count, variants) in enumerate(template_data, start=1):
            w = widgets.Text(
                value=suggested_name,
                description=f'模板 #{number} [变体:{len(variants)}, 次数:{total_count}]:',
                style={'description_width': 'initial'},
                layout=widgets.Layout(width='60%')
            )
            template_widgets[template] = w

            # Show the color parameters and up to two example variants.
            info = f"<b>颜色参数:</b> {escape(colors)}"
            for example_number, variant in enumerate(variants[:2], start=1):
                info += f"<br><b>变体示例 {example_number}:</b> {escape(clip(variant['original'], 50))}"
            display(HTML(info))
            display(w)
            display(HTML("<hr style='margin: 10px 0'>"))

    # 2. Plain class mappings.
    class_widgets = {}
    class_original_suggestions = {}  # the untouched suggestion per class combination

    if regular_suggestions:
        display(HTML("<h3>编辑常规类映射名称</h3>"))
        display(HTML("<p>常规类映射用于替换非参数化的类组合。</p>"))

        class_data = []
        for class_combo, name in regular_suggestions.items():
            row = global_df[global_df['类组合'] == class_combo].iloc[0]
            class_data.append((class_combo, name, row['出现次数'], row['上下文'], len(class_combo)))

        # Most frequent combinations first.
        class_data.sort(key=lambda item: item[2], reverse=True)

        for number, (class_combo, suggested_name, count, context, length) in enumerate(class_data, start=1):
            class_original_suggestions[class_combo] = suggested_name

            # Same "name + 3 characters" comparison that the save handler uses to skip entries.
            name_length = len(suggested_name)
            length_warning = ""
            if (name_length+3) > length:
                length_warning = f"<span style='color:red'>⚠️ 建议名称比原类组合更长! ({name_length}+3 > {length})</span>"

            w = widgets.Text(
                value=suggested_name,
                description=f'#{number} [次数:{count}]:',
                style={'description_width': 'initial'},
                layout=widgets.Layout(width='60%')
            )
            class_widgets[class_combo] = w

            # Class string and context are escaped: the context contains text
            # such as "<div>" that must not be interpreted as markup.
            display(HTML(f"<b>类组合:</b> {escape(clip(class_combo, 80))}<br><b>上下文:</b> {escape(context)}<br>{length_warning}"))
            display(w)
            display(HTML("<hr style='margin: 5px 0'>"))

    # Save button.
    save_button = widgets.Button(
        description="保存映射",
        button_style='success',
        layout=widgets.Layout(width='200px')
    )
    save_output = widgets.Output()

    @save_output.capture()
    def on_save_clicked(b):
        """Validate the edited names and write them to the JS file."""
        edited_class_names = {}
        edited_template_names = {}
        # name -> report line for every class/template that wants this name
        name_owners = {}

        # Template names are collected first.
        for template, widget in template_widgets.items():
            name = widget.value.strip()
            if not name:
                continue
            owners = name_owners.setdefault(name, [])
            if not owners:
                edited_template_names[template] = name
            owners.append(f" - 模板: {template}")

        # Then class names.
        for class_combo, widget in class_widgets.items():
            name = widget.value.strip()

            # A name that is longer than the classes it replaces and that the
            # user did not edit is not worth saving.
            if (len(name)+3) > len(class_combo) and name == class_original_suggestions[class_combo]:
                print(f"跳过名称更长的映射: '{name}' -> '{class_combo}'")
                continue

            if not name:
                continue
            owners = name_owners.setdefault(name, [])
            if not owners:
                edited_class_names[class_combo] = name
            owners.append(f" - 类: {class_combo}")

        # Abort on name conflicts, listing every holder of each conflicting name.
        duplicates = [name for name, owners in name_owners.items() if len(owners) > 1]
        if duplicates:
            print("⚠️ 错误: 发现名称冲突")
            for dup in duplicates:
                print(f"冲突名称: {dup}")
                for owner_line in name_owners[dup]:
                    print(owner_line)
            return

        js_file = js_file_input.value

        # Merge with the mappings already stored in the file (edited names win).
        existing_class_mappings, existing_template_mappings = load_js_mappings(js_file)
        all_class_mappings = {**existing_class_mappings, **edited_class_names}
        all_template_mappings = {**existing_template_mappings, **edited_template_names}

        save_mappings_to_js(js_file, all_class_mappings, all_template_mappings)

        print(f"✅ 已成功保存映射到 {js_file}")
        print(f"- 新增 {len(edited_class_names)} 个类映射 (总计: {len(all_class_mappings)})")
        print(f"- 新增 {len(edited_template_names)} 个模板映射 (总计: {len(all_template_mappings)})")

        # Next steps for the user.
        print("\n下一步操作:")
        print("1. 查看JS文件内容确认是否正确")
        print("2. 使用第5单元格将映射应用到HTML文件")

    save_button.on_click(on_save_clicked)
    display(save_button)
    display(save_output)

# Content written when the JS mapping file does not exist yet.
_DEFAULT_JS_CONTENT = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义类名映射
const twClasses = {
};

// 定义参数化模板
const twTemplates = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  console.log('正在应用Tailwind类映射...');

  // 应用普通类映射
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(el => {
    const classNames = el.getAttribute('data-tw').split(' ');
    classNames.forEach(name => {
      if (twClasses[name]) {
        twClasses[name].split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    });
  });

  // 应用参数化模板
  const paramElements = document.querySelectorAll('[data-tw-param]');
  paramElements.forEach(el => {
    const paramData = el.getAttribute('data-tw-param').split(':');
    if (paramData.length >= 2) {
      const templateName = paramData[0];
      const params = paramData[1].split(',');

      // 检查是否有匹配的模板
      if (twTemplates[templateName]) {
        let classString = twTemplates[templateName];

        // 替换颜色参数
        for (let i = 1; i <= 5; i++) {
          const pattern = new RegExp(`{color-${i}}`, 'g');
          if (params[i-1]) {
            classString = classString.replace(pattern, params[i-1]);
          }
        }

        // 添加处理后的类
        classString.split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    }
  });

  console.log('Tailwind类映射应用完成!');
});
"""

# (substring looked for in the mapping name, comment written above the group);
# the first matching key wins, unmatched names go to the last group.
_JS_CLASS_GROUPS = [
    ('nav', '导航相关'),
    ('btn', '按钮相关'),
    ('text', '文本相关'),
    ('card', '卡片相关'),
    ('container', '容器相关'),
    ('layout', '布局相关'),
    ('banner', '横幅相关'),
    ('avatar', '头像相关'),
    ('badge', '徽章相关'),
    ('other', '其他组件'),
]


def _replace_js_object(content, var_name, body):
    """Replace the object literal assigned to ``const <var_name>`` by ``{\\n<body>}``.

    The end of the literal is found by counting braces while skipping quoted
    strings and ``//`` comments, so values that contain braces (such as
    ``{color-1}`` placeholders) do not end the literal early.

    Returns ``(new_content, replaced)``; ``content`` is returned unchanged
    with ``replaced == False`` when no complete definition is found.
    """
    header = re.search(r'const\s+' + re.escape(var_name) + r'\s*=\s*\{', content)
    if header is None:
        return content, False

    depth = 0
    quote = None
    i = header.end() - 1  # index of the opening brace
    while i < len(content):
        ch = content[i]
        if quote is not None:
            if ch == '\\':
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"`':
            quote = ch
        elif ch == '/' and content[i + 1:i + 2] == '/':
            line_end = content.find('\n', i)
            if line_end == -1:
                break
            i = line_end
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                replacement = f"const {var_name} = {{\n{body}}}"
                return content[:header.start()] + replacement + content[i + 1:], True
        i += 1

    return content, False


def save_mappings_to_js(js_file, class_mappings, template_mappings):
    """Write the class and template mappings into the JS mapping file.

    An existing file keeps everything except the bodies of ``const twClasses``
    and ``const twTemplates``, which are regenerated; a missing file is created
    from a default skeleton that also contains the code applying the mappings
    in the browser.

    Args:
        js_file: Path of the JS file to create or update.
        class_mappings: Dict mapping class string -> name.
        template_mappings: Dict mapping template string -> name.
    """
    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        content = _DEFAULT_JS_CONTENT

    # Assign every class mapping to the first group whose key occurs in its name.
    group_keys = [key for key, _ in _JS_CLASS_GROUPS if key != 'other']
    grouped_mappings = {key: [] for key, _ in _JS_CLASS_GROUPS}
    for class_combo, name in class_mappings.items():
        group_key = next((key for key in group_keys if key in name), 'other')
        grouped_mappings[group_key].append((name, class_combo))

    # One commented section per non-empty group, entries sorted by name.
    class_lines = []
    for group_key, group_label in _JS_CLASS_GROUPS:
        group_items = grouped_mappings[group_key]
        if not group_items:
            continue
        class_lines.append(f"  // {group_label}\n")
        class_lines.extend(f"  '{name}': '{class_combo}',\n" for name, class_combo in sorted(group_items))
        class_lines.append("\n")
    class_content = "".join(class_lines)

    template_content = "".join(
        f"  '{name}': '{template}',\n"
        for template, name in sorted(template_mappings.items(), key=lambda item: item[1])
    )

    # Replace both object literals in place; warn instead of silently dropping
    # the mappings when a definition is missing from an existing file.
    for var_name, body in (('twClasses', class_content), ('twTemplates', template_content)):
        content, replaced = _replace_js_object(content, var_name, body)
        if not replaced:
            print(f"⚠️ 未在 {js_file} 中找到 const {var_name} 定义，对应映射未写入")

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

# Step 3 UI: button that opens the editing interface, and the output area it renders into.
edit_button = widgets.Button(
    description="编辑映射名称",
    button_style='info',
    layout=widgets.Layout(width='200px')
)
edit_output = widgets.Output()

# Render the controls in the notebook cell, in this order.
display(HTML("<h3>步骤3: 编辑和保存映射</h3>"))
display(edit_button)
display(edit_output)

@edit_output.capture()
def on_edit_clicked(b):
    """Button handler: build the editable mapping interface inside ``edit_output``."""
    create_editable_interface()

edit_button.on_click(on_edit_clicked)

In [None]:

# Color names recognised when matching templates (an independent copy of common_colors).
COLORS = common_colors.copy()

# Regex alternation that matches any one of the known color names.
# (A `global` statement is meaningless at module level, so none is used here.)
COLOR_NAMES_PATTERN = '|'.join(COLORS)

def extract_template_regex(template_str):
    """Convert a parameterised class template into regular-expression source.

    Each ``{color-N}`` placeholder becomes a capturing group that matches one
    of the known colour names (``COLOR_NAMES_PATTERN``).  All other text is
    regex-escaped, so characters that are common in Tailwind class names
    (``.``, ``[``, ``]``, ``(``, ``+`` ...) match themselves instead of being
    interpreted as regex syntax.

    Args:
        template_str: Template such as ``"bg-{color-1}-500 text-{color-2}-50"``.

    Returns:
        A ``(regex_pattern, param_positions, params)`` tuple:

        * ``regex_pattern`` - un-anchored pattern source; the caller adds
          ``^``/``$`` itself.
        * ``param_positions`` - maps the 1-based index of every placeholder
          occurrence to the number written inside it (as a string).
        * ``params`` - placeholder names (``"color-1"`` ...) in order of
          appearance, repeated occurrences included.
    """
    color_group = f"({COLOR_NAMES_PATTERN})"

    # Because the splitter contains a capturing group, re.split alternates
    # literal text (even indexes) with placeholder names (odd indexes).
    pieces = re.split(r'\{(color-\d+)\}', template_str)
    params = pieces[1::2]

    regex_parts = []
    for index, piece in enumerate(pieces):
        if index % 2:
            # Placeholder occurrence -> one capturing group per occurrence.
            regex_parts.append(color_group)
        else:
            # Literal template text must match verbatim.
            regex_parts.append(re.escape(piece))
    regex_pattern = ''.join(regex_parts)

    # Record which placeholder number each capture group corresponds to.
    param_positions = {
        position: param.split('-')[1]
        for position, param in enumerate(params, start=1)
    }

    return regex_pattern, param_positions, params

def apply_mappings_to_html(html_file, js_file, output_file=None):
    """Apply the class and template mappings from a JS file to an HTML file.

    Every ``class`` attribute whose whole value matches a colour template is
    replaced by ``data-tw-param="<template name>:<colours>"``; every attribute
    whose value equals a mapped class combination is replaced by
    ``data-tw="<name>"``.  The rewritten HTML is written to ``output_file``
    and progress is printed along the way.

    An attribute text that has already been rewritten by one rule is not
    counted again by a later template or class mapping, so the reported
    totals reflect attributes that were actually replaced by that rule.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file, parsed by ``load_js_mappings``.
        output_file: Destination path; defaults to
            ``<html_file without extension>_mapped<extension>``.

    Returns:
        dict with the keys ``output_file``, ``template_replaced``,
        ``template_details``, ``class_replaced``, ``class_details`` and
        ``remaining`` (at most ten of the longest class strings, each over 30
        characters, that are still present in the rewritten HTML).
    """
    # Default output path: "<name>_mapped<ext>" derived from the input path.
    if output_file is None:
        name, ext = os.path.splitext(html_file)
        output_file = f"{name}_mapped{ext}"

    # class_mappings: class string -> name; template_mappings: template -> name.
    class_mappings, template_mappings = load_js_mappings(js_file)

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # One (full attribute text, attribute value) pair per class attribute found.
    class_pattern = re.compile(r'class=["\']([^"\']+)["\']')
    class_attrs = [(m.group(0), m.group(1)) for m in class_pattern.finditer(html_content)]

    print(f"\n分析HTML中的class属性...")
    print(f"找到 {len(class_attrs)} 个class属性")

    # Show a few examples.
    for attr, _classes in class_attrs[:5]:
        print(f"- {attr}")

    # Replacement bookkeeping.
    template_replacements = []
    class_replacements = []

    # Attribute text -> rule that rewrote it.  str.replace() below rewrites
    # every identical attribute at once, so repeated occurrences handled by
    # the same rule are still counted, while later rules skip the attribute.
    rewritten_by = {}

    # === Template replacement (colour variants) ===
    print("\n替换颜色变体模板...")

    for template_str, template_name in template_mappings.items():
        print(f"\n处理模板 '{template_name}'")
        print(f"原始模板: {template_str}")

        # Templates without colour parameters are not handled here.
        if '{color-' not in template_str:
            continue

        # Extract the parameters and build the regex.
        template_regex, param_indices, params = extract_template_regex(template_str)
        print(f"检测到参数: {params}")
        if template_name == 't-themed-2':
            print(f"模板正则: {template_regex}")
            print(f"参数位置: {param_indices}")

        # The whole attribute value must match; compiled once per template.
        pattern = re.compile('^' + template_regex + '$')
        rule = ('template', template_str)
        processed_count = 0

        for attr_full, class_attr in class_attrs:
            # Skip attributes that a different rule has already rewritten.
            if rewritten_by.get(attr_full, rule) != rule:
                continue

            match = pattern.match(class_attr)
            if not match:
                continue

            # Captured colour names, in placeholder order.
            params_str = ','.join(match.groups())
            replacement = f'data-tw-param="{template_name}:{params_str}"'

            # Swap the attribute for either quoting style.
            new_attr = attr_full.replace('class="' + class_attr + '"', replacement)
            new_attr = new_attr.replace("class='" + class_attr + "'", replacement)

            if template_name == 't-themed-2':
                print(f"模板替换: {new_attr}")
                print(f"原始类: {class_attr}")
            html_content = html_content.replace(attr_full, new_attr)
            rewritten_by[attr_full] = rule
            processed_count += 1

            # Record the replacement.
            template_replacements.append({
                'template': template_name,
                'original': class_attr,
                'params': params_str,
                'replacement': replacement
            })

        if processed_count > 0:
            print(f"✅ 替换了 {processed_count} 个 '{template_name}' 变体")
        else:
            print(f"❌ 未找到 '{template_name}' 的匹配")

            # Debug information.
            print("调试：")
            print(f"- 模板正则: {template_regex}")

            # Look for near matches: compare sets of class tokens after
            # normalising colour utilities to a common placeholder.
            similar_classes = []
            template_parts = set(re.sub(r'\{color-\d+\}', 'COLOR', template_str).split())

            for _, class_attr in class_attrs:
                normalized = re.sub(r'(text|bg|border)-([a-z]+)(?:-\d+)?', r'\1-COLOR-000', class_attr)
                class_parts = set(normalized.split())
                similarity = len(template_parts & class_parts) / max(len(template_parts), 1)

                if similarity > 0.7:  # more than 70% of the template tokens present
                    similar_classes.append((class_attr, similarity))

            # Show the most similar classes.
            if similar_classes:
                print("发现可能的相似类:")
                for cls, sim in sorted(similar_classes, key=lambda x: x[1], reverse=True)[:3]:
                    print(f"- 相似度 {sim:.2f}: {cls}")

    # === Plain class-combination replacement ===
    print("\n替换常规类映射...")
    for class_combo, name in class_mappings.items():
        rule = ('class', class_combo)
        processed_count = 0

        for attr_full, class_attr in class_attrs:
            if class_attr != class_combo:
                continue
            # Skip attributes that a template has already rewritten.
            if rewritten_by.get(attr_full, rule) != rule:
                continue

            replacement = f'data-tw="{name}"'

            # Swap the attribute for either quoting style.
            new_attr = attr_full.replace('class="' + class_attr + '"', replacement)
            new_attr = new_attr.replace("class='" + class_attr + "'", replacement)

            html_content = html_content.replace(attr_full, new_attr)
            rewritten_by[attr_full] = rule
            processed_count += 1

            # Record the replacement.
            class_replacements.append({
                'name': name,
                'original': class_attr
            })

        if processed_count > 0:
            print(f"替换了 {processed_count} 个 '{name}' 实例")

    # Save the rewritten HTML.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Collect long class strings that are still present after rewriting.
    soup = BeautifulSoup(html_content, 'lxml')
    remaining = []

    for element in soup.find_all(attrs={'class': True}):
        class_str = ' '.join(element['class'])
        if len(class_str) > 30:  # only long class strings are of interest
            remaining.append({
                'class': class_str,
                'length': len(class_str),
                'context': element.name + (' #' + element.get('id') if element.get('id') else '')
            })

    # Longest first.
    remaining.sort(key=lambda x: x['length'], reverse=True)

    result = {
        'output_file': output_file,
        'template_replaced': len(template_replacements),
        'template_details': template_replacements,
        'class_replaced': len(class_replacements),
        'class_details': class_replacements,
        'remaining': remaining[:10]
    }

    # Replacement summary.
    total_replaced = result['template_replaced'] + result['class_replaced']
    print(f"\n📊 替换总结:")
    print(f"- 总替换数: {total_replaced}")
    print(f"- 颜色变体模板: {result['template_replaced']}")
    print(f"- 常规类映射: {result['class_replaced']}")

    return result

# "Apply mappings" UI: text inputs for the HTML file, the JS mapping file and
# an optional output path, an apply button, and an Output widget that collects
# what the click handler prints.
html_input = widgets.Text(value='index.html', description='HTML文件:')
js_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
output_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(
    description="应用映射",
    button_style='success',
    layout=widgets.Layout(width='200px')
)
apply_output = widgets.Output()

# Render the step heading, then the inputs, the button and the output area.
display(HTML("<h3>步骤4: 应用映射到HTML</h3>"))
display(widgets.VBox([html_input, js_input, output_input]))
display(apply_button)
display(apply_output)

@apply_output.capture()
def on_apply_clicked(b):
    """Click handler for ``apply_button``: apply the mappings and print a report.

    Reads the paths from the text widgets, checks that both input files exist,
    calls ``apply_mappings_to_html`` and then prints the template replacements,
    the first ten class replacements and the remaining long class strings.
    ``b`` is the argument ipywidgets passes to click callbacks; it is unused.
    """
    def _clip(text):
        # Keep report lines short: at most 50 characters, "..." marks a cut.
        return text[:47] + "..." if len(text) > 50 else text

    html_file = html_input.value
    js_file = js_input.value
    output_file = output_input.value or None

    # Both input files must exist before anything is attempted.
    if not os.path.exists(html_file):
        print(f"❌ 错误: HTML文件 {html_file} 不存在")
        return
    if not os.path.exists(js_file):
        print(f"❌ 错误: JS文件 {js_file} 不存在")
        return

    print(f"正在应用映射 {js_file} 到 {html_file}...")
    result = apply_mappings_to_html(html_file, js_file, output_file)
    print(f"✅ 已生成更新后的HTML文件: {result['output_file']}")

    # --- colour-variant template replacements ---
    if result['template_replaced'] > 0:
        print(f"\n🔄 替换了 {result['template_replaced']} 个颜色变体模板:")
        for number, entry in enumerate(result['template_details'], start=1):
            print(f"{number}. {entry['template']} ({entry['params']}) - {_clip(entry['original'])}")
    else:
        print("\n❌ 没有替换任何颜色变体模板")

    # --- plain class-combination replacements (first ten only) ---
    if result['class_replaced'] > 0:
        print(f"\n🔄 替换了 {result['class_replaced']} 个常规类组合")
        entries = result['class_details']
        for number, entry in enumerate(entries[:10], start=1):
            print(f"{number}. {entry['name']} - {_clip(entry['original'])}")
        hidden = len(entries) - 10
        if hidden > 0:
            print(f"... 还有 {hidden} 个类映射替换 (已省略)")
    else:
        print("\n❌ 没有替换任何常规类组合")

    # --- long class strings that were left untouched ---
    leftovers = result['remaining']
    if leftovers:
        print(f"\n⚠️ 仍有 {len(leftovers)} 个长类组合未处理:")
        for number, item in enumerate(leftovers, start=1):
            print(f"{number}. 长度{item['length']} ({item['context']}): {item['class'][:50]}...")
    else:
        print("\n✅ 没有剩余的长类组合")

# Register the handler so that clicking the button runs it.
apply_button.on_click(on_apply_clicked)

In [None]:
# abandoned

In [None]:
# 修正版：应用映射函数（带remaining键）
def apply_mappings_to_html(html_file, js_file, output_file=None):
    """Apply the JS mappings to an HTML file by plain text replacement.

    For every colour template, concrete class strings are generated by
    substituting known colour names for ``{color-1}`` (and ``{color-2}``);
    each ``class`` attribute whose value equals such a string is replaced by
    ``data-tw-param="<template name>:<colours>"``.  Attributes whose value
    equals a mapped class combination are replaced by ``data-tw="<name>"``.
    The result is written to ``output_file``; detailed diagnostics are printed,
    including extra hints when no template could be replaced.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file, parsed by ``load_js_mappings``.
        output_file: Destination path; defaults to
            ``<html_file without extension>_mapped<extension>``.

    Returns:
        dict with the keys ``output_file``, ``template_replaced`` and
        ``class_replaced`` (numbers of replaced attributes),
        ``template_details`` and ``class_details`` (one entry per replaced
        class string, each with a ``count``) and ``remaining`` (at most ten of
        the longest class strings, each over 30 characters, still present).
    """
    # Default output path: "<name>_mapped<ext>" derived from the input path.
    if output_file is None:
        name, ext = os.path.splitext(html_file)
        output_file = f"{name}_mapped{ext}"

    # class_mappings: class string -> name; template_mappings: template -> name.
    class_mappings, template_mappings = load_js_mappings(js_file)

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # === Debug: show some of the class attributes found in the HTML ===
    print("\n分析HTML中的class属性...")
    class_pattern = r'class=["\']([^"\']+)["\']'
    sample_classes = list(re.finditer(class_pattern, html_content))[:5]
    print(f"找到 {len(re.findall(class_pattern, html_content))} 个class属性")
    print("示例class属性:")
    for match in sample_classes:
        print(f"- {match.group(0)}")

    template_replacements = []

    # Process every template.
    print("\n尝试替换颜色变体模板...")
    for template_str, template_name in template_mappings.items():
        print(f"\n处理模板 '{template_name}'")
        print(f"原始模板: {template_str}")

        # Only templates that use colour parameters are handled.
        if '{color-' in template_str:
            # Names of the parameters present in the template (color-1..color-5).
            param_keys = [
                f'color-{i}' for i in range(1, 6)
                if f'{{color-{i}}}' in template_str
            ]

            print(f"检测到参数: {param_keys}")

            # Candidate colour values.
            colors = ['primary', 'secondary', 'accent', 'gray', 'red', 'blue', 'green', 'yellow',
                     'indigo', 'purple', 'pink', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald',
                     'rose', 'fuchsia', 'sky', 'violet']

            # (concrete class string, colour values) pairs to look for.
            test_cases = []

            # One parameter: try every colour.
            if len(param_keys) == 1:
                for color in colors:
                    concrete_class = template_str.replace('{color-1}', color)
                    test_cases.append((concrete_class, [color]))

            # Two parameters: only the first five colours, to limit combinations.
            elif len(param_keys) == 2:
                for color1 in colors[:5]:
                    for color2 in [c for c in colors[:5] if c != color1]:
                        concrete_class = template_str.replace('{color-1}', color1).replace('{color-2}', color2)
                        test_cases.append((concrete_class, [color1, color2]))

            # Verbose pass over the first three candidates.
            print(f"测试 {len(test_cases)} 个可能的颜色组合")
            for i, (test_class, params) in enumerate(test_cases[:3]):
                # Truncate only the class text; the label is always printed.
                preview = f"{test_class[:50]}..." if len(test_class) > 50 else test_class
                print(f"测试案例 {i+1}: {preview}")

                # Is this exact class string present as an attribute value?
                escaped_class = re.escape(test_class)
                pattern = r'class=["\']' + escaped_class + r'["\']'

                exact_matches = re.findall(pattern, html_content)
                partial_matches = []

                # Fallback: same class tokens in a different order or spacing.
                if not exact_matches:
                    normalized_test = ' '.join(sorted(test_class.split()))
                    class_attrs = re.findall(r'class=["\']([^"\']+)["\']', html_content)
                    for attr in class_attrs:
                        normalized_attr = ' '.join(sorted(attr.split()))
                        if normalized_test == normalized_attr:
                            partial_matches.append(attr)

                print(f"  - 精确匹配: {len(exact_matches)}")
                print(f"  - 部分匹配 (忽略顺序): {len(partial_matches)}")

                if exact_matches:
                    print(f"  - 找到匹配! 示例: {exact_matches[0]}")
                elif partial_matches:
                    print(f"  - 找到部分匹配! 示例: {partial_matches[0]}")
                    print(f"  - 正则模式: {pattern}")

                # Replace only when there is an exact match.
                if exact_matches:
                    replacement = f'data-tw-param="{template_name}:{",".join(params)}"'
                    new_content = re.sub(pattern, replacement, html_content)
                    replaced = new_content != html_content
                    print(f"  - 替换结果: {'成功' if replaced else '失败'}")
                    if replaced:
                        html_content = new_content
                        template_replacements.append({
                            'template': template_name,
                            'original': test_class,
                            'params': params,
                            'count': len(exact_matches)
                        })

            # Quiet pass over all candidates (the first three no longer match
            # if they were replaced above).
            for test_class, params in test_cases:
                escaped_class = re.escape(test_class)
                pattern = r'class=["\']' + escaped_class + r'["\']'
                replacement = f'data-tw-param="{template_name}:{",".join(params)}"'

                # subn replaces and counts in a single scan of the document.
                new_content, matches_found = re.subn(pattern, replacement, html_content)
                if matches_found > 0 and new_content != html_content:
                    html_content = new_content
                    template_replacements.append({
                        'template': template_name,
                        'original': test_class,
                        'params': params,
                        'count': matches_found
                    })

    # === Plain class-combination replacement ===
    class_replacements = []

    for class_combo, name in class_mappings.items():
        escaped_class = re.escape(class_combo)
        pattern = r'class=["\']' + escaped_class + r'["\']'
        replacement = f'data-tw="{name}"'

        new_content, matches_found = re.subn(pattern, replacement, html_content)
        if matches_found > 0 and new_content != html_content:
            html_content = new_content
            class_replacements.append({
                'name': name,
                'original': class_combo,
                'count': matches_found
            })

    # Save the rewritten HTML.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Totals.
    template_replaced_count = sum(r['count'] for r in template_replacements)
    class_replaced_count = sum(r['count'] for r in class_replacements)

    # Nothing replaced by any template: print extra diagnostics.
    if template_replaced_count == 0:
        print("\n=== 诊断信息 ===")
        print("模板替换失败诊断:")

        # 1. Do the templates contain colour parameters at all?
        has_param_templates = any('{color-' in t for t in template_mappings.keys())
        print(f"1. 模板中含有颜色参数: {'是' if has_param_templates else '否'}")

        # 2. Does any concrete variant occur anywhere in the HTML text?
        test_color_combinations = False

        for template in template_mappings.keys():
            if '{color-1}' in template:
                for color in ['primary', 'gray', 'red', 'blue', 'green']:
                    test_class = template.replace('{color-1}', color)
                    if '{color-' not in test_class:  # every parameter was substituted
                        # Loose search: the class text itself, not the whole attribute.
                        if test_class in html_content:
                            print(f"2. 找到匹配颜色变体: {test_class[:50]}...")
                            test_color_combinations = True
                            break

        if not test_color_combinations:
            print("2. 未找到任何颜色变体在HTML中匹配")

            # 3. Look for spacing or ordering differences using a concrete example.
            print("\n3. 检查具体案例:")
            for template, name in list(template_mappings.items())[:1]:  # first template only
                if '{color-1}' in template:
                    # Build one concrete class string.
                    concrete = template.replace('{color-1}', 'primary')
                    if '{color-2}' in concrete:
                        concrete = concrete.replace('{color-2}', 'gray')

                    print(f"模板 '{name}' 具体化为: {concrete}")

                    # Find attributes that share most of its class tokens.
                    class_parts = set(concrete.split())
                    pattern = r'class=["\']([^"\']+)["\']'

                    similar_classes = []
                    for match in re.finditer(pattern, html_content):
                        html_class = match.group(1)
                        html_parts = set(html_class.split())

                        # Share of the concrete class tokens found in this attribute.
                        overlap = len(class_parts & html_parts) / len(class_parts)
                        if overlap > 0.8:  # more than 80% overlap
                            similar_classes.append({
                                'class': html_class,
                                'overlap': overlap,
                                'full_attr': match.group(0)
                            })

                    if similar_classes:
                        print(f"找到 {len(similar_classes)} 个相似类:")
                        for i, item in enumerate(similar_classes[:3]):  # first three
                            print(f"  {i+1}. 重叠度 {item['overlap']:.2f}: {item['class']}")
                            print(f"     完整属性: {item['full_attr']}")
                    else:
                        print("未找到相似类")

    # Collect long class strings that are still present after rewriting.
    soup = BeautifulSoup(html_content, 'lxml')
    remaining = []

    for element in soup.find_all(True):
        if 'class' in element.attrs and element['class']:
            class_str = ' '.join(element['class'])
            if len(class_str) > 30:  # only long class strings are of interest
                remaining.append({
                    'class': class_str,
                    'length': len(class_str),
                    'context': element.name + (' #' + element.get('id') if element.get('id') else '')
                })

    # Longest first.
    remaining.sort(key=lambda x: x['length'], reverse=True)

    return {
        'output_file': output_file,
        'template_replaced': template_replaced_count,
        'template_details': template_replacements,
        'class_replaced': class_replaced_count,
        'class_details': class_replacements,
        'remaining': remaining[:10]  # read by the click handler's report
    }


# "Apply mappings" UI: text inputs for the HTML file, the JS mapping file and
# an optional output path, an apply button, and an Output widget that collects
# what the click handler prints.
html_input = widgets.Text(value='index.html', description='HTML文件:')
js_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
output_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(
    description="应用映射",
    button_style='success',
    layout=widgets.Layout(width='200px')
)
apply_output = widgets.Output()

# Render the step heading, then the inputs, the button and the output area.
display(HTML("<h3>步骤4: 应用映射到HTML</h3>"))
display(widgets.VBox([html_input, js_input, output_input]))
display(apply_button)
display(apply_output)

@apply_output.capture()
def on_apply_clicked(b):
    """Click handler for ``apply_button``: apply the mappings and print a report.

    Reads the paths from the text widgets, checks that both input files exist,
    calls ``apply_mappings_to_html`` and then prints the template replacements
    and the ten most frequent class replacements (both ordered by count), the
    remaining long class strings, and a summary with percentage shares.
    ``b`` is the argument ipywidgets passes to click callbacks; it is unused.
    """
    def _clip(text):
        # Keep report lines short: at most 50 characters, "..." marks a cut.
        return text[:47] + "..." if len(text) > 50 else text

    def _by_count(entries):
        # Most frequently replaced entries first.
        return sorted(entries, key=lambda entry: entry['count'], reverse=True)

    html_file = html_input.value
    js_file = js_input.value
    output_file = output_input.value or None

    # Both input files must exist before anything is attempted.
    if not os.path.exists(html_file):
        print(f"❌ 错误: HTML文件 {html_file} 不存在")
        return
    if not os.path.exists(js_file):
        print(f"❌ 错误: JS文件 {js_file} 不存在")
        return

    print(f"正在应用映射 {js_file} 到 {html_file}...")
    result = apply_mappings_to_html(html_file, js_file, output_file)
    print(f"✅ 已生成更新后的HTML文件: {result['output_file']}")

    # --- colour-variant template replacements ---
    if result['template_replaced'] > 0:
        print(f"\n🔄 替换了 {result['template_replaced']} 个颜色变体模板:")
        for rank, entry in enumerate(_by_count(result['template_details']), start=1):
            joined = ','.join(entry['params'])
            print(f"{rank}. {entry['template']}:{joined} ({entry['count']}次) - {_clip(entry['original'])}")
    else:
        print("\n❌ 没有替换任何颜色变体模板")

    # --- plain class-combination replacements (top ten only) ---
    if result['class_replaced'] > 0:
        print(f"\n🔄 替换了 {result['class_replaced']} 个常规类组合:")
        ranked = _by_count(result['class_details'])
        for rank, entry in enumerate(ranked[:10], start=1):
            print(f"{rank}. {entry['name']} ({entry['count']}次) - {_clip(entry['original'])}")
        if len(ranked) > 10:
            print(f"... 还有 {len(ranked) - 10} 个类映射替换 (已省略)")
    else:
        print("\n❌ 没有替换任何常规类组合")

    # --- long class strings that were left untouched ---
    leftovers = result['remaining']
    if leftovers:
        print(f"\n⚠️ 仍有 {len(leftovers)} 个长类组合未处理:")
        for rank, item in enumerate(leftovers, start=1):
            print(f"{rank}. 长度{item['length']} ({item['context']}): {item['class'][:50]}...")
    else:
        print("\n✅ 没有剩余的长类组合")

    # --- overall summary ---
    total_replaced = result['template_replaced'] + result['class_replaced']

    def _percent(part):
        # Share of all replacements, one decimal; 0 when nothing was replaced.
        return round(part / total_replaced * 100 if total_replaced else 0, 1)

    print(f"\n📊 替换总结:")
    print(f"- 总替换数: {total_replaced}")
    print(f"- 颜色变体模板: {result['template_replaced']} ({_percent(result['template_replaced'])}%)")
    print(f"- 常规类映射: {result['class_replaced']} ({_percent(result['class_replaced'])}%)")

# Register the handler so that clicking the button runs it.
apply_button.on_click(on_apply_clicked)

In [None]:
def apply_mappings_to_html(html_file, js_file, output_file=None):
    """Apply the JS mappings to an HTML file by plain text replacement.

    For every colour template, concrete class strings are generated by
    substituting known colour names for ``{color-1}`` (and ``{color-2}``);
    each ``class`` attribute whose value equals such a string is replaced by
    ``data-tw-param="<template name>:<colours>"``.  Attributes whose value
    equals a mapped class combination are replaced by ``data-tw="<name>"``.
    The result is written to ``output_file``; detailed diagnostics are printed,
    including extra hints when no template could be replaced.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file, parsed by ``load_js_mappings``.
        output_file: Destination path; defaults to
            ``<html_file without extension>_mapped<extension>``.

    Returns:
        dict with the keys ``output_file``, ``template_replaced`` and
        ``class_replaced`` (numbers of replaced attributes),
        ``template_details`` and ``class_details`` (one entry per replaced
        class string, each with a ``count``) and ``remaining`` (at most ten of
        the longest class strings, each over 30 characters, still present).
        The click handler reads ``remaining``, so it must always be present.
    """
    # Default output path: "<name>_mapped<ext>" derived from the input path.
    if output_file is None:
        name, ext = os.path.splitext(html_file)
        output_file = f"{name}_mapped{ext}"

    # class_mappings: class string -> name; template_mappings: template -> name.
    class_mappings, template_mappings = load_js_mappings(js_file)

    # === Debug: show a few of the loaded templates ===
    print(f"加载的模板映射数量: {len(template_mappings)}")
    for template, name in list(template_mappings.items())[:3]:
        # Truncate only the template text; the label is always printed.
        preview = f"{template[:50]}..." if len(template) > 50 else template
        print(f"模板示例 '{name}': {preview}")

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # === Debug: show some of the class attributes found in the HTML ===
    print("\n分析HTML中的class属性...")
    class_pattern = r'class=["\']([^"\']+)["\']'
    sample_classes = list(re.finditer(class_pattern, html_content))[:5]
    print(f"找到 {len(re.findall(class_pattern, html_content))} 个class属性")
    print("示例class属性:")
    for match in sample_classes:
        print(f"- {match.group(0)}")

    template_replacements = []

    # Process every template.
    print("\n尝试替换颜色变体模板...")
    for template_str, template_name in template_mappings.items():
        print(f"\n处理模板 '{template_name}'")
        print(f"原始模板: {template_str}")

        # Only templates that use colour parameters are handled.
        if '{color-' in template_str:
            # Names of the parameters present in the template (color-1..color-5).
            param_keys = [
                f'color-{i}' for i in range(1, 6)
                if f'{{color-{i}}}' in template_str
            ]

            print(f"检测到参数: {param_keys}")

            # Candidate colour values.
            colors = ['primary', 'secondary', 'accent', 'gray', 'red', 'blue', 'green', 'yellow',
                     'indigo', 'purple', 'pink', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald']

            # (concrete class string, colour values) pairs to look for.
            test_cases = []

            # One parameter: try every colour.
            if len(param_keys) == 1:
                for color in colors:
                    concrete_class = template_str.replace('{color-1}', color)
                    test_cases.append((concrete_class, [color]))

            # Two parameters: only the first five colours, to limit combinations.
            elif len(param_keys) == 2:
                for color1 in colors[:5]:
                    for color2 in [c for c in colors[:5] if c != color1]:
                        concrete_class = template_str.replace('{color-1}', color1).replace('{color-2}', color2)
                        test_cases.append((concrete_class, [color1, color2]))

            # Verbose pass over the first three candidates.
            print(f"测试 {len(test_cases)} 个可能的颜色组合")
            for i, (test_class, params) in enumerate(test_cases[:3]):
                # Truncate only the class text; the label is always printed.
                preview = f"{test_class[:50]}..." if len(test_class) > 50 else test_class
                print(f"测试案例 {i+1}: {preview}")

                # Is this exact class string present as an attribute value?
                escaped_class = re.escape(test_class)
                pattern = r'class=["\']' + escaped_class + r'["\']'

                exact_matches = re.findall(pattern, html_content)
                partial_matches = []

                # Fallback: same class tokens in a different order or spacing.
                if not exact_matches:
                    normalized_test = ' '.join(sorted(test_class.split()))
                    class_attrs = re.findall(r'class=["\']([^"\']+)["\']', html_content)
                    for attr in class_attrs:
                        normalized_attr = ' '.join(sorted(attr.split()))
                        if normalized_test == normalized_attr:
                            partial_matches.append(attr)

                print(f"  - 精确匹配: {len(exact_matches)}")
                print(f"  - 部分匹配 (忽略顺序): {len(partial_matches)}")

                if exact_matches:
                    print(f"  - 找到匹配! 示例: {exact_matches[0]}")
                elif partial_matches:
                    print(f"  - 找到部分匹配! 示例: {partial_matches[0]}")
                    print(f"  - 正则模式: {pattern}")

                # Replace only when there is an exact match.
                if exact_matches:
                    replacement = f'data-tw-param="{template_name}:{",".join(params)}"'
                    new_content = re.sub(pattern, replacement, html_content)
                    replaced = new_content != html_content
                    print(f"  - 替换结果: {'成功' if replaced else '失败'}")
                    if replaced:
                        html_content = new_content
                        template_replacements.append({
                            'template': template_name,
                            'original': test_class,
                            'params': params,
                            'count': len(exact_matches)
                        })

            # Quiet pass over all candidates (the first three no longer match
            # if they were replaced above).
            for test_class, params in test_cases:
                escaped_class = re.escape(test_class)
                pattern = r'class=["\']' + escaped_class + r'["\']'
                replacement = f'data-tw-param="{template_name}:{",".join(params)}"'

                # subn replaces and counts in a single scan of the document.
                new_content, matches_found = re.subn(pattern, replacement, html_content)
                if matches_found > 0 and new_content != html_content:
                    html_content = new_content
                    template_replacements.append({
                        'template': template_name,
                        'original': test_class,
                        'params': params,
                        'count': matches_found
                    })

    # === Plain class-combination replacement ===
    class_replacements = []

    for class_combo, name in class_mappings.items():
        escaped_class = re.escape(class_combo)
        pattern = r'class=["\']' + escaped_class + r'["\']'
        replacement = f'data-tw="{name}"'

        new_content, matches_found = re.subn(pattern, replacement, html_content)
        if matches_found > 0 and new_content != html_content:
            html_content = new_content
            class_replacements.append({
                'name': name,
                'original': class_combo,
                'count': matches_found
            })

    # Save the rewritten HTML.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Totals.
    template_replaced_count = sum(r['count'] for r in template_replacements)
    class_replaced_count = sum(r['count'] for r in class_replacements)

    # Nothing replaced by any template: print extra diagnostics.
    if template_replaced_count == 0:
        print("\n=== 诊断信息 ===")
        print("模板替换失败诊断:")

        # 1. Do the templates contain colour parameters at all?
        has_param_templates = any('{color-' in t for t in template_mappings.keys())
        print(f"1. 模板中含有颜色参数: {'是' if has_param_templates else '否'}")

        # 2. Does any concrete variant occur anywhere in the HTML text?
        test_color_combinations = False

        for template in template_mappings.keys():
            if '{color-1}' in template:
                for color in ['primary', 'gray', 'red', 'blue', 'green']:
                    test_class = template.replace('{color-1}', color)
                    if '{color-' not in test_class:  # every parameter was substituted
                        # Loose search: the class text itself, not the whole attribute.
                        if test_class in html_content:
                            print(f"2. 找到匹配颜色变体: {test_class[:50]}...")
                            test_color_combinations = True
                            break

        if not test_color_combinations:
            print("2. 未找到任何颜色变体在HTML中匹配")

            # 3. Look for spacing or ordering differences using a concrete example.
            print("\n3. 检查具体案例:")
            for template, name in list(template_mappings.items())[:1]:  # first template only
                if '{color-1}' in template:
                    # Build one concrete class string.
                    concrete = template.replace('{color-1}', 'primary')
                    if '{color-2}' in concrete:
                        concrete = concrete.replace('{color-2}', 'gray')

                    print(f"模板 '{name}' 具体化为: {concrete}")

                    # Find attributes that share most of its class tokens.
                    class_parts = set(concrete.split())
                    pattern = r'class=["\']([^"\']+)["\']'

                    similar_classes = []
                    for match in re.finditer(pattern, html_content):
                        html_class = match.group(1)
                        html_parts = set(html_class.split())

                        # Share of the concrete class tokens found in this attribute.
                        overlap = len(class_parts & html_parts) / len(class_parts)
                        if overlap > 0.8:  # more than 80% overlap
                            similar_classes.append({
                                'class': html_class,
                                'overlap': overlap,
                                'full_attr': match.group(0)
                            })

                    if similar_classes:
                        print(f"找到 {len(similar_classes)} 个相似类:")
                        for i, item in enumerate(similar_classes[:3]):  # first three
                            print(f"  {i+1}. 重叠度 {item['overlap']:.2f}: {item['class']}")
                            print(f"     完整属性: {item['full_attr']}")
                    else:
                        print("未找到相似类")

    # Collect long class strings that are still present after rewriting; the
    # click handler reports them via result['remaining'].
    soup = BeautifulSoup(html_content, 'lxml')
    remaining = []

    for element in soup.find_all(True):
        if 'class' in element.attrs and element['class']:
            class_str = ' '.join(element['class'])
            if len(class_str) > 30:  # only long class strings are of interest
                remaining.append({
                    'class': class_str,
                    'length': len(class_str),
                    'context': element.name + (' #' + element.get('id') if element.get('id') else '')
                })

    # Longest first.
    remaining.sort(key=lambda x: x['length'], reverse=True)

    return {
        'output_file': output_file,
        'template_replaced': template_replaced_count,
        'template_details': template_replacements,
        'class_replaced': class_replaced_count,
        'class_details': class_replacements,
        'remaining': remaining[:10]  # previously missing -> KeyError in the handler
    }

# "Apply mappings" UI: text inputs for the HTML file, the JS mapping file and
# an optional output path, an apply button, and an Output widget that collects
# what the click handler prints.
html_input = widgets.Text(value='index.html', description='HTML文件:')
js_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
output_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(
    description="应用映射",
    button_style='success',
    layout=widgets.Layout(width='200px')
)
apply_output = widgets.Output()

# Render the step heading, then the inputs, the button and the output area.
display(HTML("<h3>步骤4: 应用映射到HTML</h3>"))
display(widgets.VBox([html_input, js_input, output_input]))
display(apply_button)
display(apply_output)

@apply_output.capture()
def on_apply_clicked(b):
    """Apply the JS mappings to the HTML file and print a replacement report.

    Reads the HTML, JS and output paths from the input widgets, calls
    ``apply_mappings_to_html`` and prints which colour templates and class
    combinations were replaced. Everything printed is captured into
    ``apply_output`` by the decorator.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    html_file = html_input.value
    js_file = js_input.value
    # An empty output field is forwarded as None.
    output_file = output_input.value or None

    if not os.path.exists(html_file):
        print(f"❌ 错误: HTML文件 {html_file} 不存在")
        return

    if not os.path.exists(js_file):
        print(f"❌ 错误: JS文件 {js_file} 不存在")
        return

    print(f"正在应用映射 {js_file} 到 {html_file}...")
    result = apply_mappings_to_html(html_file, js_file, output_file)

    print(f"✅ 已生成更新后的HTML文件: {result['output_file']}")

    # Report template replacements, most frequent first.
    if result['template_replaced'] > 0:
        print(f"\n🔄 替换了 {result['template_replaced']} 个颜色变体模板:")

        sorted_details = sorted(result['template_details'], key=lambda x: x['count'], reverse=True)

        for i, detail in enumerate(sorted_details):
            template = detail['template']
            params = ','.join(detail['params'])
            count = detail['count']

            # Truncate long original class strings to 50 characters.
            original = detail['original']
            if len(original) > 50:
                original = original[:47] + "..."

            print(f"{i+1}. {template}:{params} ({count}次) - {original}")
    else:
        print("\n❌ 没有替换任何颜色变体模板")

    # Report regular class replacements, most frequent first.
    if result['class_replaced'] > 0:
        print(f"\n🔄 替换了 {result['class_replaced']} 个常规类组合:")

        sorted_details = sorted(result['class_details'], key=lambda x: x['count'], reverse=True)

        # Only the ten most frequent entries are listed.
        for i, detail in enumerate(sorted_details[:10]):
            name = detail['name']
            count = detail['count']

            # Truncate long original class strings to 50 characters.
            original = detail['original']
            if len(original) > 50:
                original = original[:47] + "..."

            print(f"{i+1}. {name} ({count}次) - {original}")

        if len(sorted_details) > 10:
            print(f"... 还有 {len(sorted_details) - 10} 个类映射替换 (已省略)")
    else:
        print("\n❌ 没有替换任何常规类组合")

    # Report leftover long class combinations. The 'remaining' key is treated
    # as optional: indexing it directly raised KeyError (after the output file
    # had already been written) whenever the result dict did not carry it.
    remaining = result.get('remaining')
    if remaining is not None:
        if remaining:
            print(f"\n⚠️ 仍有 {len(remaining)} 个长类组合未处理:")

            for i, item in enumerate(remaining):
                print(f"{i+1}. 长度{item['length']} ({item['context']}): {item['class'][:50]}...")
        else:
            print("\n✅ 没有剩余的长类组合")

    # Summary; the percentages fall back to 0 when nothing was replaced.
    total_replaced = result['template_replaced'] + result['class_replaced']
    print(f"\n📊 替换总结:")
    print(f"- 总替换数: {total_replaced}")
    print(f"- 颜色变体模板: {result['template_replaced']} ({round(result['template_replaced']/total_replaced*100 if total_replaced else 0, 1)}%)")
    print(f"- 常规类映射: {result['class_replaced']} ({round(result['class_replaced']/total_replaced*100 if total_replaced else 0, 1)}%)")

# Register on_apply_clicked as the click callback of the apply button.
apply_button.on_click(on_apply_clicked)

In [None]:
# 生成建议名称
def suggest_name(class_combo, context, existing_names=None):
    """Propose a semantic name for a Tailwind class combination.

    Args:
        class_combo: Whitespace-separated class string.
        context: Free-form context string; the first ``#section`` and the
            first ``<tag>`` found in it steer the name.
        existing_names: Names already taken; the result is guaranteed not to
            be in this collection. It is not modified.

    Returns:
        A name built from section, element type and (for containers, text and
        untyped elements) one style feature, or ``'ui-component'`` when nothing
        can be derived. A numeric suffix is appended to avoid collisions.
    """
    taken = set() if existing_names is None else existing_names
    tokens = class_combo.split()

    def _first_group(pattern):
        # First capture group of the pattern in the context, or "".
        found = re.search(pattern, context)
        return found.group(1) if found else ""

    def _has(fragment):
        # True when any class token contains the fragment.
        return any(fragment in token for token in tokens)

    section = _first_group(r'#([a-zA-Z0-9_-]+)')
    tag = _first_group(r'<([a-zA-Z0-9_-]+)>')

    # Element type: divs are refined by layout classes, other tags map 1:1.
    if tag == 'div':
        if _has('flex'):
            element_type = 'flex-container'
        elif _has('grid'):
            element_type = 'grid'
        else:
            element_type = 'container'
    else:
        tag_types = {
            'a': 'btn', 'button': 'btn',
            'h1': 'heading', 'h2': 'heading', 'h3': 'heading',
            'h4': 'heading', 'h5': 'heading', 'h6': 'heading',
            'p': 'text',
            'span': 'label',
            'img': 'image',
        }
        element_type = tag_types.get(tag, "")

    # Style features detected from substrings of the class tokens.
    feature_markers = (
        ('text-', 'text'),
        ('bg-', 'bg'),
        ('dark:', 'themed'),
        ('hover:', 'interactive'),
        ('rounded', 'rounded'),
        ('shadow', 'shadow'),
        ('border', 'bordered'),
    )
    style_features = [label for marker, label in feature_markers if _has(marker)]

    name_parts = []
    if section in ('header', 'footer', 'gallery', 'about', 'banner', 'main'):
        name_parts.append(section)
    if element_type:
        name_parts.append(element_type)

    # Only generic element types get one distinguishing feature appended.
    if style_features and (not element_type or element_type in ('container', 'text')):
        priority = ('interactive', 'themed', 'shadow', 'rounded', 'bordered', 'bg')
        chosen = next((feature for feature in priority if feature in style_features), None)
        if chosen is not None:
            name_parts.append(chosen)

    base_name = '-'.join(name_parts) if name_parts else 'ui-component'

    # Append -1, -2, ... until the name is free.
    candidate = base_name
    suffix = 1
    while candidate in taken:
        candidate = f"{base_name}-{suffix}"
        suffix += 1

    return candidate

def load_js_mappings(js_file):
    """Load the existing name mappings from the generated JS file.

    Parses the ``twClasses = {...}`` and ``twTemplates = {...}`` object
    literals, whose entries are written as ``'name': 'value'`` pairs.

    Args:
        js_file: Path of the JS mapping file. A missing file is not an error.

    Returns:
        tuple: ``(class_mappings, template_mappings)``; both dicts map the
        value (class string / template string) to its name. Both are empty
        when the file does not exist or a declaration is not found.
    """
    class_mappings = {}  # class string -> name
    template_mappings = {}  # template string -> name

    if not os.path.exists(js_file):
        return class_mappings, template_mappings

    with open(js_file, 'r', encoding='utf-8') as f:
        content = f.read()

    entry_pattern = re.compile(r"'([^']+)':\s*'([^']+)'")

    def _object_entries(var_name):
        # Yield (name, value) pairs of the object literal assigned to var_name.
        # The body is consumed as "anything but } or '" OR a complete quoted
        # string, so a } inside a value (templates contain "{color}") no longer
        # ends the literal early. The previous [^}]* cut the body at the first
        # "{color}" and silently lost every template.
        literal = re.search(
            var_name + r"\s*=\s*\{((?:[^}']|'[^']*')*)\}",
            content,
        )
        if literal is None:
            return
        for entry in entry_pattern.finditer(literal.group(1)):
            yield entry.groups()

    for name, classes in _object_entries('twClasses'):
        class_mappings[classes] = name

    for name, template in _object_entries('twTemplates'):
        template_mappings[template] = name

    return class_mappings, template_mappings

# UI for the "generate name suggestions" step.
js_file_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
suggest_button = widgets.Button(description="生成名称建议")
# Output area; the click handler is decorated with suggestions_output.capture().
suggestions_output = widgets.Output()

# Render the step heading, the JS path input, the button and the output area.
display(HTML("<h3>步骤2: 生成名称建议</h3>"))
display(js_file_input)
display(suggest_button)
display(suggestions_output)

@suggestions_output.capture()
def on_suggest_clicked(b):
    """Generate name suggestions for unmapped class combinations and templates.

    Requires ``global_df`` from the analysis step. Combinations that occur at
    least twice or are longer than 30 characters get a suggested name unless
    the JS file already maps them; colour templates from
    ``global_color_templates`` (if present) are handled the same way. The
    results are stored in ``global_suggestions`` and
    ``global_template_suggestions`` and shown as HTML tables.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    global global_suggestions, global_template_suggestions

    # The analysis step must have run first.
    if 'global_df' not in globals():
        print("请先运行HTML分析")
        return

    existing_class_mappings, existing_template_mappings = load_js_mappings(js_file_input.value)
    print(f"已加载 {len(existing_class_mappings)} 个类映射和 {len(existing_template_mappings)} 个模板映射")

    df = global_df.copy()

    # Candidates: seen at least twice, or longer than 30 characters.
    is_candidate = (df['出现次数'] >= 2) | (df['长度'] > 30)
    candidates = df[is_candidate].sort_values(by='出现次数', ascending=False)

    # Names that are already in use by either kind of mapping.
    taken_names = set(existing_class_mappings.values()) | set(existing_template_mappings.values())

    suggestions = {}
    for _, record in candidates.iterrows():
        combo = record['类组合']
        if combo not in existing_class_mappings:
            proposed = suggest_name(combo, record['上下文'], taken_names)
            suggestions[combo] = proposed
            # Reserve the name immediately so later rows cannot reuse it.
            taken_names.add(proposed)

    template_suggestions = {}
    if 'global_color_templates' in globals():
        for template, variants in global_color_templates.items():
            if template in existing_template_mappings:
                continue

            # The first variant stands in for the template when naming it.
            representative = variants[0]['original']
            matching_rows = df[df['类组合'] == representative]
            if matching_rows.empty:
                continue

            proposed = suggest_name(representative, matching_rows.iloc[0]['上下文'], taken_names)
            template_suggestions[template] = proposed
            taken_names.add(proposed)

    # Keep the results for the editing step.
    global_suggestions = suggestions
    global_template_suggestions = template_suggestions

    # Table of regular class suggestions.
    if not suggestions:
        print("没有需要处理的类组合")
    else:
        print(f"为 {len(suggestions)} 个类组合生成了名称建议:")

        def _class_record(class_combo, name):
            source = df[df['类组合'] == class_combo].iloc[0]
            return {
                '类组合': class_combo,
                '出现次数': source['出现次数'],
                '长度': source['长度'],
                '建议名称': name,
                '上下文': source['上下文']
            }

        suggestion_df = pd.DataFrame(
            [_class_record(class_combo, name) for class_combo, name in suggestions.items()]
        )
        suggestion_df = suggestion_df.sort_values(by='出现次数', ascending=False)

        display(HTML(suggestion_df.to_html(escape=False)))

    # Table of template suggestions.
    if not template_suggestions:
        print("\n没有需要处理的颜色模板")
    else:
        print(f"\n为 {len(template_suggestions)} 个颜色模板生成了名称建议:")

        def _template_record(template, name):
            variants = global_color_templates[template]
            return {
                '模板': template,
                '总出现次数': sum(v['count'] for v in variants),
                '建议名称': name,
                '颜色变体': ", ".join(sorted({v['color'] for v in variants}))
            }

        template_df = pd.DataFrame(
            [_template_record(template, name) for template, name in template_suggestions.items()]
        )
        template_df = template_df.sort_values(by='总出现次数', ascending=False)

        display(HTML(template_df.to_html(escape=False)))

# Register on_suggest_clicked as the click callback of the suggest button.
suggest_button.on_click(on_suggest_clicked)

In [None]:
# Edit mapping names
def create_editable_interface():
    """Render editable name fields for the suggested mappings plus a save button.

    Reads ``global_suggestions`` / ``global_template_suggestions`` (produced by
    the suggestion step) and ``global_df``. Every suggested class mapping that
    is not a variant of a known colour template, and every suggested template,
    gets a text widget pre-filled with the suggested name. The save button
    validates the edited names and writes them, merged with the mappings
    already in the file, to the JS file named in ``js_file_input``.

    Returns:
        None. Prints a hint and returns early when no suggestions exist.
    """
    if 'global_suggestions' not in globals() or not global_suggestions:
        print("请先生成名称建议")
        return

    # Colour templates are optional (the suggestion step guards them the same
    # way); fall back to "none" instead of raising NameError.
    color_templates = globals().get('global_color_templates', {})
    templated_originals = {
        variant['original']
        for variants in color_templates.values()
        for variant in variants
    }

    # --- regular class mappings -------------------------------------------
    print("编辑类映射名称:")

    suggestion_data = []
    for class_combo, name in global_suggestions.items():
        # Variants of a colour template are covered by the template itself.
        if class_combo in templated_originals:
            continue

        row = global_df[global_df['类组合'] == class_combo].iloc[0]

        suggestion_data.append({
            '类组合': class_combo,
            '出现次数': row['出现次数'],
            '长度': len(class_combo),
            '建议名称': name,
            '名称长度': len(name),
            # +3 is the margin the original comparison uses for the name.
            '长度比较': '⚠️ 名称更长' if (len(name)+3) > len(class_combo) else '✓ 名称更短'
        })

    # Explicit columns keep the frame sortable when every suggestion was
    # skipped above; an empty frame without columns made sort_values raise
    # KeyError.
    suggestion_df = pd.DataFrame(
        suggestion_data,
        columns=['类组合', '出现次数', '长度', '建议名称', '名称长度', '长度比较']
    )
    suggestion_df = suggestion_df.sort_values(by='出现次数', ascending=False)

    display(HTML(suggestion_df.to_html(escape=False)))

    class_widgets = {}
    class_original_suggestions = {}  # class combo -> name as originally suggested

    for i, row in suggestion_df.iterrows():
        class_combo = row['类组合']
        suggested_name = row['建议名称']
        freq = row['出现次数']

        class_original_suggestions[class_combo] = suggested_name

        # Show at most 50 characters of the class string.
        display_class = (class_combo[:50] + "...") if len(class_combo) > 50 else class_combo

        # Warn when the mapping would not shorten the markup.
        length_warning = ""
        if (len(suggested_name)+3) > len(class_combo):
            length_warning = "<span style='color:red'>⚠️ 建议名称比原类组合更长!</span>"

        w = widgets.Text(
            value=suggested_name,
            description=f'#{i+1} [次数:{freq}]:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='60%')
        )
        class_widgets[class_combo] = w

        display(HTML(f"<b>类组合:</b> {display_class} <b>长度:</b> {len(class_combo)} {length_warning}"))
        display(w)
        display(HTML("<hr style='margin: 5px 0'>"))

    # --- parameterised templates ------------------------------------------
    template_widgets = {}
    if 'global_template_suggestions' in globals() and global_template_suggestions:
        print("\n编辑参数化模板名称:")

        for i, (template, suggested_name) in enumerate(global_template_suggestions.items()):
            variants = color_templates[template]
            colors = ", ".join(sorted(set(v['color'] for v in variants)))
            total_count = sum(v['count'] for v in variants)
            example = variants[0]['original']

            w = widgets.Text(
                value=suggested_name,
                description=f'模板 #{i+1} [次数:{total_count}]:',
                style={'description_width': 'initial'},
                layout=widgets.Layout(width='60%')
            )
            template_widgets[template] = w

            display(HTML(f"<b>模板:</b> {template}<br><b>颜色变体:</b> {colors}<br><b>示例:</b> {example}"))
            display(w)
            display(HTML("<hr style='margin: 5px 0'>"))

    # --- save button --------------------------------------------------------
    save_button = widgets.Button(description="保存映射")
    save_output = widgets.Output()

    @save_output.capture()
    def on_save_clicked(b):
        """Validate the edited names and write the merged mappings to the JS file."""
        edited_class_names = {}
        edited_template_names = {}

        # Names must be unique across class and template mappings.
        all_names = set()
        duplicates = []

        for class_combo, widget in class_widgets.items():
            name = widget.value.strip()
            original_suggestion = class_original_suggestions[class_combo]

            # A name that is longer than the classes it replaces is only kept
            # when the user edited it.
            if (len(name)+3) > len(class_combo) and name == original_suggestion:
                print(f"跳过名称更长的映射: '{name}' -> '{class_combo}'")
                continue

            if name:
                if name in all_names:
                    duplicates.append(name)
                else:
                    all_names.add(name)
                    edited_class_names[class_combo] = name

        for template, widget in template_widgets.items():
            name = widget.value.strip()
            if name:
                if name in all_names:
                    duplicates.append(name)
                else:
                    all_names.add(name)
                    edited_template_names[template] = name

        # Abort without saving when any name was used twice.
        if duplicates:
            print("错误: 发现名称冲突")
            for dup in duplicates:
                print(f"冲突名称: {dup}")

                # List the class / template that already holds this name.
                for cls, name in edited_class_names.items():
                    if name == dup:
                        print(f" - 类: {cls}")

                for tmpl, name in edited_template_names.items():
                    if name == dup:
                        print(f" - 模板: {tmpl}")

            return

        js_file = js_file_input.value

        existing_class_mappings, existing_template_mappings = load_js_mappings(js_file)

        # Edited entries win over entries already in the file.
        all_class_mappings = {**existing_class_mappings, **edited_class_names}
        all_template_mappings = {**existing_template_mappings, **edited_template_names}

        save_mappings_to_js(js_file, all_class_mappings, all_template_mappings)

        print(f"已成功保存映射到 {js_file}")
        print(f"- 新增 {len(edited_class_names)} 个类映射 (总计: {len(all_class_mappings)})")
        print(f"- 新增 {len(edited_template_names)} 个模板映射 (总计: {len(all_template_mappings)})")

    save_button.on_click(on_save_clicked)
    display(save_button)
    display(save_output)

def save_mappings_to_js(js_file, class_mappings, template_mappings):
    """Write the class and template mappings into the JS mapping file.

    When the file exists, the bodies of its ``const twClasses = {...}`` and
    ``const twTemplates = {...}`` declarations are replaced and the rest of the
    file is left untouched. Otherwise a new file is created from a built-in
    skeleton that also contains the runtime code applying the mappings.

    Args:
        js_file: Path of the JS file to create or update.
        class_mappings: Dict mapping class string -> name.
        template_mappings: Dict mapping template string -> name.
    """
    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        # Skeleton for a new file.
        content = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义类映射
const twClasses = {
};

// 定义参数化模板
const twTemplates = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  console.log('正在应用Tailwind类映射...');

  // 应用普通类映射
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(el => {
    const classNames = el.getAttribute('data-tw').split(' ');
    classNames.forEach(name => {
      if (twClasses[name]) {
        twClasses[name].split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    });
  });

  // 应用参数化模板
  const paramElements = document.querySelectorAll('[data-tw-param]');
  paramElements.forEach(el => {
    const paramData = el.getAttribute('data-tw-param').split(':');
    if (paramData.length >= 2) {
      const templateName = paramData[0];
      const params = paramData[1].split(',');

      // 检查是否有匹配的模板
      if (twTemplates[templateName]) {
        let classString = twTemplates[templateName];

        // 替换颜色参数
        const colorPattern = /{color}/g;
        if (params[0]) {
          classString = classString.replace(colorPattern, params[0]);
        }

        // 添加处理后的类
        classString.split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    }
  });

  console.log('Tailwind类映射应用完成!');
});
"""

    def _format_entries(mappings):
        # One "  'name': 'value'," line per mapping, ordered by name.
        return "".join(
            f"  '{name}': '{value}',\n"
            for value, name in sorted(mappings.items(), key=lambda x: x[1])
        )

    def _replace_object(text, var_name, body):
        # The literal body is matched as "anything but } or '" OR a complete
        # quoted string. The previous [^}]* stopped at the } of "{color}"
        # inside a template value, so a second save replaced only the head of
        # the literal and left the tail behind as broken JS.
        pattern = r'const\s+' + var_name + r"\s*=\s*\{(?:[^}']|'[^']*')*\}"
        new_literal = f'const {var_name} = {{\n{body}}}'
        # A callable replacement is inserted verbatim; a replacement *string*
        # would have its backslashes interpreted as regex escapes.
        return re.sub(pattern, lambda _match: new_literal, text)

    content = _replace_object(content, 'twClasses', _format_entries(class_mappings))
    content = _replace_object(content, 'twTemplates', _format_entries(template_mappings))

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

# UI for the "edit and save mappings" step.
edit_button = widgets.Button(description="编辑映射名称")
# Output area; the click handler is decorated with edit_output.capture().
edit_output = widgets.Output()

# Render the step heading, the button and the output area.
display(HTML("<h3>步骤3: 编辑和保存映射</h3>"))
display(edit_button)
display(edit_output)

@edit_output.capture()
def on_edit_clicked(b):
    """Build the editing interface; its output is captured into edit_output.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    create_editable_interface()

# Register on_edit_clicked as the click callback of the edit button.
edit_button.on_click(on_edit_clicked)

In [None]:
# 应用映射到HTML
def apply_mappings_to_html(html_file, js_file, output_file=None):
    """Replace mapped ``class`` attributes in an HTML file (text substitution).

    Parameterised templates are applied first: a ``class`` attribute equal to a
    template with ``{color}`` filled in becomes ``data-tw-param="name:color"``.
    Regular mappings follow: a ``class`` attribute equal to a mapped class
    string becomes ``data-tw="name"``. Only whole attributes are replaced.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file, read via ``load_js_mappings``.
        output_file: Path to write; defaults to ``<html_file stem>_mapped<ext>``.

    Returns:
        dict with keys ``output_file``, ``regular_replaced``,
        ``template_replaced``, ``template_details`` (one dict per
        template/colour pair that matched) and ``remaining`` (up to ten
        ``(class_string, length)`` tuples for class attributes still longer
        than 30 characters, longest first).
    """
    if output_file is None:
        stem, ext = os.path.splitext(html_file)
        output_file = f"{stem}_mapped{ext}"

    class_mappings, template_mappings = load_js_mappings(js_file)

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    def _class_attr_pattern(class_string):
        # Regex for a class attribute holding exactly these classes, in order.
        # Whitespace between (and around) the classes is matched loosely: the
        # mapped strings are single-space normalised, while the markup may use
        # several spaces or line breaks.
        tokens = r'\s+'.join(re.escape(token) for token in class_string.split())
        return r'class=["\']\s*' + tokens + r'\s*["\']'

    def _substitute(class_string, replacement, text):
        # Return (new_text, number_of_replacements) in one pass; the callable
        # keeps the replacement text from being parsed for regex escapes.
        return re.subn(_class_attr_pattern(class_string), lambda _match: replacement, text)

    # 1. Parameterised templates first.
    template_replaced_count = 0
    template_detail = []

    colors = ['red', 'blue', 'green', 'yellow', 'indigo', 'purple', 'pink',
              'gray', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald']

    for template_str, name in template_mappings.items():
        if '{color}' not in template_str:
            continue
        for color in colors:
            concrete_class = template_str.replace('{color}', color)
            html_content, replaced_in_this_round = _substitute(
                concrete_class, f'data-tw-param="{name}:{color}"', html_content
            )

            if replaced_in_this_round > 0:
                template_replaced_count += replaced_in_this_round
                template_detail.append({
                    'template': name,
                    'color': color,
                    'original': concrete_class,
                    'count': replaced_in_this_round
                })

    # 2. Regular class mappings.
    replaced_count = 0
    for class_combo, name in class_mappings.items():
        html_content, replaced_in_this_round = _substitute(
            class_combo, f'data-tw="{name}"', html_content
        )
        replaced_count += replaced_in_this_round

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Collect class attributes that are still long after the replacements.
    soup = BeautifulSoup(html_content, 'lxml')
    remaining = []

    for element in soup.find_all(True):
        if 'class' in element.attrs and element['class']:
            class_str = ' '.join(element['class'])
            if len(class_str) > 30:
                remaining.append((class_str, len(class_str)))

    remaining.sort(key=lambda x: x[1], reverse=True)

    return {
        'output_file': output_file,
        'regular_replaced': replaced_count,
        'template_replaced': template_replaced_count,
        'template_details': template_detail,
        'remaining': remaining[:10]  # only the ten longest
    }

# UI for the "apply mappings" step: three path inputs plus a trigger button.
html_input = widgets.Text(value='index.html', description='HTML文件:')
js_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
# An empty value here is forwarded as None by the click handler.
output_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(description="应用映射")
# Output area; the click handler is decorated with apply_output.capture().
apply_output = widgets.Output()

# Render the step heading, the inputs, the button and the output area.
display(HTML("<h3>步骤4: 应用映射到HTML</h3>"))
display(widgets.VBox([html_input, js_input, output_input]))
display(apply_button)
display(apply_output)

@apply_output.capture()
def on_apply_clicked(b):
    """Run ``apply_mappings_to_html`` on the chosen files and print a summary.

    Paths come from the input widgets; an empty output field is passed on as
    None. Prints an error and returns when the HTML or JS file is missing.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    html_file, js_file = html_input.value, js_input.value
    output_file = output_input.value or None

    # Both input files must exist before anything is attempted.
    if not os.path.exists(html_file):
        print(f"错误: HTML文件 {html_file} 不存在")
        return
    if not os.path.exists(js_file):
        print(f"错误: JS文件 {js_file} 不存在")
        return

    print(f"正在应用映射 {js_file} 到 {html_file}...")
    result = apply_mappings_to_html(html_file, js_file, output_file)

    print(f"已生成更新后的HTML文件: {result['output_file']}")
    print(f"替换了 {result['regular_replaced']} 个常规class属性")

    # Template section: per template/colour detail, or a note that none matched.
    if result['template_replaced'] <= 0:
        print("没有替换任何模板")
    else:
        print(f"替换了 {result['template_replaced']} 个模板class属性")
        print("\n模板替换详情:")
        for detail in result['template_details']:
            print(f"- 模板 '{detail['template']}' 颜色 '{detail['color']}': {detail['count']}次")

    # Long class attributes that no mapping covered.
    leftovers = result['remaining']
    if leftovers:
        print("\n仍有以下长类组合未处理:")
        for cls, length in leftovers:
            print(f"- 长度{length}: {cls}")

# Register on_apply_clicked as the click callback of the apply button.
apply_button.on_click(on_apply_clicked)

In [None]:
# 分析HTML文件中的类组合
def analyze_html(html_file):
    """Analyse an HTML file for repeated class combinations and colour templates.

    Every element's class list is joined with single spaces into a "class
    combination" and counted. Combinations containing ``bg-<name>-<digits>``
    are additionally turned into a template by replacing the name with
    ``{color}``; a template is kept when at least two different colours
    produce it.

    Args:
        html_file: Path of the HTML file to parse.

    Returns:
        tuple: ``(regular_df, template_df, valid_templates)`` where
        ``regular_df`` lists the combinations that are not template variants,
        ``template_df`` summarises the kept templates, and ``valid_templates``
        maps each template string to its list of variant dicts
        (``color``, ``original``, ``count``).
    """
    with open(html_file, 'r', encoding='utf-8') as file:
        content = file.read()
    soup = BeautifulSoup(content, 'lxml')

    class_combinations = Counter()
    class_contexts = {}

    color_variants = {}
    color_pattern = re.compile(r'bg-([a-z]+)-\d+')
    color_slot_pattern = re.compile(r'bg-[a-z]+-(\d+)')

    for element in soup.find_all(True):
        if 'class' not in element.attrs or not element['class']:
            continue
        class_combo = ' '.join(element['class'])
        if not class_combo:
            continue

        class_combinations[class_combo] += 1

        # Context for naming: tag, a text snippet (at most 30 characters plus
        # an ellipsis) and the id of the nearest ancestor that has one.
        text = element.get_text()
        parent_with_id = element.find_parent(id=True)
        class_contexts.setdefault(class_combo, []).append({
            'tag': element.name,
            'content': text[:30].strip() + '...' if len(text) > 30 else text.strip(),
            'section': parent_with_id.get('id', '') if parent_with_id else ''
        })

        match = color_pattern.search(class_combo)
        if not match:
            continue

        # The first bg colour names the variant; every bg-<name>-<n> (with or
        # without a prefix such as "dark:") becomes bg-{color}-<n>. This one
        # substitution already covers the "dark:" case.
        color_name = match.group(1)
        template = color_slot_pattern.sub(r'bg-{color}-\1', class_combo)
        if template == class_combo:
            continue

        variants = color_variants.setdefault(template, [])
        # Keep one variant per colour.
        if not any(v['color'] == color_name for v in variants):
            variants.append({
                'color': color_name,
                'original': class_combo,
                'count': 0
            })

    # Fill in the counts once the whole document has been seen. Recording the
    # count when a variant was first met froze it at that moment, so template
    # totals under-reported every repeated variant.
    for variants in color_variants.values():
        for variant in variants:
            variant['count'] = class_combinations[variant['original']]

    # A template needs at least two colour variants to be worth keeping.
    valid_templates = {template: variants for template, variants in color_variants.items()
                       if len(variants) >= 2}

    templated_originals = {
        variant['original']
        for variants in valid_templates.values()
        for variant in variants
    }

    regular_data = []
    for combo, count in class_combinations.items():
        # Template variants are reported through the template table.
        if combo in templated_originals:
            continue

        # Describe up to two occurrences as "<tag> #section "text"".
        context_str = []
        for ctx in class_contexts.get(combo, [])[:2]:
            section = f"#{ctx['section']}" if ctx['section'] else ""
            snippet = f'"{ctx["content"]}"' if ctx['content'] else ""
            context_str.append(f"<{ctx['tag']}> {section} {snippet}")

        regular_data.append({
            '类组合': combo,
            '出现次数': count,
            '长度': len(combo),
            '上下文': ' | '.join(context_str)
        })

    template_data = [
        {
            '模板': template,
            '颜色变体': ', '.join(v['color'] for v in variants),
            '总次数': sum(v['count'] for v in variants),
            '长度': len(template)
        }
        for template, variants in valid_templates.items()
    ]

    regular_df = pd.DataFrame(regular_data)
    template_df = pd.DataFrame(template_data)

    # Most frequent first, longer strings first among equals. Empty frames
    # have no columns to sort by and are returned as they are.
    if not regular_df.empty:
        regular_df = regular_df.sort_values(['出现次数', '长度'], ascending=[False, False])

    if not template_df.empty:
        template_df = template_df.sort_values(['总次数', '长度'], ascending=[False, False])

    return regular_df, template_df, valid_templates

# UI for the analysis step: HTML path input plus a trigger button.
html_file_input = widgets.Text(value='index.html', description='HTML文件:')
analyze_button = widgets.Button(description="分析HTML")
# Output area; the click handler is decorated with analysis_output.capture().
analysis_output = widgets.Output()

# Render the input, the button and the output area.
display(html_file_input)
display(analyze_button)
display(analysis_output)

@analysis_output.capture()
def on_analyze_clicked(b):
    """Analyse the chosen HTML file, store the results globally and show them.

    The results of ``analyze_html`` are kept in ``global_regular_df``,
    ``global_template_df`` and ``global_templates`` for later cells. Up to 15
    regular combinations that occur at least twice or are at least 30
    characters long are displayed, followed by the template table and the
    variants of the first three templates.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    global global_regular_df, global_template_df, global_templates

    html_file = html_file_input.value
    if not os.path.exists(html_file):
        print(f"错误: 文件 {html_file} 不存在")
        return

    print(f"正在分析 {html_file}...")
    regular_df, template_df, templates = analyze_html(html_file)

    # Keep the results for the following cells.
    global_regular_df, global_template_df, global_templates = regular_df, template_df, templates

    # Regular combinations: only frequent or long ones are shown.
    if not regular_df.empty:
        print(f"\n找到 {len(regular_df)} 个常规类组合")
        is_frequent = regular_df['出现次数'] >= 2
        is_long = regular_df['长度'] >= 30
        filtered_regular = regular_df[is_frequent | is_long].head(15)
        if filtered_regular.empty:
            print("没有找到高频长类组合")
        else:
            print("\n高频长类组合:")
            display(HTML(filtered_regular.to_html(escape=False)))

    # Template candidates.
    if template_df.empty:
        print("没有找到参数化模板候选")
        return

    print(f"\n找到 {len(template_df)} 个参数化模板候选")
    display(HTML(template_df.to_html(escape=False)))

    # Variant details for the first three templates.
    print("\n详细模板信息 (前3个):")
    for i, (template, variants) in enumerate(list(templates.items())[:3]):
        print(f"\n模板 {i+1}: {template}")
        for v in variants:
            print(f"  - 颜色: {v['color']}, 原始类: {v['original']}, 出现次数: {v['count']}")

# Register on_analyze_clicked as the click callback of the analyze button.
analyze_button.on_click(on_analyze_clicked)

In [None]:
# 创建和管理映射（修复版）
def suggest_name(combo, contexts, existing_names=None):
    """Recommend a semantic, unique name for a class combination.

    Args:
        combo: Whitespace-separated class string.
        contexts: List of dicts describing where the combination occurs; the
            ``'section'`` and ``'tag'`` entries are used when present.
        existing_names: Names already taken; the result is guaranteed not to
            be in this collection. It is not modified.

    Returns:
        A name built from area, element type and (for containers, text and
        untyped elements) one style feature, or ``"ui-component"`` when
        nothing can be derived. On a collision one more style feature is
        tried first, then a numeric suffix.
    """
    if existing_names is None:
        existing_names = set()

    # Clues from the contexts.
    section_ids = [ctx['section'] for ctx in contexts if ctx.get('section')]
    tags = [ctx['tag'] for ctx in contexts if ctx.get('tag')]
    # Most frequent tag; ties go to the tag seen first. Iterating set(tags)
    # instead made the tie-break depend on set ordering, so the same input
    # could yield different names in different interpreter runs.
    common_tag = max(tags, key=tags.count) if tags else ""

    classes = combo.split()

    # Area: the first section id that is a known page region.
    area = None
    for section in section_ids:
        if section in ['header', 'footer', 'navbar', 'main', 'gallery', 'about', 'banner']:
            area = section
            break

    # Element type from the dominant tag; divs are refined by layout classes.
    element_type = None
    if common_tag in ['a', 'button']:
        element_type = 'btn'
    elif common_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
        element_type = 'heading'
    elif common_tag == 'p':
        element_type = 'text'
    elif common_tag == 'div':
        if any('flex' in cls for cls in classes):
            element_type = 'flex-container'
        elif any('grid' in cls for cls in classes):
            element_type = 'grid'
        else:
            element_type = 'container'
    elif common_tag == 'span':
        element_type = 'label'
    elif common_tag == 'img':
        element_type = 'image'

    # Style features, detected by substring of the class tokens.
    style_features = []
    if any('text-' in cls for cls in classes):
        style_features.append('text')
    if any('bg-' in cls for cls in classes):
        style_features.append('bg')
    if any('dark:' in cls for cls in classes):
        style_features.append('themed')
    if any('hover:' in cls for cls in classes):
        style_features.append('interactive')
    if any('rounded' in cls for cls in classes):
        style_features.append('rounded')
    if any('shadow' in cls for cls in classes):
        style_features.append('shadow')
    if any('border' in cls for cls in classes):
        style_features.append('bordered')
    if any('p-' in cls for cls in classes) or any('px-' in cls for cls in classes) or any('py-' in cls for cls in classes):
        style_features.append('padded')
    if any('m-' in cls for cls in classes) or any('mx-' in cls for cls in classes) or any('my-' in cls for cls in classes):
        style_features.append('margin')

    name_parts = []
    if area:
        name_parts.append(area)
    if element_type:
        name_parts.append(element_type)

    # Generic element types get one feature appended to tell them apart.
    if style_features and (not element_type or element_type in ['container', 'text']):
        priority_features = ['interactive', 'themed', 'shadow', 'rounded', 'bordered', 'bg', 'padded', 'margin', 'text']
        for feature in priority_features:
            if feature in style_features:
                name_parts.append(feature)
                break

    if not name_parts:
        base_name = "ui-component"
    else:
        base_name = "-".join(name_parts)

    # Make the name unique.
    final_name = base_name
    i = 1
    while final_name in existing_names:
        # First collision: try one more, not yet used, style feature.
        if i == 1 and style_features and len(style_features) > 1:
            unused_features = [f for f in style_features if f not in name_parts]
            if unused_features:
                name_parts.append(unused_features[0])
                final_name = "-".join(name_parts)
                i += 1
                continue

        # Otherwise number the base name.
        final_name = f"{base_name}-{i}"
        i += 1

    return final_name

def load_existing_mappings(js_file):
    """Load the name mappings already stored in the JS mapping file.

    The file is expected to define two object literals, ``twClasses`` and
    ``twTemplates``, whose entries are written as ``'name': 'value'``.

    Args:
        js_file: Path of the JS mapping file. A missing file is not an error.

    Returns:
        A ``(regular_mappings, template_mappings)`` tuple. Both dicts are
        inverted relative to the JS objects: the key is the class string (or
        template string) and the value is its short name.
    """
    regular_mappings = {}
    template_mappings = {}

    if not os.path.exists(js_file):
        return regular_mappings, template_mappings

    with open(js_file, 'r', encoding='utf-8') as f:
        js_content = f.read()

    def object_body(var_name):
        """Return the text between the braces of ``var_name = {...}``, or None."""
        opener = re.search(re.escape(var_name) + r"\s*=\s*\{", js_content)
        if opener is None:
            return None
        # Scan for the closing brace while skipping quoted strings: template
        # values contain "{color}", so stopping at the first "}" would cut
        # the object off in the middle of its first template entry.
        quote = None
        pos = opener.end()
        while pos < len(js_content):
            ch = js_content[pos]
            if quote is not None:
                if ch == '\\':
                    pos += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '}':
                return js_content[opener.end():pos]
            pos += 1
        return None

    entry_pattern = re.compile(r"'([^']+)':\s*'([^']+)'")

    for var_name, target in (('twClasses', regular_mappings),
                             ('twTemplates', template_mappings)):
        body = object_body(var_name)
        if body is None:
            continue
        for name, value in entry_pattern.findall(body):
            # Stored inverted: class/template string -> short name.
            target[value] = name

    return regular_mappings, template_mappings

# Mapping-management panel: JS file path box, trigger button and output area.
js_file_input = widgets.Text(description='JS文件:', value='tailwind-classes.js')
load_button = widgets.Button(description="加载映射并生成建议")
mapping_output = widgets.Output()

# Render the heading and then the controls, top to bottom, in one call.
display(
    HTML("<h3>映射管理</h3>"),
    js_file_input,
    load_button,
    mapping_output,
)

@mapping_output.capture()
def on_load_clicked(b):
    """Button callback: load the JS mappings and propose names for unmapped classes.

    Reads the JS path from ``js_file_input``, loads the existing mappings,
    then builds name suggestions for the class combinations in
    ``global_regular_df`` and for the templates in ``global_templates``.
    Results are published through module-level ``global_*`` variables and
    shown in editable tables.

    Args:
        b: The clicked button (unused).
    """
    global global_regular_mappings, global_template_mappings
    global global_suggestions, global_class_contexts, global_template_suggestions

    js_file = js_file_input.value
    regular_mappings, template_mappings = load_existing_mappings(js_file)

    # Keep the loaded mappings available to later cells.
    global_regular_mappings = regular_mappings
    global_template_mappings = template_mappings

    print(f"已加载 {len(regular_mappings)} 个常规映射和 {len(template_mappings)} 个模板")

    # Everything below reads the analysis results. global_templates is checked
    # too because it is used further down and would otherwise raise NameError.
    required_globals = ('global_regular_df', 'global_template_df', 'global_templates')
    if any(var not in globals() for var in required_globals):
        print("请先运行HTML分析")
        return

    # Candidates: combinations that repeat or are long.
    filtered_regular = global_regular_df[(global_regular_df['出现次数'] >= 2) | (global_regular_df['长度'] >= 30)]

    if filtered_regular.empty:
        print("没有找到需要处理的高频长类组合")
        return

    # Names already taken by existing mappings.
    used_names = set(regular_mappings.values())

    suggestions = {}
    class_contexts = {}  # parsed contexts, reused by the editing UI

    # Most frequent (then longest) combinations get first pick of names.
    sorted_regular = filtered_regular.sort_values(by=['出现次数', '长度'], ascending=[False, False])

    for _, row in sorted_regular.iterrows():
        class_combo = row['类组合']
        if class_combo in regular_mappings:
            continue  # already mapped

        # Parse the ' | '-separated context column back into dicts.
        contexts = []
        for ctx_str in row['上下文'].split(' | '):
            tag_match = re.search(r'<([^>]+)>', ctx_str)
            section_match = re.search(r'#([^\s"]+)', ctx_str)
            content_match = re.search(r'"([^"]+)"', ctx_str)

            ctx = {}
            if tag_match:
                ctx['tag'] = tag_match.group(1)
            if section_match:
                ctx['section'] = section_match.group(1)
            if content_match:
                ctx['content'] = content_match.group(1)

            contexts.append(ctx)

        class_contexts[class_combo] = contexts

        # Reserve the name immediately so later rows cannot reuse it.
        name = suggest_name(class_combo, contexts, used_names)
        suggestions[class_combo] = name
        used_names.add(name)

    # Templates: name each one after its first variant.
    template_suggestions = {}

    for template, variants in global_templates.items():
        if template in template_mappings:
            continue  # already mapped

        if variants:
            first_variant = variants[0]['original']
            contexts = class_contexts.get(first_variant, [])
            base_name = suggest_name(first_variant, contexts, used_names)

            # The colour is deliberately not part of the template name.
            template_suggestions[template] = base_name
            used_names.add(base_name)  # avoid clashes with regular names

    if suggestions:
        print(f"\n为 {len(suggestions)} 个高频类生成了建议名称:")

        global_suggestions = suggestions
        global_class_contexts = class_contexts

        create_editable_mapping_table(suggestions, class_contexts)
    else:
        print("没有需要添加的常规映射")

    if template_suggestions:
        print(f"\n为 {len(template_suggestions)} 个模板生成了建议名称:")

        global_template_suggestions = template_suggestions

        create_editable_template_table(template_suggestions, global_templates)
    else:
        print("没有需要添加的模板映射")

# Interactive editing table for regular class mappings
def create_editable_mapping_table(suggestions, class_contexts):
    """Show the suggested names and let the user edit and save them.

    Displays an overview table, one text box per class combination, and a
    save button. On save the edited names are stored in the module-level
    ``edited_names`` dict and ``update_js_file`` is called, unless two boxes
    contain the same name.

    Args:
        suggestions: Dict mapping class combination -> suggested name.
        class_contexts: Dict mapping class combination -> list of context
            dicts (keys ``tag``, ``section``, ``content`` are read).
    """
    # Local import: escape() is needed because class strings and context
    # snippets are plain text that may contain '<', '>' or '&'.
    from html import escape

    entries = []
    for class_combo, name in suggestions.items():
        context_summary = ""
        for ctx in class_contexts.get(class_combo, [])[:1]:  # first context only
            tag = ctx.get('tag', '')
            section = ctx.get('section', '')
            content = ctx.get('content', '')
            if tag:
                context_summary += f"<{tag}>"
            if section:
                context_summary += f" #{section}"
            if content:
                context_summary += f" \"{content}\""

        entries.append({
            '类组合': class_combo,
            '建议名称': name,
            '上下文': context_summary,
            '字符长度': len(class_combo)
        })

    # Let pandas escape the cells: with escape=False the "<tag>" text was
    # interpreted as markup and never shown.
    df = pd.DataFrame(entries)
    display(HTML(df.to_html()))

    print("\n编辑类名称:")
    name_widgets = {}

    for i, (class_combo, suggested_name) in enumerate(suggestions.items()):
        # Truncate long class strings for display.
        display_class = class_combo[:50] + "..." if len(class_combo) > 50 else class_combo

        w = widgets.Text(
            value=suggested_name,
            description=f'#{i+1}:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='50%')
        )
        name_widgets[class_combo] = w

        display(HTML(f"<b>类组合:</b> {escape(display_class)}"))
        display(w)
        display(HTML("<hr style='margin: 5px 0;'>"))

    update_button = widgets.Button(description="保存名称映射")
    update_output = widgets.Output()

    # Module-level store for the edited names; read by update_js_file.
    global edited_names
    edited_names = {}

    @update_output.capture()
    def on_update_names(b):
        """Collect the edited names, refuse to save on duplicates."""
        edited_names.clear()
        used_names = set()
        conflicts = []

        for class_combo, widget in name_widgets.items():
            name = widget.value.strip()
            if name:
                if name in used_names:
                    conflicts.append(name)
                edited_names[class_combo] = name
                used_names.add(name)

        if conflicts:
            print(f"警告: 发现 {len(conflicts)} 个名称冲突!")
            for conflict in conflicts:
                print(f"冲突名称: {conflict}")
                for cls, name in edited_names.items():
                    if name == conflict:
                        print(f" - {cls}")
            return

        print(f"已更新 {len(edited_names)} 个名称映射")
        update_js_file(js_file_input.value)

    update_button.on_click(on_update_names)
    display(update_button)
    display(update_output)

# Interactive editing table for template mappings
def create_editable_template_table(template_suggestions, templates):
    """Show the suggested template names and let the user edit and save them.

    Displays an overview table (template, suggested name, colour variants),
    one text box per template, and a save button. On save the edited names
    are stored in the module-level ``edited_template_names`` dict and
    ``update_js_file`` is called, unless two boxes contain the same name.

    Args:
        template_suggestions: Dict mapping template string -> suggested name.
        templates: Dict keyed by template string. NOTE(review): the value is
            accepted either as a list of variant dicts or as a dict with a
            ``'variants'`` list, because callers in this file use both
            shapes; confirm which one the analysis step produces.
    """
    entries = []
    for template, name in template_suggestions.items():
        # Look up the variants of *this* template. The previous loop ignored
        # `template` and reused the first entry that had a 'variants' key.
        entry = templates.get(template) or []
        variant_list = entry.get('variants', []) if isinstance(entry, dict) else entry
        variants = [v['color'] for v in variant_list
                    if isinstance(v, dict) and 'color' in v]

        variants_str = ", ".join(variants) if variants else "未知颜色"

        entries.append({
            '模板': template[:50] + "..." if len(template) > 50 else template,
            '建议名称': name,
            '颜色变体': variants_str
        })

    # Cells are plain text, so let pandas escape them.
    df = pd.DataFrame(entries)
    display(HTML(df.to_html()))

    print("\n编辑模板名称:")
    template_widgets = {}

    for i, (template, suggested_name) in enumerate(template_suggestions.items()):
        w = widgets.Text(
            value=suggested_name,
            description=f'模板 #{i+1}:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='50%')
        )
        template_widgets[template] = w

        display(w)

    template_update_button = widgets.Button(description="保存模板名称")
    template_update_output = widgets.Output()

    # Module-level store for the edited names; read by update_js_file.
    global edited_template_names
    edited_template_names = {}

    @template_update_output.capture()
    def on_update_template_names(b):
        """Collect the edited template names, refuse to save on duplicates."""
        edited_template_names.clear()
        used_names = set()
        conflicts = []

        for template, widget in template_widgets.items():
            name = widget.value.strip()
            if name:
                if name in used_names:
                    conflicts.append(name)
                edited_template_names[template] = name
                used_names.add(name)

        if conflicts:
            print(f"警告: 发现 {len(conflicts)} 个名称冲突!")
            for conflict in conflicts:
                print(f"冲突名称: {conflict}")
                for templ, name in edited_template_names.items():
                    if name == conflict:
                        print(f" - {templ}")
            return

        print(f"已更新 {len(edited_template_names)} 个模板名称")
        update_js_file(js_file_input.value)

    template_update_button.on_click(on_update_template_names)
    display(template_update_button)
    display(template_update_output)

def update_js_file(js_file):
    """Write the edited class/template names back into the JS mapping file.

    Existing entries are loaded with ``load_existing_mappings`` and merged
    with the module-level ``edited_names`` / ``edited_template_names`` dicts
    (edited names win). The bodies of the ``twClasses`` and ``twTemplates``
    object literals are then regenerated, sorted by name, and the file is
    rewritten.

    Args:
        js_file: Path of the JS file to update. When it does not exist, a
            built-in skeleton (including the runtime loader script) is used.
    """
    def closing_brace_index(text, start):
        """Index of the first '}' at/after *start* outside quotes, or -1."""
        quote = None
        pos = start
        while pos < len(text):
            ch = text[pos]
            if quote is not None:
                if ch == '\\':
                    pos += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '}':
                return pos
            pos += 1
        return -1

    def replace_object_body(text, var_name, new_body):
        """Replace the body of ``const var_name = {...}``; no-op if absent."""
        opener = re.search(r'const\s+' + re.escape(var_name) + r'\s*=\s*\{', text)
        if opener is None:
            return text
        # Template values contain "{color}", so the closing brace has to be
        # found outside of quoted strings. Plain slicing (instead of re.sub)
        # also keeps backslashes in class strings literal.
        closing = closing_brace_index(text, opener.end())
        if closing < 0:
            return text
        return (text[:opener.start()]
                + 'const ' + var_name + ' = {\n' + new_body + '}'
                + text[closing + 1:])

    existing_mappings, existing_templates = load_existing_mappings(js_file)

    # globals() is a plain dict, so the edited names must be read with .get();
    # getattr() on it always returned the empty default and dropped the edits.
    module_globals = globals()
    all_mappings = {**existing_mappings, **module_globals.get('edited_names', {})}
    all_templates = {**existing_templates, **module_globals.get('edited_template_names', {})}

    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        # Skeleton for a new file.
        content = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义常规类映射
const twClasses = {
};

// 定义参数化模板（支持颜色等变体）
const twTemplates = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  console.log('正在应用Tailwind类映射...');

  // 应用普通类映射
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(el => {
    const classNames = el.getAttribute('data-tw').split(' ');
    classNames.forEach(name => {
      if (twClasses[name]) {
        twClasses[name].split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    });
  });

  // 应用参数化模板
  const paramElements = document.querySelectorAll('[data-tw-param]');
  paramElements.forEach(el => {
    const paramData = el.getAttribute('data-tw-param').split(':');
    if (paramData.length >= 2) {
      const templateName = paramData[0];
      const params = paramData[1].split(',');

      // 检查是否有匹配的模板
      if (twTemplates[templateName]) {
        let classString = twTemplates[templateName];

        // 替换颜色参数
        const colorPattern = /{color}/g;
        if (params[0]) {
          classString = classString.replace(colorPattern, params[0]);
        }

        // 添加处理后的类
        classString.split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    }
  });

  console.log('Tailwind类映射应用完成!');
});
"""

    # Render both dicts as JS object entries, sorted by name.
    regular_content = "".join(
        f"  '{name}': '{class_combo}',\n"
        for class_combo, name in sorted(all_mappings.items(), key=lambda x: x[1])
    )
    template_content = "".join(
        f"  '{name}': '{template}',\n"
        for template, name in sorted(all_templates.items(), key=lambda x: x[1])
    )

    content = replace_object_body(content, 'twClasses', regular_content)
    content = replace_object_body(content, 'twTemplates', template_content)

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"已更新JS文件: {js_file}")
    print(f"- 常规映射: {len(all_mappings)} 个")
    print(f"- 模板映射: {len(all_templates)} 个")

# Register on_load_clicked as the click handler of the load button.
load_button.on_click(on_load_clicked)

In [None]:
# Create and manage mappings
def suggest_name(class_combo, contexts=None, existing_names=None):
    """Suggest a short, unique semantic name for a Tailwind class combination.

    The name is assembled from up to four parts joined with ``-``: the page
    area (from a context ``section``), the element type (from the most common
    context ``tag``, else guessed from the class string), one style feature
    (only when the type is missing, ``container`` or ``text``) and a
    background colour. Falls back to ``ui-component`` when nothing is found.

    Args:
        class_combo: Space-separated class string.
        contexts: Optional list of dicts; the ``section`` and ``tag`` keys
            are read.
        existing_names: Optional collection of names that are already taken.
            It is only read, never modified.

    Returns:
        The suggested name, or ``""`` for an empty ``class_combo``. If the
        base name is taken, one more style feature is appended once; after
        that a numeric suffix is added to the base name.
    """
    if existing_names is None:
        existing_names = set()

    if not class_combo:
        return ""

    # Clues from the usage contexts.
    section_ids = []
    tags = []
    if contexts:
        section_ids = [ctx['section'] for ctx in contexts if ctx.get('section')]
        tags = [ctx['tag'] for ctx in contexts if ctx.get('tag')]

    # most_common() resolves ties by first appearance, so the suggestion is
    # reproducible between runs (max() over a set was hash-order dependent).
    common_tag = Counter(tags).most_common(1)[0][0] if tags else ""

    classes = class_combo.split()
    # Utility part of each class without variant prefixes ("md:hover:px-4"
    # -> "px-4") or a leading negative sign ("-mx-2" -> "mx-2").
    utilities = [cls.rsplit(':', 1)[-1].lstrip('-') for cls in classes]

    # Page area: first section id that is a known region.
    area = None
    for section in section_ids:
        if section in ['header', 'footer', 'navbar', 'main', 'gallery', 'about', 'banner']:
            area = section
            break

    # Element type from the dominant tag.
    element_type = None
    if common_tag in ['a', 'button']:
        element_type = 'btn'
    elif common_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
        element_type = 'heading'
    elif common_tag == 'p':
        element_type = 'text'
    elif common_tag == 'div':
        if any('flex' in cls for cls in classes):
            element_type = 'flex-container'
        elif any('grid' in cls for cls in classes):
            element_type = 'grid'
        else:
            element_type = 'container'
    elif common_tag == 'span':
        element_type = 'label'
    elif common_tag == 'img':
        element_type = 'image'

    # No usable tag: guess the type from the class string itself.
    if not element_type:
        if "rounded-full" in class_combo and "flex" in class_combo:
            element_type = "avatar"
        elif "card" in class_combo or "shadow" in class_combo:
            element_type = "card"
        elif "container" in class_combo:
            element_type = "container"
        elif "text-" in class_combo and "font-" in class_combo:
            element_type = "text"
        elif "btn" in class_combo or "button" in class_combo:
            element_type = "button"
        elif "nav" in class_combo:
            element_type = "nav"
        elif "flex" in class_combo:
            element_type = "flex-box"
        elif "grid" in class_combo:
            element_type = "grid"

    # Style features present in the combination.
    style_features = []
    if any('text-' in cls for cls in classes):
        style_features.append('text')
    if any('bg-' in cls for cls in classes):
        style_features.append('bg')
    if any('dark:' in cls for cls in classes):
        style_features.append('themed')
    if any('hover:' in cls for cls in classes):
        style_features.append('interactive')
    if any('rounded' in cls for cls in classes):
        style_features.append('rounded')
    if any('shadow' in cls for cls in classes):
        style_features.append('shadow')
    if any('border' in cls for cls in classes):
        style_features.append('bordered')
    # Prefix match on the utility name: a substring test for 'p-' / 'm-'
    # also hit unrelated classes such as "gap-4", "top-0" or "from-blue-500".
    if any(util.startswith(('p-', 'px-', 'py-')) for util in utilities):
        style_features.append('padded')
    if any(util.startswith(('m-', 'mx-', 'my-')) for util in utilities):
        style_features.append('margin')

    # Size features.
    if "w-full" in class_combo:
        style_features.append("full-width")
    elif "h-screen" in class_combo:
        style_features.append("full-height")

    # Background colour, first match in this fixed order.
    color_feature = None
    for color in ["red", "blue", "green", "yellow", "indigo", "purple", "pink", "gray"]:
        if f"bg-{color}" in class_combo:
            color_feature = color
            break

    # Assemble the name.
    name_parts = []
    if area:
        name_parts.append(area)
    if element_type:
        name_parts.append(element_type)

    # Generic types get one style feature to make the name more specific.
    if style_features and (not element_type or element_type in ['container', 'text']):
        priority_features = ['interactive', 'themed', 'shadow', 'rounded', 'bordered', 'bg', 'padded', 'margin', 'text']
        for feature in priority_features:
            if feature in style_features:
                name_parts.append(feature)
                break

    if color_feature:
        name_parts.append(color_feature)

    if not name_parts:
        base_name = "ui-component"
    else:
        base_name = "-".join(name_parts)

    # Make the name unique.
    final_name = base_name
    i = 1
    while final_name in existing_names:
        # First collision: try appending one more, not yet used, feature.
        if i == 1 and style_features and len(style_features) > 1:
            unused_features = [f for f in style_features if f not in name_parts]
            if unused_features:
                name_parts.append(unused_features[0])
                final_name = "-".join(name_parts)
                i += 1
                continue

        final_name = f"{base_name}-{i}"
        i += 1

    return final_name

def load_existing_mappings(js_file):
    """Load the name mappings already stored in the JS mapping file.

    The file is expected to define two object literals, ``twClasses`` and
    ``twTemplates``, whose entries are written as ``'name': 'value'``.

    Args:
        js_file: Path of the JS mapping file. A missing file is not an error.

    Returns:
        A ``(regular_mappings, templates)`` tuple. Both dicts are inverted
        relative to the JS objects: the key is the class string (or template
        string) and the value is its short name.
    """
    regular_mappings = {}
    templates = {}

    if not os.path.exists(js_file):
        return regular_mappings, templates

    with open(js_file, 'r', encoding='utf-8') as f:
        js_content = f.read()

    def object_body(var_name):
        """Return the text between the braces of ``var_name = {...}``, or None."""
        opener = re.search(re.escape(var_name) + r"\s*=\s*\{", js_content)
        if opener is None:
            return None
        # Template values contain "{color}", so the closing brace must be
        # searched outside of quoted strings. Stopping at the first "}"
        # truncated the object inside its first template entry.
        quote = None
        pos = opener.end()
        while pos < len(js_content):
            ch = js_content[pos]
            if quote is not None:
                if ch == '\\':
                    pos += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '}':
                return js_content[opener.end():pos]
            pos += 1
        return None

    entry_pattern = re.compile(r"'([^']+)':\s*'([^']+)'")

    for var_name, target in (('twClasses', regular_mappings),
                             ('twTemplates', templates)):
        body = object_body(var_name)
        if body is None:
            continue
        for name, value in entry_pattern.findall(body):
            # Stored inverted: class/template string -> short name.
            target[value] = name

    return regular_mappings, templates

# Mapping-management panel: JS file path box, load button and output area.
js_file_input = widgets.Text(description='JS文件:', value='tailwind-classes.js')
load_button = widgets.Button(description="加载现有映射")
mapping_output = widgets.Output()

# Render the heading and then the controls, top to bottom, in one call.
display(
    HTML("<h3>映射管理</h3>"),
    js_file_input,
    load_button,
    mapping_output,
)

@mapping_output.capture()
def on_load_clicked(b):
    """Button callback: load the JS mappings and propose names for unmapped classes.

    Reads the JS path from ``js_file_input``, loads the existing mappings,
    builds name suggestions for the class combinations in
    ``global_regular_df`` and the templates in ``global_templates``, shows
    them as tables, reports duplicate names, and offers a button that writes
    the suggestions to the JS file via ``update_js_file``.

    Args:
        b: The clicked button (unused).
    """
    # The suggestion dicts are declared global so the assignments below
    # really publish them; without the declaration they were dead locals.
    global global_regular_mappings, global_template_mappings
    global global_suggestions, global_template_suggestions

    js_file = js_file_input.value
    regular_mappings, template_mappings = load_existing_mappings(js_file)

    # Keep the loaded mappings available to later cells.
    global_regular_mappings = regular_mappings
    global_template_mappings = template_mappings

    print(f"已加载 {len(regular_mappings)} 个常规映射和 {len(template_mappings)} 个模板")

    # Everything below reads the analysis results. global_templates is checked
    # too because it is used further down and would otherwise raise NameError.
    required_globals = ('global_regular_df', 'global_template_df', 'global_templates')
    if any(var not in globals() for var in required_globals):
        print("请先运行HTML分析")
        return

    # Candidates: combinations that both repeat and are long.
    filtered_regular = global_regular_df[(global_regular_df['出现次数'] >= 2) & (global_regular_df['长度'] >= 30)]
    suggestions = {}
    names_used = set(regular_mappings.values())  # names already taken

    for _, row in filtered_regular.iterrows():
        class_combo = row['类组合']
        if class_combo in regular_mappings:
            continue  # already mapped

        # Parse the ' | '-separated context column back into dicts.
        contexts = []
        for ctx_str in row['上下文'].split(' | '):
            tag_match = re.search(r'<([^>]+)>', ctx_str)
            section_match = re.search(r'#([^\s"]+)', ctx_str)

            ctx = {}
            if tag_match:
                ctx['tag'] = tag_match.group(1)
            if section_match:
                ctx['section'] = section_match.group(1)

            contexts.append(ctx)

        # Reserve the name immediately so later rows cannot reuse it.
        name = suggest_name(class_combo, contexts, names_used)
        suggestions[class_combo] = name
        names_used.add(name)

    # Templates: name each one after its first variant.
    template_suggestions = {}

    for template, variants in global_templates.items():
        if template in template_mappings:
            continue  # already mapped

        if variants:
            first_variant = variants[0]
            base_name = suggest_name(first_variant['original'], [], names_used)
            template_name = base_name

            # The colour is deliberately not part of the template name.
            template_suggestions[template] = template_name
            names_used.add(template_name)

    # Publish the suggestions for later cells.
    global_suggestions = suggestions
    global_template_suggestions = template_suggestions

    if suggestions:
        print(f"\n为 {len(suggestions)} 个高频类生成了建议名称:")

        suggestions_df = pd.DataFrame([
            {'类组合': cls, '建议名称': name}
            for cls, name in suggestions.items()
        ])

        display(HTML(suggestions_df.to_html(escape=False)))
    else:
        print("没有需要添加的常规映射")

    if template_suggestions:
        print(f"\n为 {len(template_suggestions)} 个模板生成了建议名称:")

        template_df = pd.DataFrame([
            {'模板': template, '建议名称': name}
            for template, name in template_suggestions.items()
        ])

        display(HTML(template_df.to_html(escape=False)))
    else:
        print("没有需要添加的模板映射")

    # Report names that were suggested more than once.
    name_counts = Counter(list(suggestions.values()) + list(template_suggestions.values()))
    duplicates = [name for name, count in name_counts.items() if count > 1]

    if duplicates:
        print("\n警告：检测到名称冲突！")
        for dup in duplicates:
            print(f"\n冲突名称: {dup}")
            for cls, name in suggestions.items():
                if name == dup:
                    print(f"  - 类组合: {cls}")
            for template, name in template_suggestions.items():
                if name == dup:
                    print(f"  - 模板: {template}")

    # Offer to write the suggestions to the JS file.
    if suggestions or template_suggestions:
        update_button = widgets.Button(description="更新映射文件")

        @mapping_output.capture()
        def on_update_clicked(b):
            update_js_file(js_file, suggestions, template_suggestions)

        update_button.on_click(on_update_clicked)
        display(update_button)

def update_js_file(js_file, new_mappings=None, new_templates=None):
    """Add new class/template mappings to the JS mapping file.

    Existing entries are loaded with ``load_existing_mappings``; new entries
    are added only for class/template strings that are not mapped yet. The
    bodies of the ``twClasses`` and ``twTemplates`` object literals are then
    regenerated, sorted by name, and the file is rewritten.

    Args:
        js_file: Path of the JS file to update. When it does not exist, a
            built-in skeleton (including the runtime loader script) is used.
        new_mappings: Optional dict mapping class string -> name.
        new_templates: Optional dict mapping template string -> name.
    """
    def closing_brace_index(text, start):
        """Index of the first '}' at/after *start* outside quotes, or -1."""
        quote = None
        pos = start
        while pos < len(text):
            ch = text[pos]
            if quote is not None:
                if ch == '\\':
                    pos += 1  # skip the escaped character
                elif ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif ch == '}':
                return pos
            pos += 1
        return -1

    def replace_object_body(text, var_name, new_body):
        """Replace the body of ``const var_name = {...}``; no-op if absent."""
        opener = re.search(r'const\s+' + re.escape(var_name) + r'\s*=\s*\{', text)
        if opener is None:
            return text
        # Template values contain "{color}", so the closing brace has to be
        # found outside of quoted strings. A "[^}]*" match stopped inside the
        # first template and corrupted the file. Plain slicing (instead of
        # re.sub) also keeps backslashes in class strings literal.
        closing = closing_brace_index(text, opener.end())
        if closing < 0:
            return text
        return (text[:opener.start()]
                + 'const ' + var_name + ' = {\n' + new_body + '}'
                + text[closing + 1:])

    if new_mappings is None:
        new_mappings = {}
    if new_templates is None:
        new_templates = {}

    existing_mappings, existing_templates = load_existing_mappings(js_file)

    # Existing entries win: a new name is only used for unmapped strings.
    all_mappings = {**existing_mappings}
    for class_combo, name in new_mappings.items():
        all_mappings.setdefault(class_combo, name)

    all_templates = {**existing_templates}
    for template, name in new_templates.items():
        all_templates.setdefault(template, name)

    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        # Skeleton for a new file.
        content = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义常规类映射
const twClasses = {
};

// 定义参数化模板（支持颜色等变体）
const twTemplates = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  console.log('正在应用Tailwind类映射...');

  // 应用普通类映射
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(el => {
    const classNames = el.getAttribute('data-tw').split(' ');
    classNames.forEach(name => {
      if (twClasses[name]) {
        twClasses[name].split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    });
  });

  // 应用参数化模板
  const paramElements = document.querySelectorAll('[data-tw-param]');
  paramElements.forEach(el => {
    const paramData = el.getAttribute('data-tw-param').split(':');
    if (paramData.length >= 2) {
      const templateName = paramData[0];
      const params = paramData[1].split(',');

      // 检查是否有匹配的模板
      if (twTemplates[templateName]) {
        let classString = twTemplates[templateName];

        // 替换颜色参数
        const colorPattern = /{color}/g;
        if (params[0]) {
          classString = classString.replace(colorPattern, params[0]);
        }

        // 添加处理后的类
        classString.split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    }
  });

  console.log('Tailwind类映射应用完成!');
});
"""

    # Render both dicts as JS object entries, sorted by name.
    regular_content = "".join(
        f"  '{name}': '{class_combo}',\n"
        for class_combo, name in sorted(all_mappings.items(), key=lambda x: x[1])
    )
    template_content = "".join(
        f"  '{name}': '{template}',\n"
        for template, name in sorted(all_templates.items(), key=lambda x: x[1])
    )

    content = replace_object_body(content, 'twClasses', regular_content)
    content = replace_object_body(content, 'twTemplates', template_content)

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"已更新JS文件: {js_file}")
    print(f"- 添加了 {len(new_mappings)} 个新常规映射 (总计: {len(all_mappings)})")
    print(f"- 添加了 {len(new_templates)} 个新模板 (总计: {len(all_templates)})")

# Register on_load_clicked as the click handler of the load button.
load_button.on_click(on_load_clicked)

In [None]:
# Apply the mappings to an HTML file
def apply_mappings(html_file, js_file, output_file=None):
    """Rewrite ``class`` attributes of an HTML file using the JS mappings.

    Works on the raw text: a ``class="..."`` attribute whose whole value
    equals a mapped class string becomes ``data-tw="name"``; one that equals
    a template with ``{color}`` filled in becomes
    ``data-tw-param="name:color"``. The result is written to ``output_file``.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file.
        output_file: Destination path; defaults to ``<html_file stem>_mapped``
            with the original extension.

    Returns:
        Dict with ``output_file``, ``replaced_count``,
        ``template_replaced_count``, ``template_replacements`` (tuples of
        class string, template name, colour, count) and ``remaining_long``
        (up to 10 longest unmapped class strings of length >= 30, as
        ``(class string, count, length)``).
    """
    if output_file is None:
        stem, suffix = os.path.splitext(html_file)
        output_file = f"{stem}_mapped{suffix}"

    regular_mappings, template_mappings = load_existing_mappings(js_file)

    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    # Regular mappings: exact match of the whole attribute value.
    replaced_count = 0
    for class_str, name in regular_mappings.items():
        attr_regex = r'class=["\']' + re.escape(class_str) + r'["\']'
        html_content, hits = re.subn(attr_regex, f'data-tw="{name}"', html_content)
        replaced_count += hits

    # Colours tried when expanding a parameterised template.
    color_names = ['red', 'blue', 'green', 'yellow', 'indigo', 'purple', 'pink',
                  'gray', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald']

    template_replaced_count = 0
    template_replacements = []

    for template_str, template_name in template_mappings.items():
        for color in color_names:
            # Concrete class string for this colour.
            concrete_class = template_str.replace('{color}', color)
            attr_regex = r'class=["\']' + re.escape(concrete_class) + r'["\']'
            param_attr = f'data-tw-param="{template_name}:{color}"'

            before = html_content
            html_content = re.sub(attr_regex, param_attr, before)
            if html_content == before:
                continue

            # Occurrences are counted on the text as it was before replacing.
            double_quoted = before.count(f'class="{concrete_class}"')
            single_quoted = before.count(f"class='{concrete_class}'")
            count = double_quoted + single_quoted
            template_replaced_count += count
            template_replacements.append((concrete_class, template_name, color, count))

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Report long class strings that are still unmapped.
    soup = BeautifulSoup(html_content, 'lxml')
    joined_classes = (
        ' '.join(element['class'])
        for element in soup.find_all(True)
        if 'class' in element.attrs and element['class']
    )
    remaining_classes = Counter(joined for joined in joined_classes if len(joined) >= 30)

    # Longest first.
    remaining_long = sorted(
        ((cls, count, len(cls)) for cls, count in remaining_classes.items()),
        key=lambda item: item[2],
        reverse=True,
    )

    return {
        'output_file': output_file,
        'replaced_count': replaced_count,
        'template_replaced_count': template_replaced_count,
        'template_replacements': template_replacements,
        'remaining_long': remaining_long[:10]  # only the 10 longest
    }

# "Apply mappings" panel: input paths, optional output path, button, output area.
html_input = widgets.Text(description='HTML文件:', value='index.html')
js_input = widgets.Text(description='JS文件:', value='tailwind-classes.js')
output_input = widgets.Text(
    description='输出文件:',
    placeholder='留空使用默认名称',
    value='',
)
apply_button = widgets.Button(description="应用映射")
apply_output = widgets.Output()

# Render the heading and then the controls, top to bottom, in one call.
display(
    HTML("<h3>应用映射到HTML</h3>"),
    html_input,
    js_input,
    output_input,
    apply_button,
    apply_output,
)

@apply_output.capture()
def on_apply_clicked(b):
    """Button callback: run ``apply_mappings`` on the chosen files and report.

    Reads the paths from the input widgets, stops with an error message if
    the HTML or JS file is missing, otherwise prints the replacement counts,
    the per-template details and the remaining long class strings.

    Args:
        b: The clicked button (unused).
    """
    source_html = html_input.value
    mapping_js = js_input.value
    # An empty output box means "let apply_mappings choose the name".
    target = output_input.value if output_input.value else None

    if not os.path.exists(source_html):
        print(f"错误: HTML文件 {source_html} 不存在")
        return

    if not os.path.exists(mapping_js):
        print(f"错误: JS文件 {mapping_js} 不存在")
        return

    print(f"正在应用映射 {mapping_js} 到 {source_html}...")
    result = apply_mappings(source_html, mapping_js, target)

    print(f"已生成更新后的HTML文件: {result['output_file']}")
    print(f"替换了 {result['replaced_count']} 个常规class属性")

    if result['template_replaced_count'] <= 0:
        print("未找到任何匹配的参数化模板")
    else:
        print(f"替换了 {result['template_replaced_count']} 个参数化模板class属性")
        print("\n模板替换详情:")
        for cls, name, color, count in result['template_replacements']:
            if count <= 0:
                continue
            print(f"- 将 '{cls}' 替换为 'data-tw-param=\"{name}:{color}\"', {count}次")

    leftovers = result['remaining_long']
    if leftovers:
        print("\n仍有以下长类组合未处理:")
        for cls, count, length in leftovers:
            print(f"- 【{count}次】长度{length}: {cls}")

# Register on_apply_clicked as the click handler of the apply button.
apply_button.on_click(on_apply_clicked)

In [None]:
# 第二个单元格：分析HTML文件，提取详细上下文，并识别参数化模式
def analyze_html(html_file):
    """Collect class combinations, their contexts and colour-parameterised templates.

    Args:
        html_file: Path of the HTML document to parse (read as UTF-8, parsed
            with the ``lxml`` parser).

    Returns:
        A ``(sorted_combinations, valid_templates)`` tuple.
        ``sorted_combinations`` is a list of ``(class_combo, count, contexts)``
        ordered by count, then by string length, both descending.
        ``valid_templates`` maps a template string (colour replaced by the
        literal ``{color}``) to the list of concrete class combos that produce
        it, keeping only templates with at least two variants.
    """
    with open(html_file, 'r', encoding='utf-8') as handle:
        soup = BeautifulSoup(handle, 'lxml')

    combo_counts = Counter()
    contexts_by_combo = {}
    variants_by_template = {}
    color_names = ['red', 'blue', 'green', 'yellow', 'indigo', 'purple', 'pink',
                  'gray', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald']

    for node in soup.find_all(True):
        if 'class' not in node.attrs or not node['class']:
            continue
        class_combo = ' '.join(node['class'])  # original class order is kept
        if not class_combo:
            continue

        combo_counts[class_combo] += 1
        known_contexts = contexts_by_combo.setdefault(class_combo, [])

        # Short text preview: first 30 characters, ellipsis when truncated.
        full_text = node.get_text()
        preview = full_text[:30].strip()
        if len(full_text) > 30:
            preview += "..."

        # Nearest ancestor that carries an id attribute, if any.
        id_ancestor = node.find_parent(id=True)
        markup = str(node)

        context = {
            'tag': node.name,
            'id': node.get('id', ''),
            'section_id': id_ancestor.get('id', '') if id_ancestor else '',
            'content': preview,
            'html_snippet': markup[:100] + ('...' if len(markup) > 100 else '')
        }

        # Skip contexts that repeat an already recorded id/section/content triple.
        already_seen = False
        for earlier in known_contexts:
            if (earlier['id'] == context['id']
                    and earlier['section_id'] == context['section_id']
                    and earlier['content'] == context['content']):
                already_seen = True
                break
        if not already_seen:
            known_contexts.append(context)

        # Colour variants: swap the colour name for the literal "{color}".
        for color in color_names:
            if f"bg-{color}-" not in class_combo:
                continue
            template = re.sub(f"bg-{color}-([0-9]+)", "bg-{color}-\\1", class_combo)
            template = re.sub(f"dark:bg-{color}-([0-9]+)", "dark:bg-{color}-\\1", template)

            variants = variants_by_template.setdefault(template, [])
            if class_combo not in variants:
                variants.append(class_combo)

    # A template is only meaningful when at least two concrete variants share it.
    valid_templates = {}
    for template, variants in variants_by_template.items():
        if len(variants) >= 2:
            valid_templates[template] = variants

    sorted_combinations = [
        (combo, count, contexts_by_combo[combo])
        for combo, count in combo_counts.items()
    ]
    sorted_combinations.sort(key=lambda item: (item[1], len(item[0])), reverse=True)

    return sorted_combinations, valid_templates

# Run the analysis.
html_file = 'index.html'  # change this to point at your own file
combinations, templates = analyze_html(html_file)

# Flatten the results into a pandas DataFrame for easier inspection.
data = []
for combo, count, contexts in combinations:
    # Build a compact, human-readable context summary.
    context_str = []
    for ctx in contexts[:3]:  # only the first three contexts are summarised
        section = f"#{ctx['section_id']}" if ctx['section_id'] else ""
        content = f'"{ctx["content"]}"' if ctx['content'] else ""
        context_str.append(f"<{ctx['tag']}> {section} {content}")

    data.append({
        '类组合': combo,
        '出现次数': count,
        '长度': len(combo),
        '上下文': ' | '.join(context_str),
        '详细上下文': contexts
    })

df = pd.DataFrame(data)

# Print the parameterised templates that were discovered, with their variants.
if templates:
    print("发现潜在参数化模板:")
    for template, variants in templates.items():
        print(f"\n模板: {template}")
        print("变体:")
        for variant in variants:
            print(f"  - {variant}")

# Keep combinations that occur at least twice and are at least 30 characters
# long; only the first 20 rows are retained.
filtered_df = df[(df['出现次数'] >= 2) & (df['长度'] >= 30)].head(20)

# Render the result table.
display(HTML(filtered_df[['类组合', '出现次数', '长度', '上下文']].to_html(escape=False)))

# Store the template information in a global so later cells can read it.
global_templates = templates

In [None]:
# 第二个单元格：分析HTML文件，提取详细上下文
def analyze_html(html_file):
    """Count every class combination in an HTML file and record where it occurs.

    Args:
        html_file: Path of the HTML document (read as UTF-8, parsed with the
            ``lxml`` parser).

    Returns:
        A list of ``(class_combo, count, contexts)`` tuples ordered by count,
        then by the length of the class string, both descending. ``contexts``
        is a list of dicts with the keys ``tag``, ``id``, ``section_id``,
        ``content`` and ``html_snippet``.
    """
    with open(html_file, 'r', encoding='utf-8') as handle:
        soup = BeautifulSoup(handle, 'lxml')

    occurrences = Counter()
    contexts_for = {}

    for tag in soup.find_all(True):
        if 'class' not in tag.attrs or not tag['class']:
            continue
        class_combo = ' '.join(tag['class'])  # original class order is kept
        if not class_combo:
            continue

        occurrences[class_combo] += 1
        recorded = contexts_for.setdefault(class_combo, [])

        # Text preview: first 30 characters, with an ellipsis when truncated.
        text = tag.get_text()
        snippet_text = text[:30].strip()
        if len(text) > 30:
            snippet_text += "..."

        # Closest ancestor that has an id attribute, if any.
        holder = tag.find_parent(id=True)
        section_id = holder.get('id', '') if holder else ''

        rendered = str(tag)
        context = {
            'tag': tag.name,
            'id': tag.get('id', ''),
            'section_id': section_id,
            'content': snippet_text,
            'html_snippet': rendered[:100] + ('...' if len(rendered) > 100 else '')
        }

        # Record the context unless an equivalent id/section/content triple exists.
        signature = (context['id'], context['section_id'], context['content'])
        if all((c['id'], c['section_id'], c['content']) != signature for c in recorded):
            recorded.append(context)

    sorted_combinations = [
        (combo, count, contexts_for[combo]) for combo, count in occurrences.items()
    ]
    sorted_combinations.sort(key=lambda entry: (entry[1], len(entry[0])), reverse=True)

    return sorted_combinations

# Run the analysis.
html_file = 'index.html'  # change this to point at your own file
combinations = analyze_html(html_file)

# Flatten the results into a pandas DataFrame for easier inspection.
data = []
for combo, count, contexts in combinations:
    # Build a compact, human-readable context summary.
    context_str = []
    for ctx in contexts[:3]:  # only the first three contexts are summarised
        section = f"#{ctx['section_id']}" if ctx['section_id'] else ""
        content = f'"{ctx["content"]}"' if ctx['content'] else ""
        context_str.append(f"<{ctx['tag']}> {section} {content}")

    data.append({
        '类组合': combo,
        '出现次数': count,
        '长度': len(combo),
        '上下文': ' | '.join(context_str),
        '详细上下文': contexts
    })

df = pd.DataFrame(data)

# Keep combinations that occur at least twice and are at least 30 characters
# long; only the first 20 rows are retained.
filtered_df = df[(df['出现次数'] >= 2) & (df['长度'] >= 30)].head(20)

# Render the result table.
display(HTML(filtered_df[['类组合', '出现次数', '长度', '上下文']].to_html(escape=False)))

In [None]:
# 第三个单元格：加载现有JS映射文件（如果存在）
def _find_js_object_body(source, name):
    """Locate the flat object literal assigned to ``name`` in JS source text.

    Returns ``(decl_start, body_start, body_end)`` where ``decl_start`` is the
    index of ``name``, ``body_start`` is just after the opening brace and
    ``body_end`` is the index of the closing brace; ``None`` when the
    declaration or its closing brace cannot be found. Braces inside quoted
    strings and ``//`` comments are ignored, so entries such as
    ``'badge-{color}'`` do not end the object early.
    """
    decl = re.search(r'\b' + re.escape(name) + r'\s*=\s*\{', source)
    if decl is None:
        return None
    i, quote = decl.end(), None
    while i < len(source):
        ch = source[i]
        if quote is not None:
            if ch == '\\':
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
        elif source.startswith('//', i):
            newline = source.find('\n', i)
            if newline == -1:
                return None
            i = newline
        elif ch == '}':
            return decl.start(), decl.end(), i
        i += 1
    return None


def load_js_mappings(js_file='tailwind-classes.js'):
    """Load the existing Tailwind class mappings from the JS mapping file.

    Only the entries of the ``twClasses`` object are read. Scanning the whole
    file would also pick up ``twTemplates`` entries and unrelated JS code
    (for example ``.split(':')`` followed later by ``.split(',')``) as if they
    were class mappings. If the file declares no ``twClasses`` object, the
    whole file is scanned as a fallback.

    Args:
        js_file: Path of the JS mapping file.

    Returns:
        Dict mapping a class string to its short name; empty when the file
        does not exist.
    """
    existing_mappings = {}

    if not os.path.exists(js_file):
        return existing_mappings

    with open(js_file, 'r', encoding='utf-8') as f:
        js_content = f.read()

    span = _find_js_object_body(js_content, 'twClasses')
    if span is None:
        scope = js_content
    else:
        _, body_start, body_end = span
        scope = js_content[body_start:body_end]

    for match in re.finditer(r"'([^']+)':\s*'([^']+)'", scope):
        name, classes = match.groups()
        existing_mappings[classes] = name

    return existing_mappings

# Flag which class combinations already have an entry in the JS mapping file
# and record the name they are mapped to ('' when unmapped).
existing_mappings = load_js_mappings()
filtered_df['已映射'] = filtered_df['类组合'].apply(lambda x: x in existing_mappings)
filtered_df['现有名称'] = filtered_df['类组合'].apply(lambda x: existing_mappings.get(x, ''))

# Render the updated table.
display(HTML(filtered_df[['类组合', '出现次数', '长度', '已映射', '现有名称', '上下文']].to_html(escape=False)))

In [None]:
# 第四个单元格：自动生成唯一的语义化名称建议
def suggest_name(combo, contexts, existing_names=None):
    """Suggest a semantic, unique short name for a class combination.

    Args:
        combo: The space-separated class string.
        contexts: List of context dicts; the ``tag`` and ``section_id`` keys
            are read.
        existing_names: Names that are already taken. The returned name is
            guaranteed not to be in this collection. Defaults to no names.

    Returns:
        A name built from the page area, the element type and, for generic
        element types, one style feature. ``"ui-component"`` is the base name
        when nothing can be inferred. A further style feature or a numeric
        suffix is appended when the name is already taken.
    """
    if existing_names is None:
        existing_names = set()

    # Clues from the contexts.
    section_ids = [ctx['section_id'] for ctx in contexts if ctx['section_id']]
    tag_counts = Counter(ctx['tag'] for ctx in contexts)
    # max() returns the first maximal key and Counter keeps insertion order,
    # so ties resolve to the first tag seen. Iterating a set here made the
    # result depend on string hash randomisation.
    common_tag = max(tag_counts, key=tag_counts.get) if tag_counts else ""

    classes = combo.split()

    def has_utility(*prefixes):
        """True when a class's base utility starts with one of ``prefixes``.

        Variant prefixes (``hover:``, ``md:``) and a leading ``-``/``!`` are
        stripped first, so ``md:-mx-2`` counts as ``mx-`` while ``gap-4`` or
        ``bottom-0`` no longer count as padding/margin.
        """
        return any(
            cls.rsplit(':', 1)[-1].lstrip('!-').startswith(prefixes)
            for cls in classes
        )

    # Page area: first section id that is a known landmark.
    area = None
    for section in section_ids:
        if section in ['header', 'footer', 'navbar', 'main', 'gallery', 'about', 'banner']:
            area = section
            break

    # Element type derived from the most common tag.
    element_type = None
    if common_tag in ['a', 'button']:
        element_type = 'btn'
    elif common_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
        element_type = 'heading'
    elif common_tag == 'p':
        element_type = 'text'
    elif common_tag == 'div':
        if any('flex' in cls for cls in classes):
            element_type = 'flex-container'
        elif any('grid' in cls for cls in classes):
            element_type = 'grid'
        else:
            element_type = 'container'
    elif common_tag == 'span':
        element_type = 'label'
    elif common_tag == 'img':
        element_type = 'image'

    # Style features present in the combination.
    style_features = []
    if any('text-' in cls for cls in classes):
        style_features.append('text')
    if any('bg-' in cls for cls in classes):
        style_features.append('bg')
    if any('dark:' in cls for cls in classes):
        style_features.append('themed')
    if any('hover:' in cls for cls in classes):
        style_features.append('interactive')
    if any('rounded' in cls for cls in classes):
        style_features.append('rounded')
    if any('shadow' in cls for cls in classes):
        style_features.append('shadow')
    if any('border' in cls for cls in classes):
        style_features.append('bordered')
    if has_utility('p-', 'px-', 'py-'):
        style_features.append('padded')
    if has_utility('m-', 'mx-', 'my-'):
        style_features.append('margin')

    # Assemble the name.
    name_parts = []
    if area:
        name_parts.append(area)
    if element_type:
        name_parts.append(element_type)

    # Generic element types get one style feature to tell them apart.
    if style_features and (not element_type or element_type in ['container', 'text']):
        priority_features = ['interactive', 'themed', 'shadow', 'rounded', 'bordered', 'bg', 'padded', 'margin', 'text']
        for feature in priority_features:
            if feature in style_features:
                name_parts.append(feature)
                break

    # Fall back to a generic name when nothing was inferred.
    if not name_parts:
        base_name = "ui-component"
    else:
        base_name = "-".join(name_parts)

    # Make the name unique.
    final_name = base_name
    i = 1
    while final_name in existing_names:
        # On the first clash, try appending one more style feature.
        if i == 1 and style_features and len(style_features) > 1:
            unused_features = [f for f in style_features if f not in name_parts]
            if unused_features:
                name_parts.append(unused_features[0])
                final_name = "-".join(name_parts)
                i += 1
                continue

        final_name = f"{base_name}-{i}"
        i += 1

    return final_name

# 处理参数化模板（颜色变体）
def create_template_names(templates):
    """Create a unique name for every parameterised template.

    Args:
        templates: Dict mapping a template string to its list of concrete
            class-combination variants.

    Returns:
        Dict mapping each template string to
        ``{'name': template_name, 'variants': variants}``. The name is
        ``"<base>-{<colour>}"`` where the base comes from ``suggest_name`` on
        the first variant and the colour is the first ``bg-<colour>-<n>``
        colour found in it (``"color"`` if none). When two templates would
        get the same name, a numeric suffix is inserted so that later
        name-keyed dicts do not silently overwrite one template with another.
    """
    template_names = {}
    used_names = set()
    for template, variants in templates.items():
        # The first variant decides the base name.
        first_variant = variants[0]
        # Reads the notebook-global ``df``; the filter is evaluated once.
        matching_rows = df[df['类组合'] == first_variant]
        contexts = matching_rows['详细上下文'].values[0] if not matching_rows.empty else []

        # The colour of the first variant is used as the parameter label.
        color_pattern = r"bg-([a-z]+)-\d+"
        color_match = re.search(color_pattern, first_variant)
        color_name = color_match.group(1) if color_match else "color"

        base_name = suggest_name(first_variant, contexts)
        template_name = f"{base_name}-{{{color_name}}}"

        # Disambiguate clashes between templates.
        suffix = 1
        while template_name in used_names:
            template_name = f"{base_name}-{suffix}-{{{color_name}}}"
            suffix += 1
        used_names.add(template_name)

        template_names[template] = {
            'name': template_name,
            'variants': variants
        }

    return template_names

# Load the existing JS mappings (empty when the loader cell has not been run).
existing_mappings = load_js_mappings() if 'load_js_mappings' in globals() else {}
existing_names = set(existing_mappings.values())

# Name the templates found by the analysis cell, if it produced any.
template_names = create_template_names(global_templates) if 'global_templates' in globals() else {}

# Names already in use; grows as suggestions are handed out so that every
# suggestion is unique.
names_used = set(existing_mappings.values())

# Collect every class combination that is covered by a template.
template_variants = []
for template_info in template_names.values():
    template_variants.extend(template_info['variants'])

# Process the most frequent (then longest) combinations first.
sorted_df = filtered_df.sort_values(by=['出现次数', '长度'], ascending=[False, False]).copy()

# Fill in the suggested name: existing name, fresh suggestion, or a marker
# for combinations that are handled as template variants.
for idx, row in sorted_df.iterrows():
    if row['类组合'] in existing_mappings:
        sorted_df.at[idx, '建议名称'] = existing_mappings[row['类组合']]
    elif row['类组合'] not in template_variants:
        suggested_name = suggest_name(row['类组合'], row['详细上下文'], names_used)
        sorted_df.at[idx, '建议名称'] = suggested_name
        names_used.add(suggested_name)  # reserve the name immediately
    else:
        sorted_df.at[idx, '建议名称'] = "（作为模板变体处理）"

# Copy the suggestions back onto the original frame (aligned by index).
filtered_df['建议名称'] = sorted_df['建议名称']

# Print the parameterised template names and the variants they cover.
if template_names:
    print("参数化模板信息:")
    for template, info in template_names.items():
        print(f"\n模板名称: {info['name']}")
        print("适用于:")
        for variant in info['variants']:
            print(f"  - {variant}")

# Render the table with the suggested names.
display(HTML(filtered_df[['类组合', '出现次数', '建议名称', '上下文']].to_html(escape=False)))

# Report any suggested name used more than once (the template marker is exempt).
name_counts = Counter(filtered_df['建议名称'].dropna())
duplicates = [name for name, count in name_counts.items() if count > 1 and name != "（作为模板变体处理）"]

if duplicates:
    print("\n警告：检测到名称冲突！")
    for dup in duplicates:
        print(f"\n冲突名称: {dup}")
        for idx, row in filtered_df[filtered_df['建议名称'] == dup].iterrows():
            print(f"  - 类组合: {row['类组合']}")

In [None]:
# 第五个单元格：交互式编辑建议名称（带冲突检测）
# Only unmapped items are edited; combinations covered by a template are excluded.
template_variants = []
if 'template_names' in globals():
    for template_info in template_names.values():
        template_variants.extend(template_info['variants'])

# Rows not yet mapped in the JS file, minus the template variants.
unmapped_df = filtered_df[~filtered_df['已映射']].copy()
unmapped_df = unmapped_df[~unmapped_df['类组合'].isin(template_variants)]

# Helper used to detect name clashes between edited rows
def check_name_conflict(name, index, all_names):
    """Return True when another row already uses ``name``.

    Args:
        name: The candidate name.
        index: Key of the row being checked; that row itself is ignored.
        all_names: Mapping of row key to the name currently entered for it.
    """
    for other_index, other_name in all_names.items():
        if other_index != index and other_name == name:
            return True
    return False

# Names as currently entered, keyed by DataFrame index; kept in sync with the
# text boxes by the observers registered below.
current_names = {}
for i, row in unmapped_df.iterrows():
    current_names[i] = row['建议名称']

# One (index, widget) pair per row, for the text boxes and their status labels.
text_widgets = []
status_widgets = []

for i, row in unmapped_df.iterrows():
    # Name input box, pre-filled with the suggestion.
    w = widgets.Text(
        value=row['建议名称'],
        description=f'#{i}:',
        style={'description_width': '50px'},
        layout=widgets.Layout(width='70%')
    )

    # Status indicator shown next to the input box.
    status = widgets.HTML(
        value='<span style="color:green;">✓ 有效名称</span>',
        layout=widgets.Layout(width='20%', padding='8px 0')
    )

    text_widgets.append((i, w))
    status_widgets.append((i, status))

    # Show the class combination and its context above the input.
    display(HTML(f"<b>类组合 {i}，次数{row['出现次数']}:</b> {row['类组合']}<br><b>上下文:</b> {row['上下文']}"))

    # Lay the input box and the status label out side by side.
    box = widgets.HBox([w, status])
    display(box)
    display(HTML("<hr>"))

    # Live validation. ``idx`` and ``widget`` are bound as default arguments
    # so each callback keeps the values of its own loop iteration.
    def update_status(change, idx=i, widget=status):
        name = change['new']
        current_names[idx] = name
        if not name:
            widget.value = '<span style="color:orange;">⚠️ 名称为空</span>'
        elif check_name_conflict(name, idx, current_names):
            widget.value = '<span style="color:red;">❌ 名称冲突</span>'
        else:
            widget.value = '<span style="color:green;">✓ 有效名称</span>'

    w.observe(update_status, names='value')

# Collect the edited names once the user confirms
edit_button = widgets.Button(description="确认编辑")
output = widgets.Output()

@output.capture()
def on_edit_button_clicked(b):
    """Click handler: validate every entered name, then store them on ``unmapped_df``.

    Nothing is written when any name is empty or clashes with another row.
    """
    # any() stops at the first offending row.
    has_conflicts = any(
        not widget.value or check_name_conflict(widget.value, i, current_names)
        for i, widget in text_widgets
    )

    if has_conflicts:
        print("⚠️ 检测到名称冲突或空名称，请修正后再试！")
        return

    # All names are valid: copy them into the user-edited column.
    for row_index, text_box in text_widgets:
        unmapped_df.at[row_index, '用户编辑名称'] = text_box.value

    print("✅ 名称已更新！所有名称均唯一，没有冲突。")

edit_button.on_click(on_edit_button_clicked)
display(edit_button)
display(output)

In [None]:
# 第六个单元格：更新JS映射文件（支持参数化模板）
def _find_js_object_body(source, name):
    """Locate the flat object literal assigned to ``name`` in JS source text.

    Returns ``(decl_start, body_start, body_end)`` where ``decl_start`` is the
    index of ``name``, ``body_start`` is just after the opening brace and
    ``body_end`` is the index of the closing brace; ``None`` when the
    declaration or its closing brace cannot be found. Braces inside quoted
    strings and ``//`` comments are ignored, so entries such as
    ``'badge-{color}'`` do not end the object early.
    """
    decl = re.search(r'\b' + re.escape(name) + r'\s*=\s*\{', source)
    if decl is None:
        return None
    i, quote = decl.end(), None
    while i < len(source):
        ch = source[i]
        if quote is not None:
            if ch == '\\':
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
        elif source.startswith('//', i):
            newline = source.find('\n', i)
            if newline == -1:
                return None
            i = newline
        elif ch == '}':
            return decl.start(), decl.end(), i
        i += 1
    return None


def update_js_file(js_file, new_mappings, template_mappings=None):
    """Write class mappings and parameterised templates into the JS mapping file.

    Entries already present in the file's ``twClasses`` / ``twTemplates``
    objects are kept and merged with the new ones (a new entry wins when the
    name is the same). Rewriting the objects from the new entries alone would
    drop every mapping stored earlier. The object boundaries are found with a
    quote-aware scan, so ``{color}`` placeholders inside entries do not
    truncate the object, and the text is spliced in directly, so backslashes
    in class strings are written verbatim.

    Args:
        js_file: Path of the JS file; created from a built-in skeleton when
            it does not exist.
        new_mappings: Dict mapping a class string to its short name.
        template_mappings: Optional dict mapping a template name to its
            template class string.

    Returns:
        A status message naming the file that was written.
    """
    if template_mappings is None:
        template_mappings = {}

    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        # Skeleton file, including the runtime support for parameterised templates.
        content = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义类名映射
const twClasses = {
};

// 定义参数化模板（支持颜色等变体）
const twTemplates = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  console.log('正在应用Tailwind类映射...');

  // 应用普通类映射
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(applyClasses);

  // 应用参数化模板
  const paramElements = document.querySelectorAll('[data-tw-param]');
  paramElements.forEach(applyParamClasses);

  console.log('Tailwind类映射应用完成!');
});

// 应用普通类
function applyClasses(el) {
  const classNames = el.getAttribute('data-tw').split(' ');
  classNames.forEach(name => {
    if (twClasses[name]) {
      twClasses[name].split(' ').forEach(cls => {
        el.classList.add(cls);
      });
    }
  });
  // 标记为已处理
  el.classList.add('tw-processed');
}

// 应用参数化模板
function applyParamClasses(el) {
  const paramData = el.getAttribute('data-tw-param').split(':');
  if (paramData.length >= 2) {
    const templateName = paramData[0];
    const params = paramData[1].split(',');

    // 检查是否有匹配的模板
    if (twTemplates[templateName]) {
      let classString = twTemplates[templateName];

      // 替换颜色参数
      const colorPattern = /{([a-z]+)}/g;
      let match;
      let paramIndex = 0;

      while ((match = colorPattern.exec(classString)) !== null) {
        const paramName = match[1];
        const paramValue = params[paramIndex] || paramName;
        classString = classString.replace(new RegExp(`{${paramName}}`, 'g'), paramValue);
        paramIndex++;
      }

      // 添加处理后的类
      classString.split(' ').forEach(cls => {
        el.classList.add(cls);
      });
    }
  }
  // 标记为已处理
  el.classList.add('tw-processed');
}

// 添加辅助函数
window.addTwClass = function(name, classes) {
  twClasses[name] = classes;
};

window.addTwTemplate = function(name, template) {
  twTemplates[name] = template;
};
"""

    entry_pattern = re.compile(r"'([^']+)':\s*'([^']+)'")

    # Keyword (looked up in the mapping name) -> category heading.
    categories = {
        'nav': '导航相关',
        'header': '导航相关',
        'card': '卡片相关',
        'banner': '横幅相关',
        'hero': '横幅相关',
        'flex': '布局相关',
        'container': '布局相关',
        'grid': '布局相关',
        'badge': '装饰相关',
        'avatar': '装饰相关',
        'section': '装饰相关',
        'btn': '按钮相关',
        'button': '按钮相关',
        'text': '文本相关',
        'heading': '文本相关',
        'title': '文本相关',
        'theme': '主题相关',
        'footer': '底部栏相关'
    }

    # --- twClasses: merge the stored entries with the new ones ---
    span = _find_js_object_body(content, 'twClasses')
    if span is not None:
        decl_start, body_start, body_end = span
        merged_classes = dict(entry_pattern.findall(content[body_start:body_end]))
        for class_combo, name in new_mappings.items():
            merged_classes[name] = class_combo

        categorized_mappings = {}
        for name, class_combo in merged_classes.items():
            category = '其他组件'
            for key, cat_name in categories.items():
                if key in name:
                    category = cat_name
                    break
            categorized_mappings.setdefault(category, []).append((name, class_combo))

        pieces = []
        for category, items in sorted(categorized_mappings.items()):
            pieces.append(f"  // {category}\n")
            pieces.extend(f"  '{name}': '{class_combo}',\n" for name, class_combo in sorted(items))
            pieces.append("\n")
        twClasses_content = "".join(pieces)

        content = (
            content[:decl_start]
            + f"twClasses = {{\n{twClasses_content.rstrip()}\n}}"
            + content[body_end + 1:]
        )

    # --- twTemplates: located after the splice above, since offsets moved ---
    span = _find_js_object_body(content, 'twTemplates')
    if span is not None:
        decl_start, body_start, body_end = span
        merged_templates = dict(entry_pattern.findall(content[body_start:body_end]))
        merged_templates.update(template_mappings)

        twTemplates_content = "".join(
            f"  '{template_name}': '{template_value}',\n"
            for template_name, template_value in merged_templates.items()
        )

        content = (
            content[:decl_start]
            + f"twTemplates = {{\n{twTemplates_content.rstrip()}\n}}"
            + content[body_end + 1:]
        )

    # Save the updated file.
    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(content)

    return f"映射已保存到 {js_file}"

# Build the template-name -> template-string mapping written to the JS file.
template_mapping = {}
if 'template_names' in globals():
    for template, info in template_names.items():
        template_name = info['name']
        # Replace the concrete colour with the "{color}" placeholder, using
        # the first variant that contains a bg-<colour>-<n> class.
        parametrized = template
        for variant in info['variants']:
            color_pattern = r"bg-([a-z]+)-\d+"
            match = re.search(color_pattern, variant)
            if match:
                color_name = match.group(1)
                parametrized = re.sub(
                    f"bg-{color_name}-([0-9]+)",
                    "bg-{color}-\\1",
                    parametrized
                )
                parametrized = re.sub(
                    f"dark:bg-{color_name}-([0-9]+)",
                    "dark:bg-{color}-\\1",
                    parametrized
                )
                break
        template_mapping[template_name] = parametrized

# Pick up the user's edits, unless ``user_edited`` was already defined.
if 'unmapped_df' in globals() and 'user_edited' not in globals():
    try:
        user_edited = unmapped_df[['类组合', '用户编辑名称']].set_index('类组合').to_dict()['用户编辑名称']
    except KeyError:
        # The edited-name column does not exist until the user confirms their
        # edits; fall back to the suggested names. Only the missing-column
        # error is handled so that unrelated failures are not hidden.
        user_edited = unmapped_df[['类组合', '建议名称']].set_index('类组合').to_dict()['建议名称']
elif 'user_edited' not in globals():
    user_edited = {}

# Controls for writing the JS file: trigger button, target path, a checkbox
# that toggles whether templates are included, and an output area.
update_button = widgets.Button(description="更新JS文件")
js_file_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
include_templates = widgets.Checkbox(value=True, description='包含参数化模板', indent=False)
update_output = widgets.Output()

# Render the controls in visual order.
display(js_file_input)
display(include_templates)
display(update_button)
display(update_output)

@update_output.capture()
def on_update_clicked(b):
    """Click handler: write the mappings to the JS file and print usage examples.

    Args:
        b: The button instance passed by ipywidgets (unused).

    Templates are passed on only when the "include templates" checkbox is
    ticked. All printed text is captured by ``update_output``.
    """
    templates_to_use = template_mapping if include_templates.value else {}
    result = update_js_file(js_file_input.value, user_edited, templates_to_use)
    print(result)

    if templates_to_use:
        print(f"\n已添加 {len(templates_to_use)} 个参数化模板")

        # Show sample HTML for each template. The examples always use fixed
        # colours, so the unused colour lookup that ran the regex twice per
        # template has been removed.
        print("\n参数化模板使用示例:")
        for template_name in templates_to_use:
            print(f'<div data-tw-param="{template_name}:indigo"></div>  <!-- 使用indigo颜色 -->')
            print(f'<div data-tw-param="{template_name}:pink"></div>  <!-- 使用pink颜色 -->')

update_button.on_click(on_update_clicked)

In [None]:
# 第七个单元格：将映射应用到HTML生成新文件（文本替换方式）
def _find_js_object_body(source, name):
    """Locate the flat object literal assigned to ``name`` in JS source text.

    Returns ``(decl_start, body_start, body_end)`` where ``decl_start`` is the
    index of ``name``, ``body_start`` is just after the opening brace and
    ``body_end`` is the index of the closing brace; ``None`` when the
    declaration or its closing brace cannot be found. Braces inside quoted
    strings and ``//`` comments are ignored, so entries such as
    ``'badge-{color}'`` do not end the object early.
    """
    decl = re.search(r'\b' + re.escape(name) + r'\s*=\s*\{', source)
    if decl is None:
        return None
    i, quote = decl.end(), None
    while i < len(source):
        ch = source[i]
        if quote is not None:
            if ch == '\\':
                i += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
        elif source.startswith('//', i):
            newline = source.find('\n', i)
            if newline == -1:
                return None
            i = newline
        elif ch == '}':
            return decl.start(), decl.end(), i
        i += 1
    return None


def apply_mappings_text_based(html_file, js_file, output_file=None):
    """Apply the JS mappings to an HTML file by text replacement, keeping its formatting.

    A ``class="..."`` attribute whose whole value equals a mapped class string
    becomes ``data-tw="<name>"``. A value that equals a template with a
    concrete colour substituted becomes ``data-tw-param="<template>:<colour>"``.

    Args:
        html_file: Path of the source HTML file.
        js_file: Path of the JS mapping file.
        output_file: Destination path; defaults to ``<name>_mapped<ext>``
            next to ``html_file``.

    Returns:
        Dict with ``output_file``, ``replaced_count``,
        ``template_replaced_count`` and ``remaining_long_classes`` (up to ten
        ``(class string, count, length)`` tuples for the longest remaining
        class strings of at least 30 characters).
    """
    # Resolve the output file name.
    if output_file is None:
        name, ext = os.path.splitext(html_file)
        output_file = f"{name}_mapped{ext}"

    # Regular mappings: class string -> name.
    regular_mappings = load_js_mappings(js_file)

    # Parameterised templates, read straight from the JS file. The object
    # body is delimited with a quote-aware scan: a plain "{[^}]*}" pattern
    # stops at the first "}" inside a "{color}" placeholder and then finds no
    # complete entry at all.
    template_mappings = {}
    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            js_content = f.read()
        span = _find_js_object_body(js_content, 'twTemplates')
        if span is not None:
            _, body_start, body_end = span
            for m in re.finditer(r"'([^']+)':\s*'([^']+)'", js_content[body_start:body_end]):
                template_name, template_value = m.groups()
                template_mappings[template_name] = template_value

    # Read the HTML as plain text.
    with open(html_file, 'r', encoding='utf-8') as f:
        html_content = f.read()

    replaced_count = 0
    template_replaced_count = 0

    for class_str, tw_name in regular_mappings.items():
        # Match the whole class attribute exactly.
        pattern = r'class=["\']' + re.escape(class_str) + r'["\']'
        replacement = f'data-tw="{tw_name}"'

        # A callable replacement is inserted verbatim (no backslash or group
        # expansion); subn reports exactly how many attributes were replaced.
        html_content, hits = re.subn(pattern, lambda _m, text=replacement: text, html_content)
        replaced_count += hits

    # Colours tried for every template.
    color_names = ['red', 'blue', 'green', 'yellow', 'indigo', 'purple', 'pink',
                  'gray', 'orange', 'teal', 'cyan', 'amber', 'lime', 'emerald']

    # Concrete class string -> (template name, colour); the first template
    # that yields a given string wins.
    color_patterns = {}
    for template_name, template_value in template_mappings.items():
        for color in color_names:
            concrete_class = template_value.replace('{color}', color)
            if concrete_class not in color_patterns:
                color_patterns[concrete_class] = (template_name, color)

    for concrete_class, (template_name, color) in color_patterns.items():
        pattern = r'class=["\']' + re.escape(concrete_class) + r'["\']'
        replacement = f'data-tw-param="{template_name}:{color}"'

        html_content, count = re.subn(pattern, lambda _m, text=replacement: text, html_content)
        template_replaced_count += count
        if count > 0:
            print(f"替换了 {count} 个模板: {concrete_class} → {template_name}:{color}")

    # Save the updated HTML.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Inspect what is left: count the class strings still present.
    soup = BeautifulSoup(html_content, 'lxml')
    remaining_classes = Counter()

    for element in soup.find_all(True):
        if 'class' in element.attrs and element['class']:
            class_str = ' '.join(element['class'])
            remaining_classes[class_str] += 1

    # Keep the long ones, longest first.
    long_classes = [(cls, count, len(cls)) for cls, count in remaining_classes.items() if len(cls) >= 30]
    long_classes.sort(key=lambda x: x[2], reverse=True)

    return {
        'output_file': output_file,
        'replaced_count': replaced_count,
        'template_replaced_count': template_replaced_count,
        'remaining_long_classes': long_classes[:10]  # only the ten longest
    }

# Controls for applying the mappings: HTML path, optional output path,
# trigger button and an output area for the handler's printed text.
html_file_input = widgets.Text(value='index.html', description='HTML文件:')
output_file_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(description="应用映射")
apply_output = widgets.Output()

# Render the heading followed by the controls, in visual order.
display(HTML("<h3>应用映射到HTML文件</h3>"))
display(html_file_input)
display(js_file_input)  # reuses the JS path input created in an earlier cell
display(output_file_input)
display(apply_button)
display(apply_output)

@apply_output.capture()
def on_apply_clicked(b):
    """Click handler: run the text-based mapping and print a summary.

    Args:
        b: The button instance passed by ipywidgets (unused).

    All printed text is captured by ``apply_output``.
    """
    # An empty output box is passed on as None.
    output_file = output_file_input.value if output_file_input.value else None
    result = apply_mappings_text_based(html_file_input.value, js_file_input.value, output_file)

    print(f"已生成更新后的HTML文件: {result['output_file']}")
    print(f"替换了 {result['replaced_count']} 个常规class属性")

    if result['template_replaced_count'] <= 0:
        print("没有找到匹配的参数化模板")
    else:
        print(f"替换了总计 {result['template_replaced_count']} 个参数化模板class属性")

    leftovers = result['remaining_long_classes']
    if leftovers:
        print("\n仍有以下长类组合未处理:")
        for cls, count, length in leftovers:
            print(f"- 【{count}次】长度{length}: {cls}")

apply_button.on_click(on_apply_clicked)

In [None]:
# 第四个单元格：自动生成语义化名称建议
def suggest_name(combo, contexts):
    """Suggest a semantic short name for a class combination.

    Args:
        combo: The space-separated class string.
        contexts: List of context dicts; the ``tag`` and ``section_id`` keys
            are read.

    Returns:
        A name built from the page area and the element type (or, when no
        element type is recognised, the first detected style feature).
        ``"ui-component"`` when nothing can be inferred.
    """
    # Clues from the contexts.
    section_ids = [ctx['section_id'] for ctx in contexts if ctx['section_id']]
    tag_counts = Counter(ctx['tag'] for ctx in contexts)
    # max() returns the first maximal key and Counter keeps insertion order,
    # so ties resolve to the first tag seen. Iterating a set here made the
    # result depend on string hash randomisation.
    common_tag = max(tag_counts, key=tag_counts.get) if tag_counts else ""

    classes = combo.split()

    # Page area: first section id that is a known landmark.
    area = None
    for section in section_ids:
        if section in ['header', 'footer', 'navbar', 'main', 'gallery', 'about', 'banner']:
            area = section
            break

    # Element type derived from the most common tag.
    element_type = None
    if common_tag in ['a', 'button']:
        element_type = 'btn'
    elif common_tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
        element_type = 'heading'
    elif common_tag == 'p':
        element_type = 'text'
    elif common_tag == 'div':
        if any('flex' in cls for cls in classes):
            element_type = 'flex-container'
        elif any('grid' in cls for cls in classes):
            element_type = 'grid'
    elif common_tag == 'span':
        element_type = 'label'
    elif common_tag == 'img':
        element_type = 'image'

    # Style features present in the combination.
    style_features = []
    if any('text-' in cls for cls in classes):
        style_features.append('text')
    if any('bg-' in cls for cls in classes):
        style_features.append('bg')
    if any('dark:' in cls for cls in classes):
        style_features.append('themed')
    if any('hover:' in cls for cls in classes):
        style_features.append('interactive')
    if any('rounded' in cls for cls in classes):
        style_features.append('rounded')
    if any('shadow' in cls for cls in classes):
        style_features.append('shadow')

    # Assemble the name.
    name_parts = []
    if area:
        name_parts.append(area)
    if element_type:
        name_parts.append(element_type)
    elif style_features:
        name_parts.append(style_features[0])

    # Fall back to a generic name when nothing was inferred.
    if not name_parts:
        return "ui-component"

    return "-".join(name_parts)

# Unmapped rows get a generated suggestion; mapped rows keep their existing name.
filtered_df['建议名称'] = filtered_df.apply(
    lambda row: suggest_name(row['类组合'], row['详细上下文']) if not row['已映射'] else row['现有名称'],
    axis=1
)

# Render the table with the suggested names.
display(HTML(filtered_df[['类组合', '出现次数', '建议名称', '上下文']].to_html(escape=False)))

In [None]:
# 第五个单元格：交互式编辑建议名称
# Only rows that are not yet mapped are offered for editing.
unmapped_df = filtered_df[~filtered_df['已映射']].copy()

# One text box per row, pre-filled with the suggested name; stored as
# (DataFrame index, widget) pairs for the confirm handler.
text_widgets = []
for i, row in unmapped_df.iterrows():
    w = widgets.Text(
        value=row['建议名称'],
        description=f'#{i}:',
        style={'description_width': '50px'},
        layout=widgets.Layout(width='80%')
    )
    text_widgets.append((i, w))

    # Show the class combination and its context, then the input box.
    display(HTML(f"<b>类组合 {i}，次数{row['出现次数']}:</b> {row['类组合']}<br><b>上下文:</b> {row['上下文']}"))
    display(w)
    display(HTML("<hr>"))

# Collect the edited names once the user confirms
edit_button = widgets.Button(description="确认编辑")
output = widgets.Output()

@output.capture()
def on_edit_button_clicked(b):
    """Click handler: copy every text box value into the user-edited column."""
    for row_index, text_box in text_widgets:
        edited_name = text_box.value
        unmapped_df.at[row_index, '用户编辑名称'] = edited_name
    print("名称已更新!")

edit_button.on_click(on_edit_button_clicked)
display(edit_button)
display(output)

In [None]:
# Bare expression: the notebook renders the current ``unmapped_df`` as the cell output.
unmapped_df

In [None]:
# Cell 6: update the JS mapping file.

# Matches the `twClasses = {...}` declaration. Group 1 is the declaration
# keyword, group 2 the raw text between the braces.
_TW_DECLARATION_RE = re.compile(r'\b(const|let|var)\s+twClasses\s*=\s*\{([^}]*)\}')

# Matches one `'name': 'classes'` entry inside the object literal.
_TW_ENTRY_RE = re.compile(r"'([^']+)':\s*'([^']+)'")

# Section titles (written into the JS file as comments) and the substrings of
# a mapping name that select each section. Order matters: a name is placed in
# the FIRST section whose keyword it contains.
_TW_CATEGORY_RULES = (
    ('导航相关', ('nav',)),
    ('卡片相关', ('card',)),
    ('横幅相关', ('banner',)),
    ('常用布局', ('flex', 'grid')),
    ('常用装饰', ('badge', 'avatar', 'section')),
    ('主题相关', ('theme',)),
)
_TW_OTHER_CATEGORY = '其他组件'

# File content used when the target JS file does not exist yet.
_TW_DEFAULT_JS = """/**
 * Tailwind类简化映射
 *
 * 这个文件定义了常用Tailwind类组合的简化映射，目的是:
 * 1. 减少HTML文件中的重复代码
 * 2. 提高可读性和可维护性
 * 3. 减少LLM上下文窗口大小
 */

// 定义类名映射
const twClasses = {
};

// 页面加载时应用类名
document.addEventListener('DOMContentLoaded', function() {
  const elements = document.querySelectorAll('[data-tw]');
  elements.forEach(el => {
    const classNames = el.getAttribute('data-tw').split(' ');
    classNames.forEach(name => {
      if (twClasses[name]) {
        twClasses[name].split(' ').forEach(cls => {
          el.classList.add(cls);
        });
      }
    });
  });
});

window.addTwClass = function(name, classes) {
  twClasses[name] = classes;
};
"""


def _tw_category_for(name):
    """Return the section title for a mapping name (first matching rule wins)."""
    for title, keywords in _TW_CATEGORY_RULES:
        if any(keyword in name for keyword in keywords):
            return title
    return _TW_OTHER_CATEGORY


def _render_tw_entries(all_mappings):
    """Render {name: classes} as the body of the twClasses object literal."""
    buckets = {title: [] for title, _ in _TW_CATEGORY_RULES}
    buckets[_TW_OTHER_CATEGORY] = []
    for name in all_mappings:
        buckets[_tw_category_for(name)].append(name)

    sections = []
    for title, names in buckets.items():
        # The fixed sections are always written (even when empty) so the file
        # keeps a stable layout; the catch-all section only when it has entries.
        if title == _TW_OTHER_CATEGORY and not names:
            continue
        lines = [f"  // {title}\n"]
        lines.extend(f"  '{name}': '{all_mappings[name]}',\n" for name in sorted(names))
        sections.append("".join(lines))

    # Drop the trailing comma of the last entry.
    return "\n".join(sections).rstrip(',\n') + '\n'


def update_js_file(js_file, new_mappings):
    """Merge new mappings into the twClasses object of a JS file and save it.

    Args:
        js_file: Path of the JS file. If it does not exist, a default template
            (mapping object plus the DOMContentLoaded loader) is used as the
            starting content.
        new_mappings: Dict of class combination -> short name. Entries are
            written as `'name': 'class combination'` and override existing
            entries that have the same name.

    Returns:
        A confirmation message containing the file path.

    Behaviour notes:
        * Existing `'name': 'classes'` entries are preserved, including several
          entries sharing one line.
        * Each name is written exactly once, in the first category whose
          keyword it contains, so a name such as `nav-card` is not duplicated.
        * If the file has no `const/let/var twClasses = {...}` declaration, a
          new `const twClasses` declaration is prepended so the mappings are
          not lost.

    NOTE(review): names or class strings containing a single quote are written
    verbatim and would break both the JS literal and the entry parser above.
    """
    if os.path.exists(js_file):
        with open(js_file, 'r', encoding='utf-8') as f:
            content = f.read()
    else:
        content = _TW_DEFAULT_JS

    declaration = _TW_DECLARATION_RE.search(content)

    # Parse the entries already present, skipping comment lines.
    existing_mappings = {}
    if declaration:
        for raw_line in declaration.group(2).split('\n'):
            line = raw_line.strip()
            if not line or line.startswith('//'):
                continue
            for entry in _TW_ENTRY_RE.finditer(line):
                existing_mappings[entry.group(1)] = entry.group(2)

    # New mappings win over existing ones with the same name.
    all_mappings = dict(existing_mappings)
    for class_combo, name in new_mappings.items():
        all_mappings[name] = class_combo

    rendered = _render_tw_entries(all_mappings)

    if declaration:
        # Splice by position instead of re.sub: a replacement template would
        # interpret backslashes inside class strings as escapes/group refs.
        keyword = declaration.group(1)
        updated_js = (
            content[:declaration.start()]
            + f"{keyword} twClasses = {{\n{rendered}}}"
            + content[declaration.end():]
        )
    else:
        updated_js = f"const twClasses = {{\n{rendered}}};\n\n{content}"

    with open(js_file, 'w', encoding='utf-8') as f:
        f.write(updated_js)

    return f"映射已保存到 {js_file}"

# Collect the class-combination -> name mapping from the current DataFrame.
def _collect_user_mappings(df):
    """Return {class combination: name} for the rows of `df`.

    Uses the 用户编辑名称 column when it exists (it is created by the
    "confirm edit" handler); rows whose edited name is missing or blank, and
    frames without that column, fall back to 建议名称.
    """
    names = df['建议名称']
    if '用户编辑名称' in df.columns:
        edited = df['用户编辑名称']
        has_edit = edited.notna() & (edited.astype(str).str.strip() != '')
        names = edited.where(has_edit, names)
    return dict(zip(df['类组合'], names))


# Initial snapshot; refreshed on every click of the update button so that
# names confirmed after this cell was run are not ignored.
user_edited = _collect_user_mappings(unmapped_df)

# Button and inputs for writing the mappings to the JS file.
update_button = widgets.Button(description="更新JS文件")
js_file_input = widgets.Text(value='tailwind-classes.js', description='JS文件:')
update_output = widgets.Output()

display(js_file_input)
display(update_button)
display(update_output)


@update_output.capture()
def on_update_clicked(b):
    """Write the current mappings to the JS file named in `js_file_input`.

    `b` is the clicked button (unused). The mapping is rebuilt from
    `unmapped_df` at click time and also stored in the global `user_edited`.
    """
    global user_edited
    user_edited = _collect_user_mappings(unmapped_df)
    result = update_js_file(js_file_input.value, user_edited)
    print(result)


update_button.on_click(on_update_clicked)

In [None]:
# Cell 7: apply the mappings to an HTML file and write a new file.
def apply_mappings(html_file, js_file, output_file=None):
    """Replace mapped class attributes in an HTML file with data-tw attributes.

    Args:
        html_file: Path of the HTML file to read.
        js_file: Path of the JS mapping file, passed to load_js_mappings.
        output_file: Path to write; when None, "<name>_mapped<ext>" derived
            from `html_file` is used.

    Returns:
        Dict with 'output_file', 'replaced_count' and 'remaining_classes'
        (the result of analyze_remaining_classes on the modified document).

    NOTE(review): assumes load_js_mappings returns a mapping keyed by the full
    class string -- its return statement is not visible here; confirm.
    """
    # Derive the default output name next to the input file.
    if output_file is None:
        stem, suffix = os.path.splitext(html_file)
        output_file = f"{stem}_mapped{suffix}"

    mappings = load_js_mappings(js_file)

    with open(html_file, 'r', encoding='utf-8') as source:
        soup = BeautifulSoup(source, 'lxml')

    # An element is rewritten only when its whole class list, joined with
    # single spaces, is a key of the mapping.
    replaced_count = 0
    for tag in soup.find_all(True):
        tokens = tag.attrs.get('class')
        if not tokens:
            continue
        joined = ' '.join(tokens)
        if joined not in mappings:
            continue
        tag['data-tw'] = mappings[joined]
        del tag['class']
        replaced_count += 1

    with open(output_file, 'w', encoding='utf-8') as target:
        target.write(soup.prettify())

    # Report the class combinations that are still present.
    remaining_df = analyze_remaining_classes(soup)

    return {
        'output_file': output_file,
        'replaced_count': replaced_count,
        'remaining_classes': remaining_df
    }

def analyze_remaining_classes(soup):
    """Count the class combinations still present in a parsed document.

    Args:
        soup: Parsed document; every element returned by `soup.find_all(True)`
            that has a non-empty 'class' attribute is counted.

    Returns:
        DataFrame with columns 类组合 (space-joined class list), 出现次数
        (occurrences) and 长度 (string length), sorted by occurrences and then
        length, both descending. When no element carries a class the frame is
        empty but still has these columns, so callers can filter on them.
    """
    class_lists = (element.attrs.get('class') for element in soup.find_all(True))
    combos = Counter(' '.join(tokens) for tokens in class_lists if tokens)

    # Fixing the columns explicitly keeps sort_values from raising KeyError on
    # an empty result (a DataFrame built from an empty list has no columns).
    rows = [(combo, count, len(combo)) for combo, count in combos.items()]
    df = pd.DataFrame(rows, columns=['类组合', '出现次数', '长度'])

    # Most frequent first, longer combinations first among ties.
    return df.sort_values(['出现次数', '长度'], ascending=False)

# UI controls for applying the mappings to an HTML file.
html_file_input = widgets.Text(value='index.html', description='HTML文件:')
output_file_input = widgets.Text(value='', description='输出文件:', placeholder='留空使用默认名称')
apply_button = widgets.Button(description="应用映射")
apply_output = widgets.Output()

display(HTML("<h3>应用映射到HTML文件</h3>"))
# js_file_input is the widget created in the previous cell, shown again here.
for _control in (html_file_input, js_file_input, output_file_input, apply_button, apply_output):
    display(_control)


@apply_output.capture()
def on_apply_clicked(b):
    """Run apply_mappings with the widget values and report the outcome.

    `b` is the clicked button (unused). An empty output-file box is passed on
    as None so apply_mappings picks its default name. Printed and displayed
    output is captured by `apply_output`.
    """
    chosen_output = output_file_input.value or None
    result = apply_mappings(html_file_input.value, js_file_input.value, chosen_output)

    print(f"已生成更新后的HTML文件: {result['output_file']}")
    print(f"替换了 {result['replaced_count']} 个class属性")

    # Keep only combinations that occur at least twice and are at least
    # 30 characters long.
    remaining = result['remaining_classes']
    is_frequent = remaining['出现次数'] >= 2
    is_long = remaining['长度'] >= 30
    remaining_filtered = remaining[is_frequent & is_long]

    if remaining_filtered.empty:
        return

    print("\n仍有以下高频/长类组合未处理:")
    display(HTML(remaining_filtered.head(10).to_html()))


apply_button.on_click(on_apply_clicked)