In [1]:
def convert_format(input_file, output_file):
    """
    Convert tab-separated variant/standard pairs into a labelled text listing.

    Every input line that contains a tab and does not start with ``#`` is
    treated as ``variant<TAB>standard``. For each such line three lines are
    emitted: the variant with its label, the standard form with its label,
    and an empty separator line. Columns after the second are ignored, so a
    line carrying extra tab-separated fields no longer aborts the whole run.

    Failures are reported on stdout rather than raised; in that case the
    function returns without printing the completion message.

    Args:
        input_file (str): Path to input file
        output_file (str): Path to output file
    """
    # Reading is guarded on its own so that a FileNotFoundError raised while
    # opening the *output* path is never reported as a missing input file.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"错误：找不到输入文件 {input_file}")
        return
    except Exception as e:
        print(f"处理过程中发生错误：{str(e)}")
        return

    formatted_lines = []
    for line in text.strip().split('\n'):
        # Skip lines that don't contain a tab or start with #
        if '\t' not in line or line.startswith('#'):
            continue

        # The tab check above guarantees at least two fields; slicing keeps
        # the first two and drops any additional columns.
        variant, standard = line.split('\t')[:2]

        formatted_lines.append(f"错误形式：{variant}")
        formatted_lines.append(f"正确形式：{standard}")
        formatted_lines.append("")  # Blank separator between entries

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(formatted_lines))
    except Exception as e:
        print(f"处理过程中发生错误：{str(e)}")
        return

    print(f"转换完成！结果已保存到：{output_file}")

# Example usage: both paths are relative to the current working directory.
# input_file is the file to read, output_file the file to write.
input_file, output_file = "custom_confusion.txt", "formatted_output.txt"
convert_format(input_file, output_file)

转换完成！结果已保存到：formatted_output.txt


In [7]:
def convert_format(input_file, output_file):
    """
    Extract original/corrected pairs from a tab-separated test file.

    Each input line is stripped and split on tabs. Lines with fewer than
    three columns are skipped, as are lines whose first column starts with
    ``0``. For every remaining line the second column is written as the
    original form and the third as the corrected form, each with its label,
    followed by an empty separator line.

    Failures are reported on stdout rather than raised; in that case the
    function returns without printing the completion message.

    Args:
        input_file (str): Path to input file
        output_file (str): Path to output file
    """
    # Reading is guarded on its own so that a FileNotFoundError raised while
    # opening the *output* path is never reported as a missing input file.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"错误：找不到输入文件 {input_file}")
        return
    except Exception as e:
        print(f"处理过程中发生错误：{str(e)}")
        return

    formatted_lines = []
    for line in lines:
        parts = line.strip().split('\t')
        # Blank lines split into a single empty field, so this guard also
        # filters them out.
        if len(parts) < 3:
            continue

        # Only lines whose first column does not start with '0' are kept.
        if parts[0].startswith('0'):
            continue

        original = parts[1].strip()
        correct = parts[2].strip()

        formatted_lines.append(f"错误形式：{original}")
        formatted_lines.append(f"正确形式：{correct}")
        formatted_lines.append("")  # Blank separator between entries

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(formatted_lines))
    except Exception as e:
        print(f"处理过程中发生错误：{str(e)}")
        return

    print(f"转换完成！结果已保存到：{output_file}")

# Example usage: both paths are relative to the current working directory.
# input_file is the file to read, output_file the file to write.
input_file, output_file = "odw.test", "formatted_odw.txt"
convert_format(input_file, output_file)

转换完成！结果已保存到：formatted_odw.txt
