In [30]:
import re

# Patterns used by _normalize_line, compiled once instead of on every line.
_WHITESPACE_RE = re.compile(r'\s+')
_SPLIT_ADVP_RE = re.compile(r'\bAdv P(?=\s*\()')
_LABEL_OPEN_RE = re.compile(r'(\w+)\s*\(')
_NP_OPEN_RE = re.compile(r'(\s*NP\s*\()')
_ADVP_OPEN_RE = re.compile(r'(\s*Adv P\s*\()')
_WORD_SPACE_RE = re.compile(r'(\w+)\s+')
_AROUND_OPEN_RE = re.compile(r'\s*\(\s*')
_BEFORE_CLOSE_RE = re.compile(r'\s*\)')


def _normalize_line(line):
    """Apply the bracket-normalization steps to one non-empty, stripped line."""
    # Step 1: Remove backslashes
    line = line.replace('\\', '')

    # Step 2: Collapse every whitespace run into a single space
    line = _WHITESPACE_RE.sub(' ', line).strip()

    # Step 3: Convert square brackets to parentheses
    line = line.replace('[', '(').replace(']', ')')

    # Join a split "Adv P" label *before* the generic label rewrite below.
    # Otherwise that rewrite turns "Adv P(" into "Adv (P " and the label can
    # no longer be recognised as AdvP.
    line = _SPLIT_ADVP_RE.sub('AdvP', line)

    # Step 4: Move each label inside its bracket: "NP(" -> "(NP "
    line = _LABEL_OPEN_RE.sub(r'(\1 ', line)

    # NOTE(review): after the rewrite above, "NP (" only occurs when an NP is
    # immediately followed by another opening bracket; this rule then moves
    # that bracket in front of the label. Confirm this is the intended shape.
    line = _NP_OPEN_RE.sub(r' (NP ', line)
    # Kept for the remaining case where "Adv P" ends up directly in front of
    # a "(" inserted by Step 4 (e.g. "Adv P X(...)").
    line = _ADVP_OPEN_RE.sub(r' (AdvP ', line)

    # Step 5: Wrap every word that is followed by whitespace in parentheses.
    # NOTE(review): this also wraps phrase labels such as "(NP)" - confirm
    # that is the desired output format.
    line = _WORD_SPACE_RE.sub(r'(\1) ', line)

    # Step 6: Drop whitespace around "(" and before ")"
    line = _AROUND_OPEN_RE.sub('(', line)
    line = _BEFORE_CLOSE_RE.sub(')', line)

    # Wrap the entire structure in one more pair of parentheses
    return f'({line.strip()})'


def normalize_bracket_format(input_file, output_file, encoding='utf-8'):
    """Rewrite square-bracketed lines of ``input_file`` into parenthesised form.

    Each non-blank line is stripped, cleaned of backslashes and repeated
    whitespace, converted from ``[...]`` to ``(...)``, passed through the
    label/word rewriting rules in ``_normalize_line`` and written to
    ``output_file`` followed by a newline. Blank lines are skipped.

    Example: ``S[NP[the dog] VP[runs]]`` is written as
    ``(((S)((NP)(the) dog)((VP) runs)))``.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the text file to create or overwrite.
        encoding: Text encoding used for both files. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(input_file, 'r', encoding=encoding) as infile, \
            open(output_file, 'w', encoding=encoding) as outfile:
        for raw_line in infile:
            stripped = raw_line.strip()
            if not stripped:
                continue
            outfile.write(_normalize_line(stripped) + '\n')

# Paths handed to normalize_bracket_format: the file to read, the file to write
input_file, output_file = 'raw.txt', 'refined.txt'

# Run the conversion, then report where the result was written
normalize_bracket_format(input_file=input_file, output_file=output_file)
print(f'Refined sentences saved to "{output_file}".')


Refined sentences saved to "refined.txt".
