In [1]:
# Step 1: Locate the merged files and make sure each one ends with a line break.
# Note: run this cell in the local Jupyter environment where the directory exists.

import os

# Directory that holds the merged files
merged_output_dir = '/mnt/d/Doc_to_day/ASNA_ForHandLoad/2024-10-28/20241024_merged_output/20241024_merged_output/merged_output/merged_output'


def ensure_trailing_newline(text):
    """Return ``text`` with a terminating line break added when it lacks one.

    Args:
        text: Full file content, read with ``newline=''`` so that the original
            line endings are still present.

    Returns:
        ``text`` unchanged when it is empty or already ends in a line break;
        otherwise ``text`` plus one line break. The added break is CRLF when
        the text already contains a CRLF, and LF otherwise, so the file keeps
        a single line-ending convention.
    """
    if not text or text.endswith(('\n', '\r')):
        return text
    line_break = '\r\n' if '\r\n' in text else '\n'
    return text + line_break


if not os.path.isdir(merged_output_dir):
    # Report the problem plainly instead of failing inside os.listdir().
    print(f"Directory not found: {merged_output_dir}")
else:
    # Step 1.1: List all entries in the merged_output directory
    merged_files = os.listdir(merged_output_dir)

    print("\nFiles in Merged Output Directory (First 10 files):")
    print("\n".join(merged_files[:10]))  # Print first 10 files for readability

    # Step 2: Terminate the last line of every file that lacks a line break.
    # Only the final line can be unterminated, so nothing else is touched.
    for file_name in merged_files:
        file_path = os.path.join(merged_output_dir, file_name)

        # os.listdir() also returns sub-directories; open() would fail on them.
        if os.path.isdir(file_path):
            continue

        try:
            # newline='' disables newline translation, so existing CRLF/LF
            # endings are seen (and kept) exactly as stored on disk.
            with open(file_path, 'r', encoding='cp1251', newline='') as file:
                content = file.read()

            missing = ensure_trailing_newline(content)[len(content):]
            if missing:
                # Append instead of rewriting: the existing bytes are never
                # truncated, and already-correct files are left untouched.
                with open(file_path, 'a', encoding='cp1251', newline='') as file:
                    file.write(missing)

        except FileNotFoundError as e:
            print(f"Error reading file: {file_path}, error: {e}")
        except UnicodeDecodeError as e:
            print(f"Encoding error in file: {file_path}, error: {e}")

    print("\nAll files in merged_output directory have been checked and corrected where necessary.")



Files in Merged Output Directory (First 10 files):
Org_FHAB_FHAB_1000_20241013T1345.txt
Org_FHAB_FHAB_1000_20241013T1345_RST.txt
Org_FHAB_FHAB_1003_20241013T1345.txt
Org_FHAB_FHAB_1003_20241013T1345_RST.txt
Org_FHAB_FHAB_1004_20241013T1345.txt
Org_FHAB_FHAB_1004_20241013T1345_RST.txt
Org_FHAB_FHAB_1005_20241013T1345.txt
Org_FHAB_FHAB_1005_20241013T1345_RST.txt
Org_FHAB_FHAB_1006_20241013T1345.txt
Org_FHAB_FHAB_1006_20241013T1345_RST.txt

All files in merged_output directory have been checked and corrected where necessary.


In [6]:
# Step 1: Point at the merged-output directory and enumerate its entries.
# Note: execute this cell in the local Jupyter environment where the directory exists.

import os
import re

# Directory that holds the merged files
merged_output_dir = '/mnt/d/Doc_to_day/ASNA_ForHandLoad/2024-10-28/20241024_merged_output/20241024_merged_output/merged_output/merged_output'

# Step 1.1: Directory listing
merged_files = os.listdir(merged_output_dir)

print("\nFiles in Merged Output Directory (First 10 files):")
print("\n".join(merged_files[:10]))  # short preview only

# Step 2: Scan every file for lines containing three consecutive double quotes,
# which this notebook treats as the sign of two records glued together.
# Each finding is stored as (file name, 1-based line number, stripped line text).
files_with_concatenation_issues = []

for file_name in merged_files:
    file_path = os.path.join(merged_output_dir, file_name)

    # Load the file; on a read problem report it and move on to the next file.
    try:
        with open(file_path, 'r', encoding='cp1251') as handle:
            lines = handle.readlines()
    except FileNotFoundError as e:
        print(f"Error reading file: {file_path}, error: {e}")
        continue
    except UnicodeDecodeError as e:
        print(f"Encoding error in file: {file_path}, error: {e}")
        continue

    files_with_concatenation_issues.extend(
        (file_name, number, text.strip())
        for number, text in enumerate(lines, start=1)
        if '"""' in text
    )

# Step 3: Summarise the findings (at most ten are shown, plus the total count).
print("\nFiles with concatenation issues:")
if not files_with_concatenation_issues:
    print("No lines with concatenation issues found.")
else:
    for file_name, line_number, line_content in files_with_concatenation_issues[:10]:
        print(f"File: {file_name}, Line: {line_number}, Content: {line_content}")
    print(f"\nTotal number of lines with concatenation issues: {len(files_with_concatenation_issues)}")



Files in Merged Output Directory (First 10 files):
Org_FHAB_FHAB_1000_20241013T1345.txt
Org_FHAB_FHAB_1000_20241013T1345_RST.txt
Org_FHAB_FHAB_1003_20241013T1345.txt
Org_FHAB_FHAB_1003_20241013T1345_RST.txt
Org_FHAB_FHAB_1004_20241013T1345.txt
Org_FHAB_FHAB_1004_20241013T1345_RST.txt
Org_FHAB_FHAB_1005_20241013T1345.txt
Org_FHAB_FHAB_1005_20241013T1345_RST.txt
Org_FHAB_FHAB_1006_20241013T1345.txt
Org_FHAB_FHAB_1006_20241013T1345_RST.txt

Files with concatenation issues:
File: Org_FHAB_FHAB_1000_20241013T1345.txt, Line: 6907, Content: "43309_33658_121299"|"отпуск по кассе от 01.10.2024"|"Org_FHAB"|"FHAB_1000"|2|"8902009768"|2024-09-30 22:00:00|"33658"|0|""|""|""|""|"113891"|"4601969007190"|6|"6670300868"|66|155.58|171.13|10.37|264|264|0|0|""|""|0|""|""|""|""|""|""|""|"""17286"|"приходная накладная от 03.09.2024"|"Org_FHAB"|"FHAB_1000"|3|"8902009768"|2024-09-02 22:00:00|"308989029-001"|0|""|""|""|""|"8606633"|"4602210003411"|20|"7724053916"|77|24919.8|27411.8|2492|40700|40700|0|0|""|""|

In [7]:
# Step 1: Set the merged-output directory and list what it contains.
# Note: this cell must run in the local Jupyter environment where the directory exists.

import os
import re

# Where the merged files live
merged_output_dir = '/mnt/d/Doc_to_day/ASNA_ForHandLoad/2024-10-28/20241024_merged_output/20241024_merged_output/merged_output/merged_output'

# Step 1.1: Collect the directory entries
merged_files = os.listdir(merged_output_dir)

print("\nFiles in Merged Output Directory (First 10 files):")
print("\n".join(merged_files[:10]))  # preview limited to ten names

# Step 2: Look for lines that contain three consecutive double quotes, used
# here as the indicator of records concatenated without a line break.
files_with_concatenation_issues = []  # (file name, 1-based line number, stripped line)
files_with_issues_set = set()         # names of files with at least one such line

for file_name in merged_files:
    file_path = os.path.join(merged_output_dir, file_name)

    try:
        with open(file_path, 'r', encoding='cp1251') as file:
            lines = file.readlines()
    except FileNotFoundError as e:
        print(f"Error reading file: {file_path}, error: {e}")
    except UnicodeDecodeError as e:
        print(f"Encoding error in file: {file_path}, error: {e}")
    else:
        # Gather this file's suspicious lines, then record them in one go.
        hits = [
            (file_name, line_number, line.strip())
            for line_number, line in enumerate(lines, start=1)
            if '"""' in line
        ]
        if hits:
            files_with_concatenation_issues += hits
            files_with_issues_set.add(file_name)

# Step 3: Report the findings: first ten lines, the total, then the affected files.
print("\nFiles with concatenation issues:")
if not files_with_concatenation_issues:
    print("No lines with concatenation issues found.")
else:
    for file_name, line_number, line_content in files_with_concatenation_issues[:10]:
        print(f"File: {file_name}, Line: {line_number}, Content: {line_content}")
    print(f"\nTotal number of lines with concatenation issues: {len(files_with_concatenation_issues)}")

    # Alphabetical list of every affected file
    print("\nFiles with concatenation issues (unique list):")
    for file_with_issue in sorted(files_with_issues_set):
        print(file_with_issue)


Files in Merged Output Directory (First 10 files):
Org_FHAB_FHAB_1000_20241013T1345.txt
Org_FHAB_FHAB_1000_20241013T1345_RST.txt
Org_FHAB_FHAB_1003_20241013T1345.txt
Org_FHAB_FHAB_1003_20241013T1345_RST.txt
Org_FHAB_FHAB_1004_20241013T1345.txt
Org_FHAB_FHAB_1004_20241013T1345_RST.txt
Org_FHAB_FHAB_1005_20241013T1345.txt
Org_FHAB_FHAB_1005_20241013T1345_RST.txt
Org_FHAB_FHAB_1006_20241013T1345.txt
Org_FHAB_FHAB_1006_20241013T1345_RST.txt

Files with concatenation issues:
File: Org_FHAB_FHAB_1000_20241013T1345.txt, Line: 6907, Content: "43309_33658_121299"|"отпуск по кассе от 01.10.2024"|"Org_FHAB"|"FHAB_1000"|2|"8902009768"|2024-09-30 22:00:00|"33658"|0|""|""|""|""|"113891"|"4601969007190"|6|"6670300868"|66|155.58|171.13|10.37|264|264|0|0|""|""|0|""|""|""|""|""|""|""|"""17286"|"приходная накладная от 03.09.2024"|"Org_FHAB"|"FHAB_1000"|3|"8902009768"|2024-09-02 22:00:00|"308989029-001"|0|""|""|""|""|"8606633"|"4602210003411"|20|"7724053916"|77|24919.8|27411.8|2492|40700|40700|0|0|""|""|

In [8]:
# Step 1: Locate the merged files and split records that were glued together.
# Note: run this cell in the local Jupyter environment where the directory exists.

import os
import re

# Directory that holds the merged files
merged_output_dir = '/mnt/d/Doc_to_day/ASNA_ForHandLoad/2024-10-28/20241024_merged_output/20241024_merged_output/merged_output/merged_output'

# Three consecutive double quotes: the marker this notebook treats as the seam
# between two records that were written without a line break in between.
TRIPLE_QUOTE = '"' * 3


def split_concatenated_records(line):
    """Split one physical line into its records at every triple-quote seam.

    At each seam the text on the left gets an empty quoted field plus a line
    break appended, and the text on the right gets its opening quote back.
    All seams are processed, so a line holding three or more glued records
    yields every one of them (nothing after the second seam is discarded).

    NOTE(review): this assumes a triple quote never occurs inside a
    legitimate field (for example as an escaped quote); confirm against the
    file format before running on new data.

    Args:
        line: One line as returned by ``readlines()``, with or without its
            terminating line break.

    Returns:
        A list of output lines. A line without a seam is returned unchanged
        as a single-element list. Inserted line breaks reuse the terminator
        of ``line`` (LF when ``line`` has none), and the last record keeps
        the original terminator exactly.
    """
    if TRIPLE_QUOTE not in line:
        return [line]

    # Detach the terminator first so stripping a piece can never swallow it.
    body = line.rstrip('\r\n')
    terminator = line[len(body):]
    line_break = terminator or '\n'

    pieces = body.split(TRIPLE_QUOTE)
    records = [pieces[0] + '""' + line_break]
    for piece in pieces[1:-1]:
        # A middle piece is a complete record: it needs both the opening
        # quote and the closing empty field restored.
        records.append('"' + piece.lstrip() + '""' + line_break)
    records.append('"' + pieces[-1].lstrip() + terminator)
    return records


if not os.path.isdir(merged_output_dir):
    # Report the problem plainly instead of failing inside os.listdir().
    print(f"Directory not found: {merged_output_dir}")
else:
    # Step 1.1: List all entries in the merged_output directory
    merged_files = os.listdir(merged_output_dir)

    print("\nFiles in Merged Output Directory (First 10 files):")
    print("\n".join(merged_files[:10]))  # Print first 10 files for readability

    # Step 2: Go through each file and correct concatenated lines with triple quotes
    for file_name in merged_files:
        file_path = os.path.join(merged_output_dir, file_name)

        # os.listdir() also returns sub-directories; open() would fail on them.
        if os.path.isdir(file_path):
            continue

        try:
            # newline='' disables newline translation so the file's own
            # line endings survive the round trip.
            with open(file_path, 'r', encoding='cp1251', newline='') as file:
                lines = file.readlines()

            corrected_lines = []
            for line in lines:
                corrected_lines.extend(split_concatenated_records(line))

            # Rewrite only files that actually changed; untouched data files
            # are never truncated and reopened.
            if corrected_lines != lines:
                with open(file_path, 'w', encoding='cp1251', newline='') as file:
                    file.writelines(corrected_lines)

        except FileNotFoundError as e:
            print(f"Error reading file: {file_path}, error: {e}")
        except UnicodeDecodeError as e:
            print(f"Encoding error in file: {file_path}, error: {e}")

    print("\nAll files in merged_output directory have been checked and corrected for concatenated lines with triple quotes.")


Files in Merged Output Directory (First 10 files):
Org_FHAB_FHAB_1000_20241013T1345.txt
Org_FHAB_FHAB_1000_20241013T1345_RST.txt
Org_FHAB_FHAB_1003_20241013T1345.txt
Org_FHAB_FHAB_1003_20241013T1345_RST.txt
Org_FHAB_FHAB_1004_20241013T1345.txt
Org_FHAB_FHAB_1004_20241013T1345_RST.txt
Org_FHAB_FHAB_1005_20241013T1345.txt
Org_FHAB_FHAB_1005_20241013T1345_RST.txt
Org_FHAB_FHAB_1006_20241013T1345.txt
Org_FHAB_FHAB_1006_20241013T1345_RST.txt

All files in merged_output directory have been checked and corrected for concatenated lines with triple quotes.
