## For CSV File Format

In [13]:
import os

# Paths
main_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT'
output_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\Merged(ALL+EXP+NFT+MCX)'
output_file_path = os.path.join(output_folder_path, 'Merged_NFT.csv')

# Ensure output directory exists
os.makedirs(output_folder_path, exist_ok=True)

# Header text of the first non-empty file; any later line equal to it is
# treated as a repeated header and dropped.
seen_header = None
header_written = False

# Concatenate every .csv found under main_folder_path into one output file,
# writing the header once and copying the remaining lines verbatim.
with open(output_file_path, 'w', encoding='utf-8') as outfile:
    for root, dirs, files in os.walk(main_folder_path):
        # Sort in place so the walk (and therefore the merged row order) is
        # the same on every run instead of depending on directory enumeration.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.csv'):
                continue
            file_path = os.path.join(root, file)
            try:
                # 'utf-8-sig' drops a leading byte-order mark if present, so it
                # cannot end up inside seen_header and defeat the comparison
                # against repeated header lines below.
                with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as infile:
                    # Read only the first line up front; the rest is streamed
                    # so a large file is never held in memory as a whole.
                    first_line = infile.readline()
                    if not first_line:
                        print(f"Skipped empty file: {file_path}")
                        continue

                    # Identify header from current file
                    current_header = first_line.strip()

                    # Write header only once
                    if not header_written:
                        outfile.write(current_header + '\n')
                        seen_header = current_header
                        header_written = True

                    # Copy the remaining lines, skipping repeated headers
                    for line in infile:
                        if line.strip() == seen_header:
                            continue
                        # A file whose last line lacks a newline would otherwise
                        # be fused with the first row of the next file.
                        if not line.endswith('\n'):
                            line += '\n'
                        outfile.write(line)

                print(f"Merged: {file_path}")
            except Exception as e:
                # Best effort: report the failing file and carry on with the rest.
                print(f"Error processing {file_path}: {e}")

print(f"\n✅ Clean merge complete: '{output_file_path}'")


Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250401.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250402.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250403.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250404.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250407.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250408.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250409.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250411.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250415.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250416.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250417.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250421.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250422.csv
Merged: E:\D

In [17]:
import os
import csv

# Input folder that gets scanned, and the folder that receives the merged file
main_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT'
output_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\Merged(ALL+EXP+NFT+MCX)'

# Create the destination folder first (no error when it already exists),
# then derive the full path of the merged file inside it.
os.makedirs(output_folder_path, exist_ok=True)
output_file_path = os.path.join(output_folder_path, 'Merged_NFT.csv')

# Header row of the merged file, kept as one tab-separated literal
header_line = "1\t9999\t\tReferenceNo\tSymbol1\tExpiry1\tprice1\tQty1\tSymbol2\tExpiry2\tStrikePrice2\tOption2\tprice2\tQty2\tSymbol3\tExpiry3\tStrikePrice3\tOption3\tprice3\tQty3\tprice4\tQty4\tSpreadSide\tParity\tParityLive\tDisparity\tSpinName\tEntryTime\tErrorText\tRefFuture\tRefSpot"

# Ordered list of column names: the literal cut at every tab character
main_header = header_line.split(sep='\t')

# Lower-case fragments; a line containing any of them is treated as noise
noise_phrases = [
    "order value exceeds",
    "due to l2 cancel",
    "due to l3 cancel",
    "groupname",
    "manager:",
    "exceeded",
    "assigned:",
    "required:",
]


def is_noise_line(line):
    """Return True if `line`, lower-cased, contains any entry of `noise_phrases`."""
    lowered = line.lower()
    for fragment in noise_phrases:
        if fragment in lowered:
            return True
    return False

def parse_and_align(line, header=None):
    """Split a tab-separated line and fit it to the header's column count.

    Args:
        line: One raw text line, with or without its line terminator.
        header: Optional list of column names to align against. When omitted,
            the module-level `main_header` is used.

    Returns:
        A list with exactly as many fields as the header (surplus fields are
        cut off), or None when the line has fewer fields than the header.
    """
    columns = main_header if header is None else header
    # Trim only spaces and line terminators from the ends. A bare strip() would
    # also remove leading/trailing TABs, which are field delimiters here: rows
    # whose first or last cells are empty would lose columns and be rejected
    # (or be shifted out of alignment).
    parts = line.strip(' \r\n').split('\t')
    if len(parts) < len(columns):
        return None
    # Keep only as many columns as the header has
    return parts[:len(columns)]

print("Starting merge...")

# Total number of lines rejected by parse_and_align across all files, so that
# discarded data is visible instead of disappearing silently.
dropped_rows = 0

# Write the fixed header once, then append every accepted row from each .csv
# found under main_folder_path. The output is tab-delimited.
with open(output_file_path, 'w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile, delimiter='\t')
    writer.writerow(main_header)  # header already split into columns

    for root, dirs, files in os.walk(main_folder_path):
        # Sort in place so the walk (and therefore the merged row order) is
        # the same on every run instead of depending on directory enumeration.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.csv'):
                continue
            file_path = os.path.join(root, file)
            file_dropped = 0
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as infile:
                    for line in infile:
                        # Skip header rows, known noise messages and blank lines
                        if ('ReferenceNo' in line) or is_noise_line(line) or not line.strip():
                            continue
                        aligned_row = parse_and_align(line)
                        if aligned_row:
                            writer.writerow(aligned_row)
                        else:
                            file_dropped += 1
                print(f"Merged: {file_path}")
                if file_dropped:
                    print(f"  Dropped {file_dropped} row(s) with too few columns: {file_path}")
            except Exception as e:
                # Best effort: report the failing file and carry on with the rest.
                print(f"Error processing {file_path}: {e}")
            dropped_rows += file_dropped

print(f"\n✅ Clean merge complete: '{output_file_path}'")
if dropped_rows:
    print(f"Rows dropped for having too few columns: {dropped_rows}")


Starting merge...
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250401.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250402.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250403.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250404.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250407.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250408.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250409.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250411.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250415.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250416.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250417.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_20250421.csv
Merged: E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_NFT\MergedNFT_2025042

## For XLSX File Format

In [None]:
import os
from openpyxl import load_workbook, Workbook

# Input/output paths
main_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\MERGE_ALL'
output_folder_path = r'E:\DATA\2025-2026\MERGE_SPREADBOOK\Merged'
output_file_path = os.path.join(output_folder_path, 'Merged_ALL.xlsx')

# Ensure output directory exists
os.makedirs(output_folder_path, exist_ok=True)

# Create new workbook for output
merged_wb = Workbook()
merged_ws = merged_wb.active
merged_ws.title = "MergedData"

# Track if header has been written
header_written = False

# Append the active sheet of every .xlsx under main_folder_path to the merged
# sheet: the first row of the first non-empty file becomes the header, and the
# first row of every later file is discarded.
for root, dirs, files in os.walk(main_folder_path):
    # Sort in place so the walk (and therefore the merged row order) is the
    # same on every run instead of depending on directory enumeration.
    dirs.sort()
    for file in sorted(files):
        if not file.lower().endswith('.xlsx'):
            continue
        file_path = os.path.join(root, file)
        wb = None
        try:
            wb = load_workbook(file_path, read_only=True, data_only=True)
            sheet = wb.active  # the workbook's active sheet

            rows = sheet.iter_rows(values_only=True)
            header = next(rows, None)

            if not header:
                print(f"Skipped empty Excel file: {file_path}")
                continue

            # Write header only once
            if not header_written:
                merged_ws.append(header)
                header_written = True

            # Write the rest of the data
            for row in rows:
                merged_ws.append(row)

            print(f"Merged: {file_path}")

        except Exception as e:
            # Best effort: report the failing file and carry on with the rest.
            # Rows appended before the failure stay in the merged sheet.
            print(f"Error processing {file_path}: {e}")
        finally:
            # Read-only workbooks keep the source file open until closed
            # explicitly; this also runs on the empty-file `continue` above.
            if wb is not None:
                wb.close()

# Save the final merged workbook
merged_wb.save(output_file_path)
print(f"\n✅ Fast Excel merge complete: '{output_file_path}'")
