In [None]:
import os

def is_text_file(filepath):
    """Check if a file is a text-based file.

    A path counts as text-based when it ends with one of the known text
    extensions or when its base name is one of the known extension-less
    file names. Only the path string is inspected; the file is never opened.

    Args:
        filepath: Path to classify, as a ``str`` or an ``os.PathLike`` object.

    Returns:
        ``True`` if the path looks like a text file, ``False`` otherwise.
    """
    text_extensions = ('.py', '.sh', '.txt', '.md', '.yaml', '.yml', '.cfg', '.ini',
                       '.csv', '.html', '.css', '.js', '.xml', '.rst')
    common_no_ext_files = {'Dockerfile', 'Makefile', 'entrypoint', 'split_data', 'get_data'}

    path = os.fspath(filepath)

    # Extensions are compared case-insensitively so that e.g. "README.MD" or
    # "DATA.CSV" are recognised; str.endswith tests the whole tuple in one call.
    # Extension-less names stay an exact match.
    return (path.lower().endswith(text_extensions) or
            os.path.basename(path) in common_no_ext_files)

def sanitize_content(content):
    """Strip non-printable characters from text, keeping basic whitespace.

    Newline, carriage return and tab are preserved even though
    ``str.isprintable`` reports them as non-printable.
    """
    kept_whitespace = '\n\r\t'

    def _keep(ch):
        return ch.isprintable() or ch in kept_whitespace

    return ''.join(filter(_keep, content))

def get_all_text_files(directory):
    """Recursively find all text-based files in the specified directory.

    Args:
        directory: Root directory to walk.

    Returns:
        A list of paths (each joined onto ``directory``) for every file that
        ``is_text_file`` accepts. The list is in a deterministic order:
        directories are visited, and files within each directory listed,
        in sorted name order.
    """
    text_files = []
    for root, dirs, files in os.walk(directory):
        # os.walk yields names in arbitrary, filesystem-dependent order.
        # Sorting ``dirs`` in place fixes the order in which subdirectories
        # are descended into, so the result is reproducible across runs.
        dirs.sort()
        for name in sorted(files):
            file_path = os.path.join(root, name)
            if is_text_file(file_path):
                text_files.append(file_path)
    return text_files

def merge_files(file_paths, output_file):
    """Merge all text files into a single output file.

    Each input is written under a banner giving its path and absolute
    location, followed by its sanitized content.

    Args:
        file_paths: Iterable of paths of the files to merge, in output order.
        output_file: Path of the merged file; it is created or overwritten.

    Inputs that cannot be read are reported on stdout and skipped, as is the
    output file itself if it appears among the inputs. Errors while writing
    the output file are not swallowed; they propagate to the caller.
    """
    separator = "=" * 50
    output_real = os.path.realpath(output_file)

    with open(output_file, 'w', encoding='utf-8') as out_file:
        for file_path in file_paths:
            # Only locating and reading the input is best-effort. Keeping the
            # writes outside the try stops an output failure from being
            # misreported as a skipped input.
            try:
                if os.path.realpath(file_path) == output_real:
                    # Reading the file we are currently writing would feed
                    # the merge its own partial output.
                    print(f"Skipping {file_path}: it is the output file")
                    continue
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    raw = f.read()
            except (OSError, ValueError) as e:
                # OSError: missing/unreadable file; ValueError: malformed path.
                print(f"Skipping {file_path}: {e}")
                continue

            content = sanitize_content(raw)

            out_file.write("\n" + separator + "\n")
            out_file.write(f"File: {file_path}\n")
            out_file.write(f"Location: {os.path.abspath(file_path)}\n")
            out_file.write(separator + "\n")
            out_file.write(content + '\n')

    print(f"Merged files saved to {output_file}")

if __name__ == "__main__":
    # Directory tree to scan, and where the merged dump is written.
    BASE_DIR = os.path.expanduser("~/Desktop/fedn_attack_simulator/examples/iris-sklearn")
    OUTPUT_FILE = os.path.expanduser("~/Desktop/merged_new_sklearn.txt")

    if os.path.exists(BASE_DIR):
        all_text_files = get_all_text_files(BASE_DIR)

        # Nothing to merge: report it rather than creating an empty output.
        if not all_text_files:
            print("No text files found in the specified directory.")
        else:
            merge_files(all_text_files, OUTPUT_FILE)
    else:
        print(f"Error: The directory {BASE_DIR} does not exist.")

Merged files saved to /Users/sigvard/Desktop/merged_new.txt
