In [1]:
import glob
import os
import sys
from pathlib import Path

def get_user_selection(options, prompt):
    """Show a numbered menu of files and return the ones the user picks.

    The options are sorted by (parent folder name, file name) and printed
    with numbers starting at 1. The user answers with comma-separated
    numbers and/or inclusive ranges, e.g. ``1,2,4-6``.

    Entries that are empty, not numeric, or outside the menu are reported
    and skipped instead of raising, so a typo cannot crash the program.
    A range whose end exceeds the menu is clamped to the last entry.

    Args:
        options: Iterable of file paths (strings or path-like objects).
        prompt: Heading printed above the menu.

    Returns:
        A list of the selected entries of ``options`` in the order they were
        requested, without duplicates. The list is empty when nothing valid
        was entered.
    """
    options = sorted(options, key=lambda x: (Path(x).parent.name, Path(x).name))
    print(prompt)
    for idx, option in enumerate(options, start=1):
        parent_folder = Path(option).parent.name
        file_name = Path(option).name
        print(f"{idx}. {parent_folder}/{file_name}")

    answer = input(
        "Enter the numbers of the files you want to select, separated by commas or ranges (e.g., 1,2,4-6): "
    )

    total = len(options)
    selected_files = []
    seen_positions = set()  # 0-based positions already selected

    for token in answer.split(","):
        token = token.strip()
        if not token:
            # Empty answer, doubled comma or trailing comma: nothing to select.
            continue

        first, dash, last = token.partition('-')
        try:
            start = int(first)
            end = int(last) if dash else start
        except ValueError:
            print(f"Ignoring invalid entry: {token!r}")
            continue

        # Menu numbers are 1-based; 0 or a negative value must not wrap
        # around to the end of the list through negative indexing.
        if start < 1 or start > total or end < start:
            print(f"Ignoring out-of-range entry: {token!r} (valid numbers are 1-{total})")
            continue

        # A range running past the end of the menu selects up to the last entry.
        end = min(end, total)
        for position in range(start - 1, end):
            if position not in seen_positions:
                seen_positions.add(position)
                selected_files.append(options[position])

    return selected_files

def get_output_directory():
    """Ask where the concatenated file should be written.

    The user enters '1' for the ``./Concatenated`` folder (created if it does
    not exist yet) or '2' for the current working directory. Surrounding
    whitespace is ignored; an empty answer means '1'. Any other answer is
    reported as invalid and treated like '1'.

    Returns:
        A ``Path`` to the chosen output directory.
    """
    choice = input(
        "Enter '1' to output to ./Concatenated or '2' to output to the current working directory (default is 1): "
    ).strip() or '1'

    if choice == '2':
        return Path.cwd()

    if choice != '1':
        print("Invalid choice, defaulting to ./Concatenated")

    # Both the explicit '1' and the invalid-choice fallback end up here.
    output_folder = Path('./Concatenated')
    output_folder.mkdir(exist_ok=True)
    return output_folder

def get_corpus_name(cwd):
    """Ask which name to give the concatenated corpus.

    The user enters '1' to type a name or '2' to reuse the name of the
    current directory. Surrounding whitespace is ignored; an empty answer
    means '1'. Any other answer is reported as invalid and the directory
    name is used.

    Args:
        cwd: Path of the current working directory; its last component is
            the fallback corpus name.

    Returns:
        The corpus name as a string.
    """
    choice = input(
        "Enter '1' to supply a corpus name or '2' to use the current directory name (default is 1): "
    ).strip() or '1'
    directory_name = os.path.basename(cwd)

    if choice == '1':
        name = input("Enter the corpus name: ").strip()
        if name:
            return name
        # An empty name would produce an output file literally called ".txt".
        print("No corpus name entered, using the current directory name.")
        return directory_name

    if choice != '2':
        print("Invalid choice, using the current directory name.")
    return directory_name

def list_txt_files(directory):
    """Return the paths of all ``.txt`` files under ``directory``, sorted.

    The search is recursive. Paths are returned as strings in ascending
    string order.
    """
    root = Path(directory)
    found = [str(entry) for entry in root.rglob('*.txt')]
    found.sort()
    return found

def _concatenate_files(files_to_concat, cwd):
    """Prompt for a corpus name and output folder, then write the merged file.

    Each source file is appended in order, followed by a newline. The text is
    first written to a temporary sibling file and then moved over the
    destination, so a destination that is also one of the sources is read
    before it is overwritten, and a failed read does not leave a half-written
    corpus file behind.

    Args:
        files_to_concat: Paths of the UTF-8 text files to merge, in order.
        cwd: Current working directory, passed on to ``get_corpus_name``.

    Returns:
        The ``Path`` of the file that was written.
    """
    corpus_name = get_corpus_name(cwd)
    output_folder = get_output_directory()
    destination_file = output_folder / f"{corpus_name}.txt"

    # Show what will be concatenated
    print("\nFiles to concatenate:")
    for f in files_to_concat:
        print(f" - {Path(f).name}")
    print(f"Destination file: {destination_file}\n")

    # The temporary name does not end in ".txt", so it is never picked up
    # as an input by a later scan for text files.
    temp_file = destination_file.with_name(destination_file.name + ".part")
    try:
        with open(temp_file, 'w', encoding='utf-8') as outfile:
            for fname in files_to_concat:
                with open(fname, 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                    outfile.write("\n")
        os.replace(temp_file, destination_file)
    finally:
        # Only present here when writing failed before the replace.
        if temp_file.exists():
            temp_file.unlink()

    print(f"Concatenated file created at: {destination_file}\n")
    return destination_file


def main():
    """Interactively concatenate the .txt files found under the working directory.

    The files are listed once at start-up. The user then repeatedly chooses
    to merge all remaining files, merge a hand-picked subset, or exit. Files
    that have been merged are not offered again. The loop ends when the user
    exits, when every file has been processed, or after the one-shot
    "concatenate all remaining files" follow-up action.
    """
    cwd = os.getcwd()
    all_files = list_txt_files(Path(cwd))
    if not all_files:
        print("No .txt files found in the current directory or subdirectories.")
        sys.exit(0)

    processed_files = set()

    while True:
        remaining_files = [f for f in all_files if f not in processed_files]
        if not remaining_files:
            print("No remaining files to process. Exiting program.")
            break

        print("\nChoose an action:")
        print("1) Concatenate all remaining files")
        print("2) Manually specify files to concatenate")
        print("3) Exit")
        choice = input("Enter choice (1/2/3): ").strip()

        if choice == '3':
            print("Exiting program.")
            break
        elif choice == '1':
            files_to_concat = remaining_files
        elif choice == '2':
            files_to_concat = get_user_selection(remaining_files, "Select the files to concatenate:")
            if not files_to_concat:
                print("No files selected. Returning to main menu.")
                continue
        else:
            print("Invalid choice. Please enter 1, 2, or 3.")
            continue

        _concatenate_files(files_to_concat, cwd)

        # Mark these files as processed
        processed_files.update(files_to_concat)

        # Post-job options
        remaining_after = [f for f in all_files if f not in processed_files]
        if not remaining_after:
            print("All files have been processed. Exiting program.")
            break

        print("What would you like to do next?")
        print("1) Exit program")
        print("2) Return to main menu")
        print("3) Concatenate all remaining files with a new corpus name")
        post = input("Enter choice (1/2/3): ").strip() or '1'

        if post == '1':
            print("Exiting program.")
            break
        elif post == '2':
            continue
        elif post == '3':
            # Single-shot concatenate remaining files, then finish.
            _concatenate_files(remaining_after, cwd)
            break
        else:
            print("Invalid choice. Exiting program.")
            break

# Start the interactive session only when this file is run as a script.
if __name__ == "__main__":
    main()


Choose an action:
1) Concatenate all remaining files
2) Manually specify files to concatenate
3) Exit


Enter choice (1/2/3):  2


Select the files to concatenate:
1. Book/Discours des raisons_corrected.txt
2. Book/Démonomanie Repair_corrected.txt
3. Book/Harangue - Fontainebleau_corrected.txt
4. Book/Harangue - Orléans 2_corrected.txt
5. Book/Harangue - Orléans_corrected.txt
6. Book/Harangue - Poissy_corrected.txt
7. Book/Harangue - Rouen_corrected.txt
8. Book/Harangue - Saint Germain_corrected.txt
9. Book/Harangue - lit de justice_corrected.txt
10. Book/Harangue - ouverture de parlement_corrected.txt
11. Book/Harangue - parlement 2_corrected.txt
12. Book/Harangue - parlement 3_corrected.txt
13. Book/Harangue - parlement_corrected.txt
14. Book/Harangue - religion_corrected.txt
15. Book/Harangue - septembre_corrected.txt
16. Book/La réponse_corrected.txt
17. Book/Le paradoxe_corrected.txt
18. Book/Lettre_corrected.txt
19. Book/Lit de justice_corrected.txt
20. Book/Memoire - Namur_corrected.txt
21. Book/Memoire - le but_corrected.txt
22. Book/Memoire au roi_corrected.txt
23. Book/Memoires d'État Refuge_corrected.txt

Enter the numbers of the files you want to select, separated by commas or ranges (e.g., 1,2,4-6):  2,16-18,25,28-29
Enter '1' to supply a corpus name or '2' to use the current directory name (default is 1):  1
Enter the corpus name:  Bodin
Enter '1' to output to ./Concatenated or '2' to output to the current working directory (default is 1):  1



Files to concatenate:
 - Démonomanie Repair_corrected.txt
 - La réponse_corrected.txt
 - Le paradoxe_corrected.txt
 - Lettre_corrected.txt
 - Recueil_corrected.txt
 - République_corrected.txt
 - Théatre_corrected.txt
Destination file: Concatenated/Bodin.txt

Concatenated file created at: Concatenated/Bodin.txt

What would you like to do next?
1) Exit program
2) Return to main menu
3) Concatenate all remaining files with a new corpus name


Enter choice (1/2/3):  3
Enter '1' to supply a corpus name or '2' to use the current directory name (default is 1):  1
Enter the corpus name:  L'Hospital
Enter '1' to output to ./Concatenated or '2' to output to the current working directory (default is 1):  1



Files to concatenate:
 - Discours des raisons_corrected.txt
 - Harangue - Fontainebleau_corrected.txt
 - Harangue - Orléans 2_corrected.txt
 - Harangue - Orléans_corrected.txt
 - Harangue - Poissy_corrected.txt
 - Harangue - Rouen_corrected.txt
 - Harangue - Saint Germain_corrected.txt
 - Harangue - lit de justice_corrected.txt
 - Harangue - ouverture de parlement_corrected.txt
 - Harangue - parlement 2_corrected.txt
 - Harangue - parlement 3_corrected.txt
 - Harangue - parlement_corrected.txt
 - Harangue - religion_corrected.txt
 - Harangue - septembre_corrected.txt
 - Lit de justice_corrected.txt
 - Memoire - Namur_corrected.txt
 - Memoire - le but_corrected.txt
 - Memoire au roi_corrected.txt
 - Memoires d'État Refuge_corrected.txt
 - Memoires d'état_corrected.txt
 - Remonstrances - Royaume_corrected.txt
 - Remonstrances - parlement_corrected.txt
 - Traite Justice VII_corrected.txt
 - Traite Justice VI_corrected.txt
 - Traite Justice V_corrected.txt
 - Traité Justice III_corrected.