In [1]:
import glob
import os
import sys
from pathlib import Path

def get_user_selection(options, prompt):
    """Show a numbered menu of files and return the entries the user picks.

    The options are sorted by parent folder name and then file name, printed
    as a 1-based numbered list, and the user is asked for a comma-separated
    list of numbers and/or inclusive ranges (for example ``1,2,4-6``).

    Args:
        options: Iterable of file paths (strings or path-like objects).
        prompt: Heading printed above the numbered list.

    Returns:
        A list with the selected entries of ``options`` in the order the user
        typed them. The list is empty when nothing valid was entered.
        Invalid or out-of-range tokens are reported and ignored rather than
        aborting the whole selection.
    """
    # Sort by parent folder and file name so the menu numbering is predictable.
    options = sorted(options, key=lambda x: (Path(x).parent.name, Path(x).name))
    print(prompt)
    for idx, option in enumerate(options, start=1):
        option_path = Path(option)
        print(f"{idx}. {option_path.parent.name}/{option_path.name}")

    raw_selection = input("Enter the numbers of the files you want to select, separated by commas or ranges (e.g., 1,2,4-6): ")
    selected_files = []

    for token in raw_selection.split(","):
        token = token.strip()
        if not token:
            # Blank input or a stray comma: nothing to select for this token.
            continue

        try:
            if '-' in token:
                start, end = map(int, token.split('-'))
            else:
                start = end = int(token)
        except ValueError:
            print(f"Ignoring invalid selection: {token!r}")
            continue

        # Menu numbers are 1-based. Without this check, 0 would be turned into
        # index -1 and silently select the last file.
        if start < 1 or end < start or start > len(options):
            print(f"Ignoring out-of-range selection: {token!r}")
            continue

        # The slice clamps a range that runs past the end of the menu.
        selected_files.extend(options[start - 1:end])

    return selected_files

def get_output_directory():
    """Ask where the concatenated file should go and return that directory.

    Entering '2' returns the current working directory. Entering '1', or
    nothing at all, returns ``./Concatenated``, which is created if it does
    not exist yet. Any other answer prints a notice and also falls back to
    ``./Concatenated``.

    Returns:
        A ``Path`` pointing at the chosen output directory.
    """
    answer = input("Enter '1' to output to ./Concatenated or '2' to output to the current working directory (default is 1): ") or '1'

    if answer == '2':
        return Path.cwd()

    if answer != '1':
        print("Invalid choice, defaulting to ./Concatenated")

    # Both the explicit '1' and the invalid-input fallback end up here.
    concatenated_dir = Path('./Concatenated')
    concatenated_dir.mkdir(exist_ok=True)
    return concatenated_dir

def get_corpus_name(cwd):
    """Ask the user how the corpus should be named and return that name.

    Entering '1', or nothing at all, prompts for a name and returns it exactly
    as typed. Entering '2' returns the last component of ``cwd``. Any other
    answer prints a notice and also uses the last component of ``cwd``.

    Args:
        cwd: Path of the directory whose base name serves as the fallback.

    Returns:
        The corpus name as a string.
    """
    selection = input("Enter '1' to supply a corpus name or '2' to use the current directory name (default is 1): ") or '1'

    if selection == '1':
        return input("Enter the corpus name: ")

    if selection != '2':
        print("Invalid choice, using the current directory name.")

    # Both the explicit '2' and the invalid-input fallback end up here.
    return os.path.basename(cwd)

def list_txt_files(directory):
    """Return the sorted paths of all ``.txt`` files under ``directory``.

    The search is recursive. Only regular files are returned: a directory
    whose name happens to end in ``.txt`` also matches the glob pattern, but
    it cannot be opened for reading later, so it is filtered out here.

    Args:
        directory: Root directory to search (string or path-like).

    Returns:
        A sorted list of path strings; empty when nothing matches.
    """
    return sorted(
        str(path)
        for path in Path(directory).rglob('*.txt')
        if path.is_file()
    )

def user_options(first_run=True):
    """Run the interactive menu and concatenate the chosen text files.

    The user chooses between concatenating every ``.txt`` file found under
    the module-level ``texts_folder``, hand-picking files from that list, or
    exiting. The chosen files are written one after another, each followed by
    a newline, into ``<output folder>/<corpus name>.txt``.

    The function reads the module-level globals ``cwd`` and ``texts_folder``.
    It never returns normally: every path out of the loop ends in
    ``sys.exit(0)``.

    Args:
        first_run: When true, the first prompt says "all files"; later
            prompts say "remaining files".
    """
    while True:
        if first_run:
            choice = input("Enter '1' to concatenate all files, '2' to manually specify files to concatenate or '3' to exit: ")
            first_run = False
        else:
            choice = input("Enter '1' to concatenate remaining files, '2' to manually specify files to concatenate, or '3' to exit: ")

        if choice == '3':
            print("Exiting program.")
            sys.exit(0)
        if choice not in ('1', '2'):
            print("Invalid choice.")
            continue

        corpus_name = get_corpus_name(cwd)
        output_folder = get_output_directory()
        destination_file = output_folder / f"{corpus_name}.txt"

        # The destination lives inside the tree that is scanned for inputs.
        # On a re-run with the same corpus name it would be listed as an
        # input and be truncated by the write below while it is being read,
        # so it is excluded from the candidates.
        destination_resolved = destination_file.resolve()
        candidates = [
            path for path in list_txt_files(texts_folder)
            if Path(path).resolve() != destination_resolved
        ]
        if not candidates:
            print("No files remaining to concatenate. Exiting program.")
            sys.exit(0)  # Exit if there are no files to process

        if choice == '2':
            files_to_concat = get_user_selection(candidates, "Select the files to concatenate:")
            if not files_to_concat:
                print("No files selected. Exiting program.")
                sys.exit(0)
        else:
            files_to_concat = candidates

        print("Files to concatenate:")
        for file in files_to_concat:
            print(Path(file).name)
        print(f"Destination file: {destination_file}")

        # Concatenate the files, separating consecutive texts with a newline.
        with open(destination_file, 'w', encoding='utf-8') as outfile:
            for fname in files_to_concat:
                with open(fname, 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                    outfile.write("\n")

        print(f"Concatenated file created at: {destination_file}")
        # Exit after concatenation. Remove this line if you want the loop to run again.
        sys.exit(0)

# Set the folder for the input texts: the directory this script/notebook is
# run from. Both names are module-level globals that user_options() reads,
# so they must be assigned before it is called.
cwd = os.getcwd()
texts_folder = Path(cwd)

# Run user options. This starts the interactive prompt loop, which always
# finishes by calling sys.exit(0); in a notebook that shows up as a
# SystemExit traceback rather than a silent return.
user_options()

Enter '1' to concatenate all files, '2' to manually specify files to concatenate or '3' to exit:  1
Enter '1' to supply a corpus name or '2' to use the current directory name (default is 1):  1
Enter the corpus name:  All_files
Enter '1' to output to ./Concatenated or '2' to output to the current working directory (default is 1):  1


Files to concatenate:
Discours des raisons_corrected_stemmed.txt
Démonomanie III_corrected_stemmed.txt
Démonomanie II_corrected_stemmed.txt
Démonomanie IV_corrected_stemmed.txt
Démonomanie I_corrected_stemmed.txt
Démonomanie preface Repair_corrected_stemmed.txt
Harangue - Fontainebleau_corrected_stemmed.txt
Harangue - Orléans 2_corrected_stemmed.txt
Harangue - Orléans_corrected_stemmed.txt
Harangue - Poissy_corrected_stemmed.txt
Harangue - Rouen_corrected_stemmed.txt
Harangue - Saint Germain_corrected_stemmed.txt
Harangue - lit de justice_corrected_stemmed.txt
Harangue - ouverture de parlement_corrected_stemmed.txt
Harangue - parlement 2_corrected_stemmed.txt
Harangue - parlement 3_corrected_stemmed.txt
Harangue - parlement_corrected_stemmed.txt
Harangue - religion_corrected_stemmed.txt
Harangue - septembre_corrected_stemmed.txt
La réponse_corrected_stemmed.txt
Le paradoxe_corrected_stemmed.txt
Lettre_corrected_stemmed.txt
Lit de justice_corrected_stemmed.txt
Memoire - Namur_corrected_

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
