In [1]:
import glob
import os
from pathlib import Path

def get_user_selection(options, prompt):
    """Show a numbered menu of *options* and return the entries the user picks.

    The options are sorted before being displayed, so the numbers the user
    types refer to 1-based positions in the sorted list. The user is asked
    again until every number entered is a valid menu position.

    Args:
        options: Iterable of mutually sortable items to choose from.
        prompt: Heading printed above the numbered list.

    Returns:
        A list of the chosen options in the order the user typed them
        (duplicates are kept). Empty when there is nothing to choose from.
    """
    options = sorted(options)  # Numbering must match the displayed order
    print(prompt)
    for idx, option in enumerate(options, start=1):
        print(f"{idx}. {option}")

    if not options:
        # No number could ever be valid, so prompting would loop forever.
        print("No options available.")
        return []

    while True:
        raw = input("Enter the numbers of the files you want to select, separated by commas: ")
        # Drop blank tokens so stray or trailing commas are harmless.
        tokens = [token.strip() for token in raw.split(",") if token.strip()]
        try:
            numbers = [int(token) for token in tokens]
        except ValueError:
            print("Please enter whole numbers separated by commas.")
            continue
        # Reject 0 and negatives explicitly: after the 1-based shift they
        # would silently index from the end of the list.
        if numbers and all(1 <= number <= len(options) for number in numbers):
            return [options[number - 1] for number in numbers]
        print(f"Please enter numbers between 1 and {len(options)}.")

def get_output_directory():
    """Ask where the concatenated file should be written and return that folder.

    Answering '2' selects the current working directory. Answering '1', or
    anything unrecognised (after a warning), selects ./Concatenated, which is
    created if it does not already exist.

    Returns:
        A ``Path`` for the chosen output folder.
    """
    answer = input("Enter '1' to output to ./Concatenated or '2' to output to the current working directory: ")
    if answer == '2':
        return Path.cwd()

    # Everything else ends up in ./Concatenated; only a non-'1' answer warns.
    if answer != '1':
        print("Invalid choice, defaulting to ./Concatenated")
    target = Path('./Concatenated')
    target.mkdir(exist_ok=True)
    return target

def get_corpus_name(cwd):
    """Ask the user how the corpus should be named and return that name.

    Answering '1' triggers a second prompt whose reply is returned verbatim.
    Answering '2', or anything unrecognised (after a warning), yields the
    final path component of *cwd*.

    Args:
        cwd: Path string of the directory whose name serves as the default.

    Returns:
        The corpus name as a string.
    """
    answer = input("Enter '1' to supply a corpus name or '2' to use the current directory name: ")
    if answer == '1':
        return input("Enter the corpus name: ")

    # Both the explicit '2' and the fallback use the directory name.
    if answer != '2':
        print("Invalid choice, using the current directory name.")
    return os.path.basename(cwd)

# Set the folder for the input texts
cwd = os.getcwd()
texts_folder = Path(cwd)

# Get the corpus name
corpus_name = get_corpus_name(cwd)

# Get the list of .txt files in the current directory
txt_files = sorted(texts_folder.glob('*.txt'))

# Prompt the user to select which files to concatenate
selected_files = get_user_selection(txt_files, "Select which files to concatenate:")

# Prompt the user to select the output directory
output_folder = get_output_directory()
output_file = output_folder / f'{corpus_name}.txt'

# Read every selected file BEFORE opening the output for writing: when the
# output lands in the current directory and shares its name with one of the
# selected inputs, opening it with 'w' first would truncate that input
# before it could be read.
# The encoding is explicit so accented characters round-trip regardless of
# the platform's default locale.
texts = []
for txt in selected_files:
    print(os.path.basename(txt))
    with open(txt, 'r', encoding='utf-8') as f_in:
        texts.append(f_in.read())

# Concatenate the selected files, separating each text with a blank line
with open(output_file, 'w', encoding='utf-8') as f_out:
    for text in texts:
        f_out.write(text + "\n" + "\n")

print(f'Concatenated into {output_file.name}')

Enter '1' to supply a corpus name or '2' to use the current directory name:  1
Enter the corpus name:  L'Hospital


Select which files to concatenate:
1. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Bodin.txt
2. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Discours des raisons_corrected.txt
3. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Démonomanie Repair_corrected.txt
4. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Fontainebleau_corrected.txt
5. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Orléans 2_corrected.txt
6. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Orléans_corrected.txt
7. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Poissy_corrected.txt
8. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Rouen_corrected.txt
9. /home/lucas-jerusalimiec/Documents/OCR Text/Text/Collected Tokenized/Harangue - Saint Germain_corrected.txt
10. /home/lucas-jerusalimiec/D

Enter the numbers of the files you want to select, separated by commas:  2,4,5,6,7,8,9,10,11,12,13,14,15,16,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37
Enter '1' to output to ./Concatenated or '2' to output to the current working directory:  2


Discours des raisons_corrected.txt
Harangue - Fontainebleau_corrected.txt
Harangue - Orléans 2_corrected.txt
Harangue - Orléans_corrected.txt
Harangue - Poissy_corrected.txt
Harangue - Rouen_corrected.txt
Harangue - Saint Germain_corrected.txt
Harangue - lit de justice_corrected.txt
Harangue - ouverture de parlement_corrected.txt
Harangue - parlement 2_corrected.txt
Harangue - parlement 3_corrected.txt
Harangue - parlement_corrected.txt
Harangue - religion_corrected.txt
Harangue - septembre_corrected.txt
Lit de justice_corrected.txt
Memoire - Namur_corrected.txt
Memoire - le but_corrected.txt
Memoire au roi_corrected.txt
Memoires d'État Refuge_corrected.txt
Memoires d'état_corrected.txt
Remonstrances - Royaume_corrected.txt
Remonstrances - parlement_corrected.txt
Traite Justice VII_corrected.txt
Traite Justice VI_corrected.txt
Traite Justice V_corrected.txt
Traité Justice III_corrected.txt
Traité Justice II_corrected.txt
Traité Justice IV_corrected.txt
Traité Justice I_corrected.txt
Co