In [None]:
import os
import csv
import nltk
from scipy.stats import fisher_exact
from openpyxl import Workbook, load_workbook
from itertools import combinations
from collections import Counter

# Function to filter a list of file names down to those starting with a given pattern
def prompt_pattern_files(text_files, pattern):
    """
    Return the entries of ``text_files`` whose names begin with ``pattern``.

    The match is a plain, case-sensitive prefix test; the input order is kept.
    """
    matching = []
    for name in text_files:
        if name.startswith(pattern):
            matching.append(name)
    return matching

def generate_keyword_pairs(predefined_words):
    """
    Build every unordered pair of keywords from ``predefined_words``.

    Pairs are 2-tuples and follow the order of ``itertools.combinations``,
    i.e. the order in which the words appear in the input.
    """
    return [(first, second) for first, second in combinations(predefined_words, 2)]

def prompt_files(text_files, purpose):
    """
    Interactively choose a subset of ``text_files``.

    The files are listed (sorted with ``custom_file_sort_key``) with 1-based
    numbers, and the user answers with a comma-separated mix of:

    * single numbers (``3``),
    * inclusive ranges (``1-3``),
    * file-name prefixes (matched via ``prompt_pattern_files``),

    or the word ``all``.

    Parameters:
        text_files (list): File names to choose from.
        purpose (str): Short label shown in the prompt header.

    Returns:
        list: The selected file names, de-duplicated and sorted with
        ``custom_file_sort_key``. Empty when nothing valid was entered.
    """
    text_files = sorted(text_files, key=custom_file_sort_key)  # Custom sort for files
    print(f"Select the files for {purpose}:")
    for i, file in enumerate(text_files, start=1):
        print(f"{i}. {file}")

    selection = input("Enter the number of the file, a range (e.g., 1-3), multiple ranges (e.g., 1-3,5-7), a text pattern to select files, or type 'all' to select all files: ").strip()
    selected_files = []

    if selection.lower() == 'all':
        selected_files = text_files
    else:
        # Split the input by commas to handle multiple ranges or numbers
        for part in selection.split(','):
            part = part.strip()
            if not part:
                # An empty piece (blank answer, trailing comma) would otherwise act as
                # an empty prefix pattern and silently select every file.
                continue

            if '-' in part:  # Probably a numeric range
                try:
                    start, end = map(int, part.split('-'))
                except ValueError:
                    # Not numeric: it may be a file-name prefix that contains a hyphen.
                    matches = prompt_pattern_files(text_files, part)
                    if matches:
                        selected_files.extend(matches)
                    else:
                        print(f"Invalid range: {part}. Please provide ranges like '1-3'.")
                    continue
                if start < 1:
                    # start-1 would become a negative index and wrap to the end of the list.
                    print(f"Invalid range: {part}. Please provide ranges like '1-3'.")
                    continue
                selected_files.extend(text_files[start - 1:end])
            elif part.isdigit():  # A single 1-based number
                index = int(part)
                if 1 <= index <= len(text_files):
                    selected_files.append(text_files[index - 1])
                else:
                    # Covers 0 too, which used to wrap around to the last file.
                    print(f"Invalid number: {part}. Please select numbers from the list.")
            else:  # Anything else is treated as a file-name prefix
                selected_files.extend(prompt_pattern_files(text_files, part))

    # Remove duplicates and sort the selected files
    selected_files = sorted(set(selected_files), key=custom_file_sort_key)

    print("Selected files:")
    for file in selected_files:
        print(file)

    return selected_files

# Custom sort key for file names
def custom_file_sort_key(filename):
    """
    Sort key that lists any file whose name contains 'preface' first.

    Preface files get a two-element key starting with the empty string, which
    compares lower than the one-element key (the lower-cased name) used for
    every other file.
    """
    lowered = filename.lower()
    return ('', lowered) if 'preface' in lowered else (lowered,)

# Function to process text files
def process_text_files(file_paths):
    """
    Read the given UTF-8 files, lower-case them, and return one ``nltk.Text``.

    Each file's contents are followed by a single space before the combined
    string is tokenized with ``nltk.wordpunct_tokenize``.
    """
    pieces = []
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as handle:
            pieces.append(handle.read().lower() + " ")
    token_list = nltk.wordpunct_tokenize("".join(pieces))
    return nltk.Text(token_list)

# Function to find .txt files in a directory
def find_text_files(directory):
    """Return the names of the ``.txt`` entries in ``directory``, ordered by ``custom_file_sort_key``."""
    txt_names = [entry for entry in os.listdir(directory) if entry.endswith('.txt')]
    txt_names.sort(key=custom_file_sort_key)  # Custom sort
    return txt_names

# Function to list subfolders in the current directory
def list_subfolders():
    """Return the names of the directories found in the current working directory, sorted alphabetically."""
    with os.scandir() as entries:
        folder_names = [entry.name for entry in entries if entry.is_dir()]
    folder_names.sort()
    return folder_names

# Function to prompt the user to select a subfolder or the current directory
def prompt_subfolder(subfolders):
    """
    Ask the user to pick one of ``subfolders`` or the current working directory.

    The menu is numbered from 1, with 0 standing for the current working
    directory. The prompt repeats until a number in the offered range is
    entered, mirroring ``choose_subdirectory``; previously a non-numeric
    answer raised, and a negative number silently wrapped around the list.

    Parameters:
        subfolders (list): Folder names to offer.

    Returns:
        The chosen folder name, or ``None`` when 0 was entered.
    """
    print("Select a subfolder or the current working directory:")
    print("0. Current Working Directory")
    for i, subfolder in enumerate(subfolders, start=1):
        print(f"{i}. {subfolder}")

    while True:
        try:
            selected_index = int(input("Enter the number of the subfolder: "))
        except ValueError:
            print("Please enter a number.")
            continue
        if selected_index == 0:
            return None
        if 1 <= selected_index <= len(subfolders):
            return subfolders[selected_index - 1]
        print("Invalid selection. Please try again.")

# Function to get predefined target words
def get_predefined_target_words():
    """
    Return the five built-in target-word lists (one per hypothesis), in order.

    The entries are the stemmed keyword forms exactly as they are matched
    against the tokenized texts.
    """
    list_1 = ['citoyen', 'cour', 'domain', 'ressort']
    list_2 = ['guerr', 'paix', 'police', 'religion']
    list_3 = [
        'confess', 'demon', 'demoniaqu', 'diabl',
        'diabol', 'dieu', 'divin',
        'hebrieu', 'impiet', 'preuv', 'question', 'sathan',
        'sorceller', 'sorci', 'statut', 'sujet',
    ]
    list_4 = [
        'arrest', 'conseil', 'conseiller', 'consul',
        'couron', 'édict', 'iurisdict', 'jug', 'magistrat',
        'offic', 'offici', 'ordon', 'parlement',
        'seigneur', 'seigneurial', 'statut',
    ]
    list_5 = [
        'absolu', 'bien', 'chos', 'civil', 'droit', 'estat', 'just', 'justic',
        'loi', 'maiest', 'princ', 'puissanc',
        'republ', 'roy', 'royal', 'royaum', 'souverain', 'souverainet', 'sujet',
    ]
    return [list_1, list_2, list_3, list_4, list_5]

# Function to choose a subdirectory (or the current working directory) from a numbered menu
def choose_subdirectory(subdirectories):
    """
    Show a numbered menu of ``subdirectories`` and return the user's choice.

    Entry 0 stands for the current working directory and yields ``None``;
    any other valid number yields the matching name. The prompt repeats
    until a number within the offered range is entered.
    """
    print("Select a subdirectory:")
    print("0. Current Working Directory")
    for number, name in enumerate(subdirectories, start=1):
        print(f"{number}. {name}")

    while True:
        answer = input("Enter your choice: ")
        try:
            picked = int(answer)
        except ValueError:
            print("Please enter a number.")
            continue

        if picked == 0:
            return None
        if 1 <= picked <= len(subdirectories):
            return subdirectories[picked - 1]
        print("Invalid selection. Please try again.")

# Function to read stopwords from a csv file
def read_stopwords(filepath):
    """
    Load stop words from a UTF-8 CSV file.

    Every cell of every row is split again on commas (so a quoted cell such as
    "un,une" contributes two words) and each piece is stripped of surrounding
    whitespace. The words are returned as a flat list in file order.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        return [
            piece.strip()
            for record in csv.reader(handle)
            for cell in record
            for piece in cell.split(',')
        ]

# Function to find .csv files in a directory
def find_csv_files(directory):
    """Return the names of the ``.csv`` entries in ``directory``, sorted alphabetically."""
    csv_names = [entry for entry in os.listdir(directory) if entry.endswith('.csv')]
    csv_names.sort()  # Sort files alphabetically
    return csv_names

# Function to select files for the stopwords
def select_stopwords_file():
    """
    Interactively pick a stop-words ``.csv`` file.

    The user first chooses the current working directory or one of its
    subfolders, then selects from the ``.csv`` files found there. If several
    files are selected, only the first one is used.

    Returns:
        tuple: ``(file_name, directory_path)`` for the chosen file, or
        ``(None, None)`` when no ``.csv`` file exists or none was selected.
    """
    print('Stopwords file selection')
    stopwords_subfolders = list_subfolders()
    selected_stopwords_subfolder = choose_subdirectory(stopwords_subfolders)

    # None means the user picked the current working directory itself
    if selected_stopwords_subfolder is None:
        stopwords_subfolder_path = os.getcwd()
    else:
        stopwords_subfolder_path = os.path.join(os.getcwd(), selected_stopwords_subfolder)

    # Find .csv files in the selected directory
    stopwords_files = find_csv_files(stopwords_subfolder_path)
    if not stopwords_files:
        # Report the searched path; the subfolder name is None for the current
        # directory, which used to print a misleading "'None'".
        print(f"No .csv stopwords files found in '{stopwords_subfolder_path}'.")
        return None, None

    print('Select a stopwords file from the following list:')
    selected_file = prompt_files(stopwords_files, "stopwords file")
    if not selected_file:
        print("No stopwords file selected.")
        return None, None

    return selected_file[0], stopwords_subfolder_path  # Return the first selected file and its path

# Function to select files for the rate dictionary
def select_rate_dictionary_files():
    """
    Interactively pick the ``.txt`` files used to build the rate dictionary.

    The user first chooses the current working directory or one of its
    subfolders, then selects one or more of the ``.txt`` files found there.

    Returns:
        tuple: ``(file_names, directory_path)`` for the selection, or
        ``([], None)`` when no ``.txt`` file exists or none was selected.
    """
    print('Rate dictionary file selection')
    rate_dictionary_subfolders = list_subfolders()
    selected_rate_dictionary_subfolder = choose_subdirectory(rate_dictionary_subfolders)

    # None means the user picked the current working directory itself
    if selected_rate_dictionary_subfolder is None:
        rate_dictionary_subfolder_path = os.getcwd()
    else:
        rate_dictionary_subfolder_path = os.path.join(os.getcwd(), selected_rate_dictionary_subfolder)

    # Find .txt files in the selected directory
    rate_dictionary_files = find_text_files(rate_dictionary_subfolder_path)
    if not rate_dictionary_files:
        # Report the searched path; the subfolder name is None for the current
        # directory, which used to print a misleading "'None'".
        print(f"No .txt rate dictionary files found in '{rate_dictionary_subfolder_path}'.")
        return [], None

    print('Select one or more rate dictionary files from the following list:')
    selected_files = prompt_files(rate_dictionary_files, "rate dictionary")
    if not selected_files:
        print("No rate dictionary files selected.")
        return [], None

    return selected_files, rate_dictionary_subfolder_path  # Return the selected files and their path

def select_existing_xlsx_file():
    """
    Interactively pick an existing ``.xlsx`` file.

    The user first chooses the current working directory or one of its
    subfolders, then selects one of the ``.xlsx`` files found there by number.

    Returns:
        The full path of the chosen file, or ``None`` when the user entered 0
        to cancel or the directory holds no ``.xlsx`` file.
    """
    print("Select a directory to search for .xlsx files:")
    subfolders = list_subfolders()
    selected_subfolder = choose_subdirectory(subfolders)

    # Check if the user selected a subfolder or the current directory
    folder_path = os.getcwd() if selected_subfolder is None else os.path.join(os.getcwd(), selected_subfolder)

    # os.listdir returns names in arbitrary order; sort them so the menu
    # numbering is stable from run to run, like the other file finders here.
    xlsx_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xlsx'))
    if not xlsx_files:
        print(f"No .xlsx files found in '{folder_path}'.")
        return None

    print("Select an existing .xlsx file from the following list:")
    for i, file in enumerate(xlsx_files, start=1):
        print(f"{i}. {file}")

    while True:
        try:
            choice = int(input("Enter the number of the file you want to select or 0 to cancel: "))
        except ValueError:
            print("Please enter a number.")
            continue
        if choice == 0:
            return None
        if 1 <= choice <= len(xlsx_files):
            return os.path.join(folder_path, xlsx_files[choice - 1])
        print("Invalid selection. Please try again.")

# Utility function to clean file names
def clean_file_name(file_name):
    """
    Shorten a file name for display.

    The substitutions below are applied in order, then the extension (as
    determined by ``os.path.splitext``) is dropped.
    """
    substitutions = (
        ('_', ''),
        ('corrected', ''),
        ('stemmed', ''),
        ('Démonomanie', 'Dém'),
        ('République', 'Rép'),
        # Replace '911' with '11' and '910' with '10'
        ('911', '11'),
        ('910', '10'),
    )
    for old, new in substitutions:
        file_name = file_name.replace(old, new)

    stem, _extension = os.path.splitext(file_name)
    return stem

# Function to process the subset of text files for KWIC and counts
def process_subset_files(file_paths):
    """
    Read the given text files and return their combined, lower-cased tokens.

    Files are decoded as UTF-8, matching ``process_text_files`` and
    ``read_stopwords``; without an explicit encoding ``open`` falls back to
    the platform's locale encoding, which can garble accented text.

    Parameters:
        file_paths (list): Paths of the files to read, in order.

    Returns:
        list: Tokens produced by ``nltk.wordpunct_tokenize`` over all files,
        each file's contents being followed by a single space.
    """
    pieces = []
    for file_path in file_paths:
        with open(file_path, 'r', encoding='utf-8') as f:
            pieces.append(f.read().lower() + " ")
    combined_text = "".join(pieces)
    tokens = nltk.wordpunct_tokenize(combined_text)
    return tokens

def get_kwic(sometargetterm, somelistofwords, window=10, excl_target=True, source_file=None):
    kwics = []
    for n, w in enumerate(somelistofwords):
        if w == sometargetterm:
            start = max(0, n - window)
            end = min(n + window + 1, len(somelistofwords))
            if excl_target:
                # Updated: Exclude keyword itself from the window
                k = [word for word in (somelistofwords[start:n] + somelistofwords[n + 1:end]) if word != sometargetterm]
            else:
                k = somelistofwords[start:end]
            kwics.append((k, source_file))
    return kwics
   

def add_to_count_dict(word, count_dict):
    """Increment the tally for ``word`` in ``count_dict`` in place, starting from 1 for a new word."""
    count_dict[word] = count_dict.get(word, 0) + 1

def get_fishers(someword, somecountdict, someratedict, alternative='greater'):
    """
    Return the Fisher exact-test p-value for ``someword``.

    The 2x2 table compares the word's observed count in ``somecountdict``
    with the count implied by its rate in ``someratedict`` (rate times the
    total of all counts, rounded), each paired with the remainder of that
    total.
    """
    rate = someratedict[someword]
    total_words = sum(somecountdict.values())
    observed = somecountdict[someword]
    expected = round(rate * total_words)

    table = [
        [observed, total_words - observed],
        [expected, total_words - expected],
    ]
    _statistic, p_value = fisher_exact(table, alternative=alternative)
    return p_value

def _iter_numbered_picks(selection, items):
    """
    Lazily yield the entries of ``items`` named by a 1-based selection string.

    ``selection`` is a comma-separated mix of single numbers (``3``) and
    inclusive ranges (``3-6``). Pieces that are neither are reported with a
    printed message and skipped.

    Raises:
        ValueError: A range is malformed (e.g. ``3-`` or ``1-2-3``).
        IndexError: A number is past the end of ``items``, or a position below
            1 is used (which would otherwise wrap around to the end of the list).
    """
    for part in selection.split(','):
        part = part.strip()
        if '-' in part:  # Handle ranges
            start, end = map(int, part.split('-'))
            if start < 1:
                raise IndexError(f"range must start at 1 or above: {part}")
            yield from items[start - 1:end]
        elif part.isdigit():  # Handle single numbers
            position = int(part)
            if position < 1:
                raise IndexError(f"positions start at 1: {part}")
            yield items[position - 1]
        else:
            print(f"Invalid selection: {part}. Please try again.")


def filter_collocates_with_removal(collocates, collocate_counts):
    """
    Let the user narrow a list of significant collocates interactively.

    The dialogue has three stages: choose a minimum-count threshold (which can
    be refined repeatedly), select collocates from the thresholded list by
    number, then optionally remove any that were selected in error.

    Parameters:
        collocates (list): A sorted list of significant collocates.
        collocate_counts (dict): A dictionary of collocates and their counts.

    Returns:
        tuple: ``(selected_collocates, min_count)`` - the list of collocates
        the user kept, and the last minimum-count threshold entered.
    """

    # Allow the user to refine the minimum-count threshold repeatedly
    while True:
        try:
            min_count = int(input("Enter the minimum count threshold for collocates to include: ").strip())
        except ValueError:
            print("Invalid input. Please enter a valid number.")
            continue

        filtered_collocates = [c for c in collocates if collocate_counts[c] >= min_count]
        print(f"\nCollocates with counts >= {min_count} ({len(filtered_collocates)} items):")
        for i, collocate in enumerate(filtered_collocates, start=1):
            print(f"{i}. {collocate} (Count: {collocate_counts[collocate]})")

        choice = input("\nType 'refine' to try a new threshold, or press Enter to proceed: ").strip().lower()
        if choice == 'refine':
            continue
        break

    print("\nYou can select collocates by entering:")
    print("- A single number (e.g., 3) to select one collocate.")
    print("- A range of numbers (e.g., 3-6) to select multiple collocates.")
    print("- Multiple selections separated by commas (e.g., 3,5-7,9).")
    print("- Type 'all' to select all collocates.")
    print("- Type 'done' to finalize your selection.")

    selected_collocates = []

    while True:
        selection = input("Enter your selection: ").strip()
        if selection.lower() == 'done':
            break
        elif selection.lower() == 'all':
            # Copy so later edits to the result never touch the filtered list
            selected_collocates = list(filtered_collocates)
            break

        try:
            # Items are appended as they are resolved, so pieces that precede a
            # bad one in the same answer are still kept.
            for picked in _iter_numbered_picks(selection, filtered_collocates):
                selected_collocates.append(picked)
        except (ValueError, IndexError):
            print(f"Invalid input: {selection}. Please try again.")

        # Remove duplicates and sort the selected collocates
        selected_collocates = sorted(set(selected_collocates), key=filtered_collocates.index)

        print("Currently selected collocates:")
        for collocate in selected_collocates:
            print(collocate)

    # Allow users to review and remove collocates selected in error
    while True:
        print("\nFinalized collocates:")
        for i, collocate in enumerate(selected_collocates, start=1):
            print(f"{i}. {collocate}")

        remove_error = input(
            "Would you like to remove any collocates selected in error? (yes/no): "
        ).strip().lower()
        if remove_error != 'yes':
            break

        remove_selection = input(
            "Enter the numbers of the collocates to remove (e.g., 2,4-5): "
        ).strip()
        try:
            # Resolve the whole answer first: nothing is removed if any piece is bad
            to_remove = list(_iter_numbered_picks(remove_selection, selected_collocates))
        except (ValueError, IndexError):
            print(f"Invalid input: {remove_selection}. Please try again.")
            continue
        selected_collocates = [
            collocate for collocate in selected_collocates
            if collocate not in to_remove
        ]

    return selected_collocates, min_count

def search_concordance(
    reference_files,         # files → build the rate dictionary
    analysis_files,          # files → do KWIC & Fisher tests
    predefined_word_lists,
    stops,
    alpha
):
    """
    Find significant collocates of each keyword list and write them to a workbook.

    Steps:
      A) Ask for the KWIC window size and whether to append to an existing
         ``.xlsx`` file or create a new one.
      B) Build expected rates from the reference files (stop words and all
         target keywords excluded).
      C) For each keyword list ("hypothesis") the user does not skip: count
         collocates per analysis file and keyword, keep those whose one-sided
         Fisher exact test p-value is below ``alpha``, let the user filter
         them, and write one worksheet with the per-file hit table, a summary
         of collocates per keyword, and the parameters used.
      D) Save the workbook.

    Parameters:
        reference_files (list): Paths of the files defining the expected rates.
        analysis_files (list): File names, resolved against the current
            working directory, to search for keyword contexts.
        predefined_word_lists (list): One list of keywords per hypothesis.
        stops: Collection of stop words to ignore.
        alpha (float): Significance threshold for the Fisher test.

    Raises:
        ValueError: The window size is not an integer, or the reference
            corpus has no tokens left after filtering.
    """
    # --- A) window prompt & workbook setup ---
    window = int(input("Enter the window size for concordance: ").strip())

    existing_file = None
    append_to_existing = input("Do you want to append results to an existing .xlsx file? (yes/no): ").strip().lower()
    if append_to_existing == 'yes':
        existing_file = select_existing_xlsx_file()
        if existing_file:
            wb = load_workbook(existing_file)
            print(f"Appending to existing file: {existing_file}")
        else:
            print("No existing workbook selected. Creating a new workbook instead.")
            wb = Workbook()
    else:
        wb = Workbook()
        print("Creating a new workbook.")

    # Drop the placeholder sheet of a freshly created workbook only. A sheet
    # that happens to be called 'Sheet' in the user's existing file is data
    # and must not be deleted.
    if not existing_file and 'Sheet' in wb.sheetnames:
        del wb['Sheet']

    if existing_file:
        output_filepath = existing_file
    else:
        output_filename = input("Enter a unique name for the .xlsx file (without extension): ").strip() or "distinct_collocates"
        output_filepath = os.path.join("concordances", f"{output_filename}.xlsx")

    # Set membership is constant-time; stop words are tested once per token.
    stop_set = set(stops)

    # --- B) Build expected_rates from reference_files ---
    ref_paths = [os.path.join(os.getcwd(), f) for f in reference_files]
    ref_tokens = process_subset_files(ref_paths)
    # Flatten all hypothesis lists into one set of target keywords
    all_targets = {word for group in predefined_word_lists for word in group}
    ref_counts = Counter(
        tok for tok in ref_tokens
        if tok not in stop_set and tok not in all_targets
    )

    total_ref = sum(ref_counts.values())
    if total_ref == 0:
        raise ValueError("Reference corpus is empty after filtering!")
    expected_rates = {w: c / total_ref for w, c in ref_counts.items()}

    # Each analysis file is read and tokenized at most once, on first use,
    # instead of once per hypothesis.
    tokens_by_file = {}

    # --- C) Loop over each hypothesis list ---
    for idx, predefined_words in enumerate(predefined_word_lists, start=1):
        if input(f"Skip Hypothesis {idx}? (yes/no): ").strip().lower() == 'yes':
            print(f"Skipping Hypothesis {idx}.")
            continue

        print(f"Processing Hypothesis {idx}.")
        ws = wb.create_sheet(title=f"Hypothesis {idx}")
        headers = ['Word'] + [clean_file_name(f) for f in analysis_files] + ['Total']
        ws.append(headers)
        predefined_words = sorted(predefined_words)

        # --- C1) Build counts_by_file_kw: file -> keyword -> collocate counts ---
        counts_by_file_kw = {
            f: {kw: Counter() for kw in predefined_words}
            for f in analysis_files
        }

        for f in analysis_files:
            if f not in tokens_by_file:
                tokens_by_file[f] = process_subset_files([os.path.join(os.getcwd(), f)])
            tokens = tokens_by_file[f]
            for kw in predefined_words:
                for context, _ in get_kwic(kw, tokens, window):
                    # Each collocate counts once per context window; other
                    # keywords are counted as collocates too.
                    for c in set(context) - {kw}:
                        if c not in stop_set:
                            counts_by_file_kw[f][kw][c] += 1

        # --- C2) Fisher test per file over the flattened keyword buckets ---
        all_significant = set()
        collocate_counts = Counter()

        for f, cnts in counts_by_file_kw.items():
            # flatten all kw-buckets into one counter
            flat = Counter()
            for kw in predefined_words:
                flat.update(cnts[kw])
            file_total = sum(flat.values())
            if file_total == 0:
                continue

            for coll, obs in flat.items():
                exp_cnt = round(expected_rates.get(coll, 0.0) * file_total)
                a, b = obs, file_total - obs
                c, d = exp_cnt, file_total - exp_cnt
                p = fisher_exact([[a, b], [c, d]], alternative='greater')[1]
                if p < alpha:
                    all_significant.add(coll)
                    collocate_counts[coll] += obs

        print(f"\n=== Hypothesis {idx} target words ===")
        print(", ".join(predefined_words))
        print("=======================================\n")

        # 1) Identify any hypothesis keywords that are significant collocates
        auto_keywords = [kw for kw in predefined_words if kw in all_significant]

        # 2) Build the manual pool by removing auto_keywords
        manual_pool = sorted(set(all_significant) - set(auto_keywords))

        # 3) Prompt the user only on the manual pool
        selected, min_count = filter_collocates_with_removal(manual_pool, collocate_counts)

        # 4) Merge the hidden auto_keywords back into selected
        for kw in auto_keywords:
            if kw not in selected:
                selected.append(kw)
        if auto_keywords:
            print(f"\nAutomatically included hidden keyword collocates: {auto_keywords}\n")
        selected_set = set(selected)

        # --- C3) Write main table, build summary_by_kw, accumulate column totals ---
        summary_by_kw = {kw: set() for kw in predefined_words}
        column_totals = [0] * len(analysis_files)
        for kw in predefined_words:
            row = [kw]
            row_total = 0

            for col, f in enumerate(analysis_files):
                # restrict to only those selected as significant
                coll_hits = {c for c in counts_by_file_kw[f][kw] if c in selected_set}
                # accumulate to summary (includes both "normal" collocates and keyword-keyword)
                summary_by_kw[kw].update(coll_hits)

                hits = len(coll_hits)
                row.append(hits)
                row_total += hits
                column_totals[col] += hits

            row.append(row_total)
            ws.append(row)

        # --- C4) Totals row ---
        grand_total = sum(column_totals)
        ws.append(['Total'] + column_totals + [grand_total])

        # --- C5) Single summary list below the table ---
        ws.append([])
        ws.append(["Keyword", "Significant Collocates"])
        for kw in predefined_words:
            coll_list = ", ".join(sorted(summary_by_kw[kw]))
            ws.append([kw, coll_list])

        # --- C6) Footer info & optional save/exit per-hypothesis ---
        ws.append([])
        ws.append(['p-value threshold:', alpha])
        ws.append(['window size:', window])
        ws.append(['minimum count threshold:', min_count])

        if input(f"Save results for Hypothesis {idx}? (yes/no): ").strip().lower() == 'yes':
            os.makedirs('concordances', exist_ok=True)
            wb.save(output_filepath)
            print(f"Results up to Hypothesis {idx} saved to {output_filepath}.")

        if input("Exit after this hypothesis? (yes/no): ").strip().lower() == 'yes':
            print("Exiting.")
            return

    # --- D) Final save of workbook ---
    if not wb.sheetnames:
        # Every hypothesis was skipped in a new workbook, so there is no sheet
        # to write (NOTE(review): openpyxl is expected to refuse saving a
        # workbook without a visible sheet - confirm against its docs).
        print("No hypotheses were processed; nothing to save.")
        return
    os.makedirs('concordances', exist_ok=True)
    wb.save(output_filepath)
    print(f"Concordance has been saved to {output_filepath}.")
    
# Example usage
# Choose between the built-in keyword lists and five lists typed in by hand
use_predefined = input("Do you want to use predefined target word lists (yes/no)? ").strip().lower() == 'yes'
if not use_predefined:
    predefined_word_lists = [input("Enter words for a group separated by spaces: ").strip().split() for _ in range(5)]
else:
    predefined_word_lists = get_predefined_target_words()

# Significance threshold handed to search_concordance
alpha = float(input("Enter the value for alpha: ").strip())

stopwords_file, stopwords_path = select_stopwords_file()
if not stopwords_file:
    print("No stopwords file selected.")
else:
    stops = read_stopwords(os.path.join(stopwords_path, stopwords_file))
    rate_dictionary_files, rate_dictionary_path = select_rate_dictionary_files()
    if not rate_dictionary_files:
        print("No rate dictionary files selected.")
    else:
        # 1) full paths of the reference (rate dictionary) files
        ref_files = [
            os.path.join(rate_dictionary_path, f)
            for f in rate_dictionary_files
        ]
        # 2) pick analysis files from the current working directory
        anal_files = prompt_files(find_text_files(os.getcwd()), "analysis (KWIC)")
        # 3) run the analysis with both lists
        search_concordance(ref_files, anal_files, predefined_word_lists, stops, alpha)

Do you want to use predefined target word lists (yes/no)?  yes
Enter the value for alpha:  0.10


Stopwords file selection
Select a subdirectory:
0. Current Working Directory
1. .ipynb_checkpoints
2. concordances


Enter your choice:  0


Select a stopwords file from the following list:
Select the files for stopwords file:
1. stop_words.csv


Enter the number of the file, a range (e.g., 1-3), multiple ranges (e.g., 1-3,5-7), a text pattern to select files, or type 'all' to select all files:  1


Selected files:
stop_words.csv
Rate dictionary file selection
Select a subdirectory:
0. Current Working Directory
1. .ipynb_checkpoints
2. concordances


Enter your choice:  0


Select one or more rate dictionary files from the following list:
Select the files for rate dictionary:
1. Démonomanie preface Repair_corrected_stemmed.txt
2. République preface_corrected_stemmed.txt
3. Discours des raisons_corrected_stemmed.txt
4. Démonomanie I.1_corrected_stemmed.txt
5. Démonomanie I.2_corrected_stemmed.txt
6. Démonomanie I.3_corrected_stemmed.txt
7. Démonomanie I.4_corrected_stemmed.txt
8. Démonomanie I.5_corrected_stemmed.txt
9. Démonomanie I.6_corrected_stemmed.txt
10. Démonomanie I.7_corrected_stemmed.txt
11. Démonomanie II.1_corrected_stemmed.txt
12. Démonomanie II.2_corrected_stemmed.txt
13. Démonomanie II.3_corrected_stemmed.txt
14. Démonomanie II.4_corrected_stemmed.txt
15. Démonomanie II.5_corrected_stemmed.txt
16. Démonomanie II.6_corrected_stemmed.txt
17. Démonomanie II.7_corrected_stemmed.txt
18. Démonomanie II.8_corrected_stemmed.txt
19. Démonomanie III.1_corrected_stemmed.txt
20. Démonomanie III.2_corrected_stemmed.txt
21. Démonomanie III.3_corrected_st

Enter the number of the file, a range (e.g., 1-3), multiple ranges (e.g., 1-3,5-7), a text pattern to select files, or type 'all' to select all files:  all


Selected files:
Démonomanie preface Repair_corrected_stemmed.txt
République preface_corrected_stemmed.txt
Discours des raisons_corrected_stemmed.txt
Démonomanie I.1_corrected_stemmed.txt
Démonomanie I.2_corrected_stemmed.txt
Démonomanie I.3_corrected_stemmed.txt
Démonomanie I.4_corrected_stemmed.txt
Démonomanie I.5_corrected_stemmed.txt
Démonomanie I.6_corrected_stemmed.txt
Démonomanie I.7_corrected_stemmed.txt
Démonomanie II.1_corrected_stemmed.txt
Démonomanie II.2_corrected_stemmed.txt
Démonomanie II.3_corrected_stemmed.txt
Démonomanie II.4_corrected_stemmed.txt
Démonomanie II.5_corrected_stemmed.txt
Démonomanie II.6_corrected_stemmed.txt
Démonomanie II.7_corrected_stemmed.txt
Démonomanie II.8_corrected_stemmed.txt
Démonomanie III.1_corrected_stemmed.txt
Démonomanie III.2_corrected_stemmed.txt
Démonomanie III.3_corrected_stemmed.txt
Démonomanie III.4_corrected_stemmed.txt
Démonomanie III.5_corrected_stemmed.txt
Démonomanie III.6_corrected_stemmed.txt
Démonomanie IV.1_corrected_stemme

Enter the number of the file, a range (e.g., 1-3), multiple ranges (e.g., 1-3,5-7), a text pattern to select files, or type 'all' to select all files:  Dém,Rép


Selected files:
Démonomanie preface Repair_corrected_stemmed.txt
République preface_corrected_stemmed.txt
Démonomanie I.1_corrected_stemmed.txt
Démonomanie I.2_corrected_stemmed.txt
Démonomanie I.3_corrected_stemmed.txt
Démonomanie I.4_corrected_stemmed.txt
Démonomanie I.5_corrected_stemmed.txt
Démonomanie I.6_corrected_stemmed.txt
Démonomanie I.7_corrected_stemmed.txt
Démonomanie II.1_corrected_stemmed.txt
Démonomanie II.2_corrected_stemmed.txt
Démonomanie II.3_corrected_stemmed.txt
Démonomanie II.4_corrected_stemmed.txt
Démonomanie II.5_corrected_stemmed.txt
Démonomanie II.6_corrected_stemmed.txt
Démonomanie II.7_corrected_stemmed.txt
Démonomanie II.8_corrected_stemmed.txt
Démonomanie III.1_corrected_stemmed.txt
Démonomanie III.2_corrected_stemmed.txt
Démonomanie III.3_corrected_stemmed.txt
Démonomanie III.4_corrected_stemmed.txt
Démonomanie III.5_corrected_stemmed.txt
Démonomanie III.6_corrected_stemmed.txt
Démonomanie IV.1_corrected_stemmed.txt
Démonomanie IV.2_corrected_stemmed.tx

Enter the window size for concordance:  15
Do you want to append results to an existing .xlsx file? (yes/no):  yes


Select a directory to search for .xlsx files:
Select a subdirectory:
0. Current Working Directory
1. .ipynb_checkpoints
2. concordances


Enter your choice:  2


Select an existing .xlsx file from the following list:
1. 15WindowSummary - Archive.xlsx
2. 15winsum.xlsx
3. most_distinct_collocates_20_window.xlsx


Enter the number of the file you want to select or 0 to cancel:  2


Appending to existing file: /home/lucas-jerusalimiec/Documents/OCR Text/Text/Sectionized/lemmatized/concordances/15winsum.xlsx


Skip Hypothesis 1? (yes/no):  yes


Skipping Hypothesis 1.


Skip Hypothesis 2? (yes/no):  yes


Skipping Hypothesis 2.


Skip Hypothesis 3? (yes/no):  yes


Skipping Hypothesis 3.


Skip Hypothesis 4? (yes/no):  no


Processing Hypothesis 4.

=== Hypothesis 4 target words ===
arrest, conseil, conseiller, consul, couron, iurisdict, jug, magistrat, offic, offici, ordon, parlement, seigneur, seigneurial, statut, édict



Enter the minimum count threshold for collocates to include:  10



Collocates with counts >= 10 (175 items):
1. abus (Count: 11)
2. advis (Count: 26)
3. affair (Count: 21)
4. age (Count: 14)
5. amend (Count: 12)
6. ancien (Count: 17)
7. annuel (Count: 14)
8. appel (Count: 46)
9. appelloit (Count: 11)
10. ariftocrat (Count: 12)
11. aristocrat (Count: 36)
12. bien (Count: 362)
13. bourgeois (Count: 12)
14. canton (Count: 20)
15. capitain (Count: 25)
16. cas (Count: 50)
17. cenfeur (Count: 32)
18. chang (Count: 26)
19. charg (Count: 71)
20. chois (Count: 10)
21. chos (Count: 95)
22. citoyen (Count: 21)
23. civil (Count: 18)
24. cognoifl (Count: 15)
25. cognoiftr (Count: 14)
26. colleg (Count: 55)
27. collegu (Count: 20)
28. command (Count: 87)
29. commeil (Count: 10)
30. commifl (Count: 13)
31. commissair (Count: 34)
32. commission (Count: 44)
33. commád (Count: 10)
34. condamn (Count: 23)
35. continu (Count: 11)
36. corp (Count: 21)
37. cour (Count: 59)
38. coustum (Count: 29)
39. crim (Count: 11)
40. debvoir (Count: 11)
41. declar (Count: 10)
42. defe