# In [2]:
import os
from scipy.stats import fisher_exact

def tokenize_file(filepath, encoding=None):
    """Read a text file and split its contents into whitespace-separated tokens.

    Args:
        filepath: Path of the file to read.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            leaves the choice to the platform, which is how this function
            behaved before the parameter existed. Pass e.g. ``"utf-8"`` to
            get the same result on every machine.

    Returns:
        A list of tokens in file order, as produced by ``str.split()`` with
        no arguments. An empty or whitespace-only file yields ``[]``.
    """
    with open(filepath, 'r', encoding=encoding) as file:
        return file.read().split()

def list_files_in_directory(directory):
    """Return the names of the entries in *directory* that end in ``.txt``.

    Only the bare names are returned (not joined with *directory*), ordered
    by Python's default string comparison.
    """
    text_files = []
    for entry in os.listdir(directory):
        if entry.endswith('.txt'):
            text_files.append(entry)
    text_files.sort()
    return text_files

def _parse_selection(choices, files):
    """Turn a selection string such as ``"1,3-5"`` into a sorted list of names.

    Numbers are 1-based positions in *files*. Empty items (e.g. from a
    trailing comma) are ignored.

    Raises:
        ValueError: if an item is not a number or ``start-end`` pair, if a
            position falls outside ``1..len(files)``, if a range is
            reversed, or if nothing at all was selected.
    """
    total = len(files)
    picked = set()
    for token in choices.split(','):
        token = token.strip()
        if not token:
            continue
        first, dash, last = token.partition('-')
        start = int(first)
        end = int(last) if dash else start
        # Reject 0 and negatives explicitly: Python would otherwise treat
        # them as indices counted from the end and pick the wrong file.
        if not 1 <= start <= end <= total:
            raise ValueError(f"'{token}' is outside the range 1-{total}")
        picked.update(files[start - 1:end])
    if not picked:
        raise ValueError("no files selected")
    return sorted(picked)


def choose_files(files):
    """Ask the user which of *files* to use and return the chosen names.

    The numbered list is printed once, then the user is prompted until the
    answer is valid. Accepted answers are ``all`` (case-insensitive) or a
    comma-separated mix of 1-based numbers and ``start-end`` ranges.

    Args:
        files: List of file names to choose from.

    Returns:
        *files* itself when the user answers ``all``; otherwise a sorted
        list of the selected names with duplicates removed.
    """
    print("Select files to use for calculating r (enter numbers, ranges separated by commas, or type 'all' to select all files):")
    for i, file in enumerate(files, start=1):
        print(f"{i}. {file}")

    while True:
        choices = input("Enter your choices: ").strip().lower()
        if choices == 'all':
            return files
        try:
            return _parse_selection(choices, files)
        except ValueError as err:
            print(f"Invalid selection ({err}). Please try again.")

def choose_single_file(files):
    """Ask the user to pick exactly one of *files* and return its name.

    The numbered list is printed once, then the user is prompted until they
    enter a whole number between 1 and ``len(files)``.

    Args:
        files: List of file names to choose from.

    Returns:
        The element of *files* at the chosen 1-based position.
    """
    print("Select a target file:")
    for i, file in enumerate(files, start=1):
        print(f"{i}. {file}")

    while True:
        raw = input("Enter your choice: ")
        try:
            choice = int(raw)
        except ValueError:
            print(f"'{raw.strip()}' is not a number. Please try again.")
            continue
        # Bounds are checked explicitly because 0 or a negative number would
        # otherwise index from the end of the list and return the wrong file.
        if 1 <= choice <= len(files):
            return files[choice - 1]
        print(f"Please enter a number between 1 and {len(files)}.")

def calculate_fishers_exact():
    """Interactively run a one-sided Fisher's exact test on word frequencies.

    Steps:
      1. List the ``.txt`` files in the current working directory and let
         the user pick the files that form the reference corpus.
      2. Repeatedly ask for a target word and a single target file, build a
         2x2 table and print the p-value, until the user answers ``quit``.

    The function returns ``None``. It returns early, after printing a
    message, when there are no ``.txt`` files or when the chosen reference
    files contain no tokens at all.
    """
    current_directory = os.getcwd()

    files = list_files_in_directory(current_directory)
    if not files:
        print("No .txt files found in the current working directory.")
        return

    # Pool the tokens of every selected file into one reference corpus.
    file_paths = choose_files(files)
    all_words = []
    for file_path in file_paths:
        all_words.extend(tokenize_file(os.path.join(current_directory, file_path)))

    # The corpus size does not change between queries, so compute it once.
    # An empty corpus would make the relative frequency below divide by zero.
    wc = len(all_words)
    if wc == 0:
        print("The selected files contain no words, so r cannot be calculated.")
        return

    while True:
        # Tokens never contain whitespace, so surrounding blanks in the
        # typed word could only prevent a match.
        target_word = input("Enter the target word: ").strip()

        # r: relative frequency of the target word in the reference corpus.
        r = all_words.count(target_word) / wc

        # Choose a single file for calculating a and wc_single.
        single_file_path = choose_single_file(files)
        single_file_words = tokenize_file(os.path.join(current_directory, single_file_path))

        a = single_file_words.count(target_word)
        wc_single = len(single_file_words)

        # Row 1: observed counts in the target file (target word / other tokens).
        # Row 2: counts the same number of tokens would give at rate r.
        b = wc_single - a
        c = round(r * wc_single)
        d = wc_single - c

        table = [[a, b], [c, d]]

        # alternative='greater' makes the test one-sided. Unpacking the
        # result works whether SciPy returns a plain tuple or a result object.
        _, p_value = fisher_exact(table, alternative='greater')
        print("P-value:", p_value)

        # Any answer other than 'quit' starts another round.
        choice = input("Do you want to quit or calculate Fisher's Exact Test for a new target word? (quit/new): ").strip().lower()
        if choice == 'quit':
            break

# Start the interactive session only when this code is executed directly
# (as a script or in a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    calculate_fishers_exact()