In [None]:
import os
import tkinter as tk
from tkinter import ttk, filedialog
import PyPDF2
import pandas as pd
from ttkthemes import ThemedStyle

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, joined in page order with no
        separator inserted between pages.
    """
    with open(pdf_file, 'rb') as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Join inside the `with` block so every page is extracted while the
        # underlying file is still open. `or ""` is a defensive guard: a
        # page that yields no string would otherwise make the join fail.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Helper to read the contents of a plain-text file
# NOTE(review): the original code called `extract_text_from_txt`, but no
# definition of it was visible in this file; this private helper fills that
# gap. If a shared implementation exists elsewhere, confirm which to keep.
def _extract_text_from_txt(txt_file):
    """Return the full contents of a text file as a string.

    Args:
        txt_file: Path of the text file to read.

    Returns:
        The decoded file contents. Bytes that are not valid UTF-8 are
        replaced rather than raising, so one odd file cannot abort a run.
    """
    with open(txt_file, 'r', encoding='utf-8', errors='replace') as file:
        return file.read()


# Function to process the selected folder and generate an Excel file
def process_folder():
    """Count keyword occurrences in a folder's PDF/TXT files and save them.

    Reads the folder path, the '|'-separated keywords and the program name
    from the GUI entry widgets, counts case-insensitive substring
    occurrences of each keyword in every ``.pdf`` / ``.txt`` file directly
    inside the folder, and writes one row per (file, keyword) pair with a
    non-zero count to ``keyword_counts.xlsx`` in that same folder. The
    outcome is reported through ``result_label``.
    """
    folder_path = folder_entry.get()
    program = program_entry.get()  # Get the program parameter

    # Give feedback in the GUI instead of letting os.listdir raise on an
    # empty or non-existent path.
    if not os.path.isdir(folder_path):
        result_label.config(text=f"Folder not found: {folder_path}")
        return

    # Drop empty keywords (blank entry, leading/trailing/double '|'):
    # str.count('') returns len(text) + 1, which would add a bogus row.
    keyword_list = [keyword for keyword in keyword_entry.get().split('|') if keyword]

    # List only PDF and TXT files in the folder
    file_list = [
        os.path.join(folder_path, filename)
        for filename in os.listdir(folder_path)
        if filename.lower().endswith(('.pdf', '.txt'))
    ]

    columns = ['Program', 'Filename', 'Word', 'Count']
    output_data = []

    for file in file_list:
        if file.lower().endswith('.pdf'):
            text = extract_text_from_pdf(file)
        else:
            text = _extract_text_from_txt(file)
        lowered_text = text.lower()  # Lower-case once per file, not per keyword
        filename = os.path.basename(file)  # Use the original file name

        # A dict keeps a keyword that was entered twice from being reported twice.
        keyword_tally = {
            keyword: lowered_text.count(keyword.lower()) for keyword in keyword_list
        }

        # Only include rows with count greater than 0
        output_data.extend(
            {'Program': program, 'Filename': filename, 'Word': keyword, 'Count': count}
            for keyword, count in keyword_tally.items()
            if count > 0
        )

    # Passing `columns` fixes the column order and keeps the headers present
    # even when there are no matches (an empty DataFrame otherwise has no
    # columns, and selecting them would raise KeyError).
    df = pd.DataFrame(output_data, columns=columns)

    # Save the DataFrame to an Excel file
    output_file = os.path.join(folder_path, "keyword_counts.xlsx")
    df.to_excel(output_file, index=False)

    result_label.config(text=f"Output Excel file saved as: {output_file}")

# Create the main GUI window
root = tk.Tk()
root.title("Keyword Matching Tool")

# Apply a ttk theme for a visually appealing look
style = ThemedStyle(root)
style.set_theme("clam")


def browse_folder():
    """Let the user pick a directory and show it in the folder entry.

    The entry is cleared before the new path is inserted, so browsing more
    than once replaces the previous path instead of prepending to it. A
    cancelled dialog (empty result) leaves the entry unchanged.
    """
    selected_folder = filedialog.askdirectory()
    if selected_folder:
        folder_entry.delete(0, tk.END)
        folder_entry.insert(0, selected_folder)


# Create and configure GUI elements with ttk widgets
folder_label = ttk.Label(root, text="Select Folder:")
folder_label.grid(row=0, column=0, padx=10, pady=10)
folder_entry = ttk.Entry(root)
folder_entry.grid(row=0, column=1, padx=10, pady=10)
folder_button = ttk.Button(root, text="Browse", command=browse_folder)
folder_button.grid(row=0, column=2, padx=10, pady=10)

keyword_label = ttk.Label(root, text="Keywords (separated by '|'):")
keyword_label.grid(row=1, column=0, padx=10, pady=10)
keyword_entry = ttk.Entry(root)
keyword_entry.grid(row=1, column=1, padx=10, pady=10)

program_label = ttk.Label(root, text="Program:")
program_label.grid(row=2, column=0, padx=10, pady=10)
program_entry = ttk.Entry(root)
program_entry.grid(row=2, column=1, padx=10, pady=10)

process_button = ttk.Button(root, text="Process Folder", command=process_folder)
process_button.grid(row=3, column=0, columnspan=3, padx=10, pady=10)

# Status line that process_folder updates with the result of a run
result_label = ttk.Label(root, text="", font=("Arial", 12, "bold"))
result_label.grid(row=4, column=0, columnspan=3, padx=10, pady=10)

# Apply uniform padding to all widgets; this overrides the padx/pady values
# passed to the individual grid() calls above.
for child in root.winfo_children():
    child.grid_configure(padx=5, pady=5)

root.mainloop()


2023-10-19 16:42:35.204 python[71076:7846851] +[CATransaction synchronize] called within transaction
