In [None]:
import os

# Move the working directory three levels up in a single call, so the
# relative data paths used in later cells resolve from that directory.
os.chdir(os.path.join(os.pardir, os.pardir, os.pardir))


In [39]:
from pathlib import Path


class DataLoader:
    def __init__(self, base_path="extract_data_video/data_organize"):
        self.base_path = base_path

    def get_categories(self):
        return [self.base_path / path for path in os.listdir(self.base_path)]

    def get_subcategories(self, category_path):
        return [category_path / path for path in os.listdir(category_path)]

    def get_words(self, subcategory_path, full_path=True) -> list[str] | list[list]:
        if full_path:
            return [subcategory_path / word for word in os.listdir(subcategory_path)]
        else:
            return [word for word in os.listdir(subcategory_path)]

    def get_all_words(self, subcategory_name=None, full_path=True):
        palavras = []
        for categoria in self.get_categories():
            for subcat in self.get_subcategories(categoria):
                if subcategory_name and subcat.name != subcategory_name:
                    continue
                
                palavras.extend(self.get_words(subcat, full_path=full_path))

        return palavras

# DATA_PATH = Path("extract_data_video/data_organize")

# data_loader = DataLoader(base_path=DATA_PATH)

# for path_category in data_loader.get_categories():
#     list_path_sub_category = data_loader.get_subcategories(path_category)
    
#     for path_sub_category in list_path_sub_category:
#         list_path_words = data_loader.get_words(path_sub_category)
#         for path_words in list_path_words:
#             print(path_words)

In [62]:
import tkinter as tk
from pathlib import Path
import re

class TextReaderApp:
    """Text editor window that colours words by whether they are known.

    Pressing the button tags every ASCII-letter word in the editor: green when
    its lowercase form is in the known-word set, gray otherwise. A summary
    label below the button shows the counts and the share of known words.
    """

    def __init__(self, master, known_words):
        """Build the widgets inside ``master`` and store the vocabulary.

        Args:
            master: Parent Tk widget.
            known_words: Iterable of words; stored lowercased in a set.
        """
        self.master = master
        self.known_words = {entry.lower() for entry in known_words}

        # Editable text area, filling the available space.
        self.text_widget = tk.Text(master, wrap="word", font=("Arial", 14))
        self.text_widget.pack(expand=True, fill="both", padx=10, pady=10)

        # Button that triggers the analysis of the current text.
        self.send_button = tk.Button(master, text="Enviar", command=self._process_text)
        self.send_button.pack(pady=(0, 5))

        # Left-aligned label that receives the statistics summary.
        self.stats_label = tk.Label(master, justify="left", font=("Arial", 12), anchor="w")
        self.stats_label.pack(padx=10, pady=5, anchor="w")

        self._configure_tags()

    def _configure_tags(self):
        """Register the two colour tags, each using its own name as colour."""
        for colour in ("green", "gray"):
            self.text_widget.tag_config(colour, foreground=colour)

    def _process_text(self):
        """Re-tag every word in the editor and refresh the statistics."""
        editor = self.text_widget

        # Drop the tags left over from a previous run.
        for colour in ("green", "gray"):
            editor.tag_remove(colour, "1.0", tk.END)

        content = editor.get("1.0", tk.END)
        self.total_words = 0
        self.known_words_count = 0

        editor.mark_set("insert", "1.0")

        for match in re.finditer(r"[a-zA-Z]+", content):
            self.total_words += 1
            is_known = match.group().lower() in self.known_words
            # Match offsets are character counts from the start of the text,
            # expressed as Tk indices relative to "1.0".
            editor.tag_add(
                "green" if is_known else "gray",
                f"1.0+{match.start()}c",
                f"1.0+{match.end()}c",
            )
            if is_known:
                self.known_words_count += 1

        self._show_statistics()

    def _show_statistics(self):
        """Write the word counts and known-word percentage to the label."""
        total, known = self.total_words, self.known_words_count
        # Guard against division by zero when the text has no words.
        if total:
            percent = known / total * 100
        else:
            percent = 0

        summary = "".join(
            (
                f"Total de palavras: {total}\n",
                f"Conhecidas: {known}\n",
                f"Desconhecidas: {total - known}\n",
                f"Compreensão: {percent:.1f}%",
            )
        )
        self.stats_label.config(text=summary)


if __name__ == "__main__":
    # Vocabulary: every word name found under the data directory, followed by
    # a few words added by hand.
    DATA_PATH = Path("extract_data_video/data_organize")
    data_loader = DataLoader(base_path=DATA_PATH)
    words_that_i_know = [
        *data_loader.get_all_words(full_path=False),
        "how",
        "many",
        "pieces",
        "you",
        "retrieve",
    ]

    # Main window hosting the reader.
    root = tk.Tk()
    root.title("Leitura de Texto")
    app = TextReaderApp(root, words_that_i_know)

    # Optional starting text placed in the editor.
    example_text = """How many-teste pieces you retrieve from your RAG system affects the result."""
    app.text_widget.insert("1.0", example_text)

    # Hand control to Tk until the window is closed.
    root.mainloop()
