# JapDic 
Version 2.0
Made by Vik

In [18]:
from notion_client import Client
import os
from dotenv import load_dotenv
import pandas as pd
import glob

In [19]:
load_dotenv("locals.env")
token = os.getenv("NOTION_TOKEN")
table_id = os.getenv("NOTION_TABLE_ID")
page_id = os.getenv("NOTION_JAPDIC_DEMO_PAGE_ID")

In [20]:
# database_id = os.getenv("NOTION_DATABASE_ID")

client = Client(auth=token)
page = client.pages.retrieve(page_id=page_id)

## Add header case

In [21]:
# Function to add text to a page
def add_text_to_page(page_id: str, text: str):
    """
    Adds text as a paragraph block to a Notion page

    Args:
        page_id (str): The ID of the Notion page
        text (str): The text content to add
    """
    try:
        response = client.blocks.children.append(
            block_id=page_id,
            children=[
                {
                    "type": "paragraph",
                    "paragraph": {
                        "rich_text": [
                            {"type": "text", "text": {"content": text, "link": None}}
                        ],
                        "color": "default",
                    },
                }
            ],
        )
        return response
    except Exception as e:
        print(f"Error adding text to page: {e}")
        return None

## Table

In [22]:
def check_for_existing_table(page_id: str) -> str | None:
    """
    Checks if a table already exists on the page

    Args:
        page_id (str): The ID of the Notion page

    Returns:
        str | None: Table ID if found, None otherwise
    """
    try:
        # Get all blocks on the page
        response = client.blocks.children.list(block_id=page_id)

        # Look for a table block
        for block in response["results"]:
            if block["type"] == "table":
                return block["id"]

        return None
    except Exception as e:
        print(f"Error checking for existing table: {e}")
        return None

In [23]:
def create_japanese_table(page_id: str):
    """
    Creates a table with columns: ID, ROOMA-JI, Pronounced, English

    Args:
        page_id (str): The ID of the parent page where the table will be created
    """
    try:
        response = client.blocks.children.append(
            block_id=page_id,
            children=[
                {
                    "type": "table",
                    "table": {
                        "table_width": 4,
                        "has_column_header": True,
                        "has_row_header": False,
                        "children": [
                            # Header row
                            {
                                "type": "table_row",
                                "table_row": {
                                    "cells": [
                                        [{"type": "text", "text": {"content": "ID"}}],
                                        [
                                            {
                                                "type": "text",
                                                "text": {"content": "ROOMA-JI"},
                                            }
                                        ],
                                        [
                                            {
                                                "type": "text",
                                                "text": {"content": "Pronounced"},
                                            }
                                        ],
                                        [
                                            {
                                                "type": "text",
                                                "text": {"content": "English"},
                                            }
                                        ],
                                    ]
                                },
                            }
                            # You can add more rows here if needed
                        ],
                    },
                }
            ],
        )
        return response
    except Exception as e:
        print(f"Error creating table: {e}")
        return None


In [24]:
def get_or_create_table(page_id: str) -> str | None:
    """
    Gets existing table ID or creates new table

    Args:
        page_id (str): The ID of the Notion page

    Returns:
        str | None: Table ID if found/created, None if failed
    """
    # First check for existing table
    table_id = check_for_existing_table(page_id)

    if table_id:
        print("Found existing table")
        return table_id

    # If no table exists, create new one
    print("Creating new table")
    response = create_japanese_table(page_id)
    if response and "results" in response:
        table_id = response["results"][0]["id"]

        # Store the table ID in .env file
        with open("locals.env", "a") as f:
            f.write(f"\nNOTION_TABLE_ID={table_id}")

        return table_id

    return None

In [25]:
def add_table_row(table_id: str, id_num: int, rooma_ji: str, english: str):
    """
    Adds a new row to the table

    Args:
        table_id (str): The ID of the table
        id_num (int): The ID number
        rooma_ji (str): The romanized Japanese text
        english (str): The English translation
    """
    try:
        response = client.blocks.children.append(
            block_id=table_id,
            children=[
                {
                    "type": "table_row",
                    "table_row": {
                        "cells": [
                            [{"type": "text", "text": {"content": str(id_num)}}],
                            [{"type": "text", "text": {"content": rooma_ji}}],
                            [
                                {"type": "text", "text": {"content": ""}}
                            ],  # Empty Pronounced cell
                            [{"type": "text", "text": {"content": english}}],
                        ]
                    },
                }
            ],
        )
        return response
    except Exception as e:
        print(f"Error adding row: {e}")
        return None

In [26]:
def get_last_row_id(table_id: str) -> int:
    """
    Gets the ID from the last row in the table

    Args:
        table_id (str): The ID of the table

    Returns:
        int: The last ID used in the table, or 0 if table is empty
    """
    try:
        all_rows = []
        start_cursor = None

        while True:
            # Get page of results
            response = client.blocks.children.list(
                block_id=table_id, start_cursor=start_cursor, page_size=100
            )

            all_rows.extend(response["results"])

            # Check if there are more pages
            if not response.get("has_more"):
                break

            start_cursor = response["next_cursor"]

        # Skip the header row and get the last row
        if len(all_rows) <= 1:  # Only header row or empty table
            return 0

        last_row = all_rows[-1]
        if last_row["type"] == "table_row":
            # Get the ID from the first cell
            id_cell = last_row["table_row"]["cells"][0]
            if id_cell and len(id_cell) > 0:
                return int(id_cell[0]["text"]["content"])

        return 0

    except Exception as e:
        print(f"Error getting last row ID: {e}")
        return 0

In [27]:
# Get or create table if needed
if not table_id:
    table_id = get_or_create_table(page_id)
    print(table_id)
    if not table_id:
        print("Failed to get or create table")
        exit(1)

In [28]:
def get_latest_data_file():
    """
    Gets the most recently added file in the data directory

    Returns:
        str: Name of the most recently added file
    """
    data_dir = "data"
    files = []

    # Get all files in data directory with creation times
    for filename in os.listdir(data_dir):
        filepath = os.path.join(data_dir, filename)
        if os.path.isfile(filepath):
            creation_time = os.path.getctime(filepath)
            files.append((filename, creation_time))

    if not files:
        return None

    # Sort by creation time and get most recent
    latest_file = sorted(files, key=lambda x: x[1], reverse=True)[0][0]
    print(f"Latest file found: {latest_file}")
    return latest_file


Code to add rows to table:

In [29]:
# Read the words from the text file
def sendData(new_words):
    """
    Adds new words to the Notion table

    Args:
        new_words (list): List of tuples (english, romaji) containing only new words to add
    """
    next_id = get_last_row_id(table_id) + 1
    for english_word, romaji_word in new_words:
        add_table_row(table_id, next_id, romaji_word, english_word)
        next_id += 1

In [30]:
def get_existing_words(table_id):
    """
    Gets all existing word pairs from the Notion table

    Returns:
        set: Set of (english, romaji) tuples representing existing word pairs
    """
    existing_words = set()

    # Query all rows from the table
    results = client.blocks.children.list(block_id=table_id)

    # Skip the header row and process data rows
    for row in results["results"][1:]:  # Skip first row (header)
        if row["type"] == "table_row":
            cells = row["table_row"]["cells"]
            if len(cells) >= 4:  # Ensure we have enough cells
                english = cells[3][0]["text"]["content"] if cells[3] else ""
                romaji = cells[1][0]["text"]["content"] if cells[1] else ""
                existing_words.add((english.lower(), romaji.upper()))

    return existing_words

In [31]:
def check_duplicates(latest_file, separator):
    """
    Checks which words from the file are already in the Notion table

    Args:
        latest_file (str): Name of the file to check
        separator (str): Separator used in the file between English and Romaji

    Returns:
        tuple: Lists of new and duplicate word pairs
    """
    existing_words = get_existing_words(table_id)
    new_words = []
    duplicates = []

    with open(f"data/{latest_file}", "r") as f:
        for line in f:
            words = line.strip().split(separator)
            if len(words) >= 2:
                english = words[0].lower()
                romaji = words[1].upper()
                word_pair = (english, romaji)

                if word_pair in existing_words:
                    duplicates.append(word_pair)
                else:
                    new_words.append(word_pair)

    print(f"Found {len(new_words)} new words and {len(duplicates)} duplicates")
    if duplicates:
        print("Duplicate words:")
        for eng, rom in duplicates:
            print(f"  {eng} = {rom}")

    return new_words, duplicates


In [32]:
def main_executer():
    latest_file = get_latest_data_file()
    sp = " = "
    new_words, duplicates = check_duplicates(latest_file=latest_file, separator=sp)
    if new_words:
        print(f"Adding {len(new_words)} new words to the table...")
        sendData(new_words)
        print("Done!")
    else:
        print("No new words to add.")

# Main exec lines:

In [34]:
# Use run all option for proper add
main_executer()

Latest file found: 02_12_list.txt
Found 4 new words and 0 duplicates
Adding 4 new words to the table...
Done!


In [36]:
def extract_to_dataframe():
    """
    Extracts all words from data files into a pandas DataFrame and saves as txt
    """

    # Get all data files
    data_files = glob.glob("data/*.txt")

    # Store all word pairs
    all_words = []

    # Process each file
    for file in data_files:
        with open(file, "r") as f:
            for line in f:
                # Skip empty lines
                if not line.strip():
                    continue

                # Split on = and clean up
                parts = line.strip().split(" = ")
                if len(parts) >= 2:
                    english = parts[0].strip().lower()
                    romaji = parts[2].strip()
                    all_words.append((english, romaji))

    # Create DataFrame
    df = pd.DataFrame(all_words, columns=["English", "Romaji"])

    # Remove duplicates
    df = df.drop_duplicates()

    # Sort alphabetically by English
    df = df.sort_values("English")

    # Save to txt file
    output_file = "extract/complete_dictionary.txt"
    df.to_csv(output_file, sep="=", index=False, header=False)

    print(f"Saved {len(df)} unique word pairs to {output_file}")
    return df


# Example usage:
# df = extract_to_dataframe()


IndexError: list index out of range