In [3]:
import os
import pandas as pd

def create_image_log(user_name, image_folder, log_path):
    """Create or extend the per-user CSV log of images to review.

    Every ``.png``/``.jpg`` file in ``image_folder`` that is not yet listed in
    ``<log_path>/<user_name>_image_log.csv`` is appended with the defaults
    ``Status='Not Review'``, ``HTML Link=''`` and
    ``Processing Status='Not Started'``. Rows already present in the log are
    kept as they are; a missing ``Processing Status`` column is backfilled
    with ``'Not Started'``.

    Args:
        user_name: Prefix of the log file name.
        image_folder: Directory that is scanned (non-recursively) for images.
        log_path: Directory that holds the log file; created if missing.

    Returns:
        The path of the CSV log file that was written.

    Raises:
        OSError: If ``image_folder`` cannot be listed (e.g. FileNotFoundError).
            Raised before anything is created on disk.
    """
    # List the folder first so that a bad image_folder fails before the log
    # directory is created. Sorting makes the order of appended rows
    # reproducible (os.listdir returns entries in arbitrary order).
    images = sorted(f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg')))

    os.makedirs(log_path, exist_ok=True)
    log_file = os.path.join(log_path, f"{user_name}_image_log.csv")

    if os.path.exists(log_file):
        log_df = pd.read_csv(log_file)
        if 'Processing Status' not in log_df.columns:
            log_df['Processing Status'] = 'Not Started'
        existing_images = set(log_df['Image Name'])
    else:
        log_df = pd.DataFrame(columns=['Image Name', 'Image Path', 'Status', 'HTML Link', 'Processing Status'])
        existing_images = set()

    image_data = [
        {
            'Image Name': img,
            'Image Path': os.path.join(image_folder, img),
            'Status': 'Not Review',
            'HTML Link': '',
            'Processing Status': 'Not Started'
        }
        for img in images
        if img not in existing_images
    ]

    if image_data:
        new_log_df = pd.DataFrame(image_data)
        if log_df.empty:
            # Nothing to append to: use the new rows directly instead of
            # concatenating with an empty frame, but keep the column layout
            # (including any extra columns) of the existing header.
            columns = list(log_df.columns) + [c for c in new_log_df.columns if c not in log_df.columns]
            log_df = new_log_df.reindex(columns=columns)
        else:
            log_df = pd.concat([log_df, new_log_df], ignore_index=True)

    log_df.to_csv(log_file, index=False)
    print(f"Log updated for {user_name} at {log_file}")
    return log_file


In [4]:
def sync_image_log(user_name, image_folder, log_path):
    """Bring the per-user image log in line with the contents of the folder.

    If ``<log_path>/<user_name>_image_log.csv`` does not exist, the work is
    delegated to ``create_image_log``. Otherwise rows whose image is no longer
    in ``image_folder`` are dropped, and ``.png``/``.jpg`` files that are not
    logged yet are appended with ``Status='Not Review'``, ``HTML Link=''`` and
    ``Processing Status='Not Started'``. Surviving rows keep their values.

    Args:
        user_name: Prefix of the log file name.
        image_folder: Directory that is scanned (non-recursively) for images.
        log_path: Directory that holds the log file.

    Returns:
        The path of the CSV log file.

    Raises:
        OSError: If ``image_folder`` cannot be listed (e.g. FileNotFoundError).
    """
    log_file = os.path.join(log_path, f"{user_name}_image_log.csv")

    if not os.path.exists(log_file):
        print(f"No log file found for {user_name}. Creating a new one...")
        return create_image_log(user_name, image_folder, log_path)

    log_df = pd.read_csv(log_file)

    # Older logs may predate this column; backfill it with the default.
    if 'Processing Status' not in log_df.columns:
        log_df['Processing Status'] = 'Not Started'

    images_in_folder = {f for f in os.listdir(image_folder) if f.endswith(('.png', '.jpg'))}
    images_in_log = set(log_df['Image Name'])

    # Keep only the rows whose image still exists in the folder.
    log_df = log_df[log_df['Image Name'].isin(images_in_folder)]

    # Build all new rows first and append them once (growing the frame with
    # concat inside a loop is quadratic). Sorted for a reproducible row order.
    new_rows = [
        {
            'Image Name': img,
            'Image Path': os.path.join(image_folder, img),
            'Status': 'Not Review',
            'HTML Link': '',
            'Processing Status': 'Not Started'
        }
        for img in sorted(images_in_folder - images_in_log)
    ]

    if new_rows:
        new_df = pd.DataFrame(new_rows)
        if log_df.empty:
            # Avoid concatenating with an empty frame; keep the log's column layout.
            columns = list(log_df.columns) + [c for c in new_df.columns if c not in log_df.columns]
            log_df = new_df.reindex(columns=columns)
        else:
            log_df = pd.concat([log_df, new_df], ignore_index=True)

    log_df.to_csv(log_file, index=False)
    print(f"Log synchronized for {user_name}")

    return log_file

In [5]:
# Reviewer whose image folder and log file are handled in this session.
user_name = "Duc"

# Absolute locations of the shared log directory and this user's image folder.
log_path = "/content/drive/MyDrive/user/file_logs/table_logs"
image_folder = f"/content/drive/MyDrive/user/dataset/table_ocr/image/local/{user_name}"

# Reconcile the CSV log with the folder contents and keep the log's path.
log_file = sync_image_log(
    user_name=user_name,
    image_folder=image_folder,
    log_path=log_path,
)

No log file found for Duc. Creating a new one...


FileNotFoundError: [WinError 3] The system cannot find the path specified: '/content/drive/MyDrive/user/dataset/table_ocr/image/local/Duc'

In [6]:
import os
import pandas as pd
from PIL import Image
import ipywidgets as widgets
from IPython.display import display, clear_output

def review_images_gui(user_name, log_file, skip_reviewed=True):
    """Display an ipywidgets GUI to accept or decline the images listed in a log CSV.

    The log is read from ``log_file``; each Accept/Decline click updates the
    row's ``Status`` column and immediately writes the whole log back to
    ``log_file``. Images are loaded from the ``Image Path`` column.

    Args:
        user_name: Not used by this function; kept so existing calls keep working.
        log_file: Path of the CSV log (needs the columns ``Image Name``,
            ``Image Path`` and ``Status``).
        skip_reviewed: If True, only ``'Not Review'`` images are shown and the
            status filter, the "Show Latest" button and the jump box are
            locked/hidden. If False, the filter starts at ``'All'`` and can be
            changed by the user.

    Returns:
        None. Prints a message and returns without showing the GUI when no
        image matches the initial filter.
    """
    # Load the log; a fresh 0..n-1 index makes row labels usable as positions.
    log_df = pd.read_csv(log_file).reset_index(drop=True)

    # Dropdown to filter the images by review status
    status_filter = widgets.Dropdown(
        options=['All', 'Not Review', 'Accept', 'Decline'],
        value='Not Review' if skip_reviewed else 'All',
        description='Filter:',
        disabled=False,
    )

    def update_idx_list():
        """Return the row indices matching the current filter, in ascending order."""
        if status_filter.value == 'All':
            return log_df.index.tolist()
        return log_df[log_df['Status'] == status_filter.value].index.tolist()

    # Initial list of row indices to review
    idx_list = update_idx_list()
    if not idx_list:
        print("No images to display!")
        return

    # Widgets
    image_widget = widgets.Image(format='png', layout=widgets.Layout(width='800px', height='auto', margin='10px 0'))
    status_label = widgets.HTML(value="<b>Reviewing:</b>")
    current_status_label = widgets.HTML(value="<b>Status:</b> Not Review")

    accept_button = widgets.Button(description="Accept", button_style="success")
    decline_button = widgets.Button(description="Decline", button_style="danger")
    skip_button = widgets.Button(description="Skip", button_style="info")
    back_button = widgets.Button(description="Back", button_style="warning")
    next_button = widgets.Button(description="Next", button_style="primary")
    show_latest_button = widgets.Button(description="Show Latest", button_style="warning")

    jump_to_input = widgets.BoundedIntText(
        value=0,
        min=0,
        max=len(log_df) - 1,
        step=1,
        description='Jump to:',
        layout=widgets.Layout(width='150px')
    )
    jump_to_button = widgets.Button(description="Go", button_style="info")

    # The jump box is only meaningful when every image is listed, so its
    # initial visibility follows the initial filter value (it used to start
    # hidden even when the filter started at 'All').
    jump_box = widgets.HBox(
        [jump_to_input, jump_to_button],
        layout=widgets.Layout(
            margin='0 0 0 20px',
            display='flex' if status_filter.value == 'All' else 'none'
        )
    )

    # State: row index of the image on screen, or None when the image area
    # has been cleared because the selected filter matches nothing.
    current_index = idx_list[0]

    def load_image(idx):
        """Show the image of row ``idx`` and refresh both labels."""
        img_path = log_df.loc[idx, 'Image Path']
        with open(img_path, "rb") as file:
            image_widget.value = file.read()
        # The row may not be part of the filtered list (e.g. after a jump);
        # show a placeholder position instead of raising ValueError.
        if idx in idx_list:
            position = f"{idx_list.index(idx) + 1}/{len(idx_list)}"
        else:
            position = f"-/{len(idx_list)}"
        status_label.value = f"<b>Reviewing:</b> {log_df.loc[idx, 'Image Name']} ({position})"
        current_status_label.value = f"<b>Status:</b> {log_df.loc[idx, 'Status']}"

    def handle_button_click(button):
        """Handle Accept, Decline and Skip: record the decision, then advance."""
        nonlocal current_index, idx_list

        if current_index is None:
            # Nothing is displayed, so there is nothing to decide on.
            return

        if button.description in ("Accept", "Decline"):
            # The button captions are exactly the status values stored in the log.
            log_df.loc[current_index, 'Status'] = button.description
            log_df.to_csv(log_file, index=False)

        current_status_label.value = f"<b>Status:</b> {log_df.loc[current_index, 'Status']}"

        # Rebuild the list so it reflects the new status under the active filter.
        idx_list = update_idx_list()

        # Advance to the next listed image after the current one; wrap to the
        # start so images skipped earlier come up again. (Previously the view
        # always reset to the first entry, so Skip never moved forward.)
        later = [i for i in idx_list if i > current_index]
        if later:
            current_index = later[0]
        elif idx_list:
            current_index = idx_list[0]
        else:
            status_label.value = f"<b>No more images in '{status_filter.value}' status!</b>"
            return
        load_image(current_index)

    def handle_navigation(button):
        """Handle the Back and Next buttons."""
        nonlocal current_index

        if current_index is None or not idx_list:
            return

        go_back = button.description == "Back"
        if current_index in idx_list:
            current_pos = idx_list.index(current_index)
            if go_back and current_pos > 0:
                current_index = idx_list[current_pos - 1]
            elif not go_back and current_pos < len(idx_list) - 1:
                current_index = idx_list[current_pos + 1]
        else:
            # The displayed row is not in the filtered list: move to its
            # nearest listed neighbour in the requested direction.
            if go_back:
                candidates = [i for i in idx_list if i < current_index]
                if candidates:
                    current_index = candidates[-1]
            else:
                candidates = [i for i in idx_list if i > current_index]
                if candidates:
                    current_index = candidates[0]

        load_image(current_index)

    def handle_jump(button):
        """Jump to the row index typed into the jump box."""
        nonlocal current_index

        jump_to_idx = jump_to_input.value
        if 0 <= jump_to_idx < len(log_df):
            current_index = jump_to_idx
            load_image(current_index)
        else:
            status_label.value = f"<b>Index {jump_to_idx} is out of bounds.</b>"

    def handle_show_latest(button):
        """Show the first image in the current list that is still 'Not Review'."""
        nonlocal current_index

        for idx in idx_list:
            if log_df.loc[idx, 'Status'] == 'Not Review':
                current_index = idx
                load_image(current_index)
                return

        status_label.value = "<b>No images left to review!</b>"

    def handle_filter_change(change):
        """Rebuild the list and the view when the status filter changes."""
        nonlocal current_index, idx_list
        idx_list = update_idx_list()

        # Show the jump box only when all images are listed
        if status_filter.value == 'All':
            jump_box.layout.display = 'flex'
        else:
            jump_box.layout.display = 'none'

        if idx_list:
            current_index = idx_list[0]
            load_image(current_index)
        else:
            status_label.value = f"<b>No images with '{status_filter.value}' status!</b>"
            image_widget.value = b''
            # No image is on screen any more; make sure Accept/Decline cannot
            # silently modify the previously shown row.
            current_index = None

    # Wire up the events
    accept_button.on_click(handle_button_click)
    decline_button.on_click(handle_button_click)
    skip_button.on_click(handle_button_click)
    back_button.on_click(handle_navigation)
    next_button.on_click(handle_navigation)
    jump_to_button.on_click(handle_jump)
    show_latest_button.on_click(handle_show_latest)
    status_filter.observe(handle_filter_change, names='value')

    # Lock the filter and hide the extra controls when only unreviewed images are shown
    if skip_reviewed:
        show_latest_button.layout.display = 'none'
        status_filter.disabled = True
        jump_box.layout.display = 'none'

    # Show the first image
    load_image(current_index)

    # Assemble and display the interface
    filter_box = widgets.HBox([status_filter], layout=widgets.Layout(margin='10px 0'))

    buttons_top = widgets.HBox([
        back_button,
        next_button,
        show_latest_button
    ], layout=widgets.Layout(margin='10px 0'))

    buttons_bottom = widgets.HBox([
        accept_button,
        decline_button,
        skip_button,
        jump_box
    ], layout=widgets.Layout(margin='10px 0'))

    display(widgets.VBox([
        filter_box,
        image_widget,
        status_label,
        current_status_label,
        buttons_top,
        buttons_bottom
    ]))

In [5]:
# Open the review GUI with the status filter unlocked. This cell needs
# `user_name` and `log_file` to be defined by an earlier cell.
review_images_gui(
    user_name=user_name,
    log_file=log_file,
    skip_reviewed=False,
)

NameError: name 'log_file' is not defined