# Rename files

In [1]:
import os


def rename_files_in_directory(
    directory: str,
    prefix: str = "f_",
    extension: str = ".png"
) -> None:
    """Rename every regular file in ``directory`` to ``<prefix><NNN><extension>``.

    Files are numbered from 1 in sorted-name order, zero-padded to three
    digits (e.g. ``f_001.png``). The original extension is dropped and
    ``extension`` is appended instead; file contents are not converted.

    The rename runs in two passes (original name -> staging name -> final
    name). Renaming directly can destroy data: ``os.rename`` replaces an
    existing destination file on POSIX systems, so ``a.png -> f_001.png``
    would overwrite an ``f_001.png`` that is still waiting for its own turn.

    Args:
        directory: Folder whose files are renamed. Sub-directories are ignored.
        prefix: Text placed before the running number.
        extension: Text placed after the running number (include the dot).

    Raises:
        FileExistsError: If a staging name is already taken in ``directory``.
            Nothing has been renamed when this is raised.
    """
    # Sort by name so the numbering is stable between runs.
    files = sorted(
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    )

    # Plan every move up front: (original name, staging name or None, final name).
    # A staging name of None means the file already carries its final name.
    staging_tag = f".renaming-{os.getpid()}-"
    plan = []
    for i, filename in enumerate(files, start=1):
        # Build the new filename, e.g. f_001.png
        new_name = f"{prefix}{i:03d}{extension}"
        staged_name = None if filename == new_name else f"{staging_tag}{i:06d}"
        plan.append((filename, staged_name, new_name))

    # Verify the staging names are free before touching any file.
    for _, staged_name, _ in plan:
        if staged_name is not None and os.path.exists(os.path.join(directory, staged_name)):
            raise FileExistsError(
                f"Staging name '{staged_name}' already exists in '{directory}'."
            )

    # Pass 1: move every file that changes name out of the way.
    for filename, staged_name, _ in plan:
        if staged_name is not None:
            os.rename(
                os.path.join(directory, filename),
                os.path.join(directory, staged_name),
            )

    # Pass 2: all target names are now free (or held by their rightful owner).
    for filename, staged_name, new_name in plan:
        if staged_name is not None:
            os.rename(
                os.path.join(directory, staged_name),
                os.path.join(directory, new_name),
            )
        print(f"Renamed: {filename} → {new_name}")

# Number the receipts in each class folder with a class-specific prefix.
for folder_path, name_prefix in (
    ('../input/bni-receipts/fraud/', 'f_'),
    ('../input/bni-receipts/genuine/', 'g_'),
):
    rename_files_in_directory(folder_path, prefix=name_prefix, extension='.png')

Renamed: f_001.png → f_001.png
Renamed: f_002.png → f_002.png
Renamed: f_003.png → f_003.png
Renamed: f_004.png → f_004.png
Renamed: f_005.png → f_005.png
Renamed: f_006.png → f_006.png
Renamed: f_007.png → f_007.png
Renamed: f_008.png → f_008.png
Renamed: f_009.png → f_009.png
Renamed: f_010.png → f_010.png
Renamed: f_011.png → f_011.png
Renamed: f_012.png → f_012.png
Renamed: f_013.png → f_013.png
Renamed: f_014.png → f_014.png
Renamed: f_015.png → f_015.png
Renamed: f_016.png → f_016.png
Renamed: f_017.png → f_017.png
Renamed: f_018.png → f_018.png
Renamed: f_019.png → f_019.png
Renamed: f_020.png → f_020.png
Renamed: f_021.png → f_021.png
Renamed: f_022.png → f_022.png
Renamed: f_023.png → f_023.png
Renamed: f_024.png → f_024.png
Renamed: f_025.png → f_025.png
Renamed: g_001.png → g_001.png
Renamed: g_002.png → g_002.png
Renamed: g_003.png → g_003.png
Renamed: g_004.png → g_004.png
Renamed: g_005.png → g_005.png
Renamed: g

# Generate Dataset Metadata

In [2]:
import os
import csv
from pathlib import Path
from PIL import Image, ExifTags

# Build one metadata row per receipt image and write all rows to a CSV file.
base = Path('../input/bni-receipts/')
output_csv = Path("../output/metadata.csv")

# Declared explicitly so the header can be written even when no image is found
# (taking the keys of rows[0] raises IndexError on an empty list).
fieldnames = [
    "filename",
    "label",
    "bank_name",
    "edit_type",
    "image_source",
    "width",
    "height",
    "has_exif",
    "exif_software",
    "note",
]

rows = []
# enumerate() supplies the numeric label: genuine -> 0, fraud -> 1.
for label, folder in enumerate(["genuine", "fraud"]):
    # Sorted so the row order does not depend on the filesystem listing order.
    for img_path in sorted((base/folder).glob("*.png")):
        # The context manager closes the file handle once the metadata is read.
        with Image.open(img_path) as im:
            width, height = im.size
            # Public EXIF accessor; returns an empty mapping when there is no EXIF.
            exif = im.getexif()
            software = ""
            for k, v in exif.items():
                if ExifTags.TAGS.get(k) == "Software":
                    software = v
            has_exif = 1 if exif else 0
        rows.append({
            "filename": img_path.name,
            "label": label,
            "bank_name": "Bank BNI",
            # Fraud rows get the 'unknown' placeholder until they are labeled.
            "edit_type": "none" if label==0 else "unknown",
            "image_source": "camera" if label==0 else "editor",
            "width": width,
            "height": height,
            "has_exif": has_exif,
            "exif_software": software,
            "note": ""
        })

# Make sure the destination folder exists before opening the file for writing.
output_csv.parent.mkdir(parents=True, exist_ok=True)
with open(output_csv, "w", newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
print("metadata.csv created with", len(rows), "rows")


metadata.csv created with 96 rows


In [6]:
# Canonical edit-type labels; used as the options of the "Type" dropdown.
edit_type_choices = ('font', 'logo', 'color', 'format', 'layout', 'missing_info')

# Coarse families that group the edit types above.
edit_group = dict(
    visual=['font', 'color', 'logo'],
    layout=['format', 'layout'],
    semantic=['missing_info'],
)

# Severity labels; used as the options of the "Severity" dropdown.
edit_severity_choices = ('low', 'medium', 'high')

# Free-text description of an edit -> canonical edit-type label.
mapping = dict([
    ("change font type", "font"),
    ("different logo", "logo"),
    ("change font color", "color"),
    ("different format in text total receipt", "format"),
    ("different layout", "layout"),
    ("missing some information", "missing_info"),
])



In [None]:
import os
import pandas as pd
from IPython.display import display
import ipywidgets as widgets

In [None]:
# --- Configurations ---
image_dir = "../input/bni-receipts/fraud/"        # directory holding the images to label
csv_path = "../output/metadata.csv"     # metadata CSV that is read and rewritten in place
label_column = "edit_type"               # column name for label

# --- Load existing CSV or create new one ---
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
else:
    df = pd.DataFrame(columns=["filename", label_column])

# --- Collect all images in the directory ---
image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
df_existing = set(df["filename"].tolist())

# A row still needs a label while its label column holds the 'unknown' placeholder.
# astype(str) keeps the .str accessor usable when the column is empty or all-NaN.
is_unknown = df[label_column].astype(str).str.lower() == 'unknown'
# Queue only rows whose image is present in image_dir; a missing file would
# otherwise abort the labeling session when the UI tries to open it.
is_on_disk = df["filename"].isin(image_files)
unlabeled_rows = df[is_unknown & is_on_disk]
unlabeled_images = unlabeled_rows["filename"].tolist()

missing_count = int((is_unknown & ~is_on_disk).sum())
if missing_count:
    print(f"Skipping {missing_count} unlabeled row(s) whose image is not in {image_dir}")

if not unlabeled_images:
    print("✅ All images are already labeled!")
else:
    print(f"🔹 {len(unlabeled_images)} images left to label.\n")

# --- Interactive Labeling ---
# Position in unlabeled_images of the image currently shown.
current_index = 0

# Widgets
# The receipts are PNG files, so declare the matching format; the widget uses
# it to describe the raw bytes to the browser.
image_widget = widgets.Image(format='png', width=400, height=400)
edit_type_dropdown = widgets.Dropdown(
    options=edit_type_choices,
    description='Type:',
    style={'description_width': 'initial'},
)
edit_severity_dropdown = widgets.Dropdown(
    options=edit_severity_choices,
    description='Severity:',
    style={'description_width': 'initial'},
)
save_button = widgets.Button(description="Save & Next", button_style='success')
# Shows progress ("Image i of n: name") and the completion message.
status_label = widgets.Label()

# Function to update the displayed image
def show_image(index) -> None:
    """Load ``unlabeled_images[index]`` into the image widget and reset the form.

    Args:
        index: Position of the image in ``unlabeled_images``.

    Side effects:
        Updates ``image_widget`` (format and bytes), ``status_label`` and
        resets both dropdowns to their first option.
    """
    image_name = unlabeled_images[index]
    image_path = os.path.join(image_dir, image_name)

    # Declare the encoding of the bytes from the file extension, because the
    # queue may hold JPEG as well as PNG files.
    suffix = os.path.splitext(image_name)[1].lower().lstrip(".")
    image_widget.format = "jpeg" if suffix in ("jpg", "jpeg") else (suffix or "png")

    with open(image_path, "rb") as f:
        image_widget.value = f.read()
    status_label.value = f"Image {index+1} of {len(unlabeled_images)}: {unlabeled_images[index]}"
    # Start every image from the same default selection.
    edit_type_dropdown.value = edit_type_choices[0]
    edit_severity_dropdown.value = edit_severity_choices[0]

# Function to save label and move to next image
def on_save_clicked(b) -> None:
    """Store the selected labels for the current image and advance the queue.

    Writes the dropdown selections into ``df`` for the row whose filename
    matches the current image, saves ``df`` to ``csv_path`` immediately, then
    either shows the next image or disables the form when the queue is done.

    Args:
        b: The button instance passed by the click callback (unused).
    """
    global current_index

    # Ignore a stray click once every queued image has been handled.
    if current_index >= len(unlabeled_images):
        return

    filename = unlabeled_images[current_index]
    edit_type = edit_type_dropdown.value
    edit_severity = edit_severity_dropdown.value

    # Update the row in the DataFrame; the label goes into the configured
    # label column so it matches the column used to build the queue.
    row_mask = df["filename"] == filename
    df.loc[row_mask, label_column] = edit_type
    df.loc[row_mask, "edit_severity"] = edit_severity

    # Save after each label so an interrupted session loses no work.
    df.to_csv(csv_path, index=False)

    current_index += 1
    if current_index >= len(unlabeled_images):
        status_label.value = "🎉 All images labeled and saved!"
        image_widget.value = b""  # clear image
        edit_type_dropdown.disabled = True
        edit_severity_dropdown.disabled = True
        save_button.disabled = True
    else:
        show_image(current_index)

# Run on_save_clicked every time the button is pressed.
save_button.on_click(on_save_clicked)

# --- Display the labeling UI ---
# Nothing is rendered when there is no image left to label.
if len(unlabeled_images) > 0:
    show_image(current_index)
    ui_parts = (
        image_widget,
        edit_type_dropdown,
        edit_severity_dropdown,
        save_button,
        status_label,
    )
    display(*ui_parts)


🔹 25 images left to label.



Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x04{\x00\x00\x0c\xff\x08\x06\x00\x00\x00\xa6\x9f\xb0…

Dropdown(description='Type:', options=('font', 'logo', 'color', 'format', 'layout', 'missing_info'), style=Des…

Dropdown(description='Severity:', options=('low', 'medium', 'high'), style=DescriptionStyle(description_width=…

Button(button_style='success', description='Save & Next', style=ButtonStyle())

Label(value='Image 1 of 25: f_001.png')

Mengurutkan kembali kolom yang benar

In [8]:
import pandas as pd
from typing import Union

# Reload the metadata CSV from disk; `df` is rebound to this fresh copy.
df = pd.read_csv('../output/metadata.csv')

def move_column_after(
    df: pd.DataFrame,
    col_to_move: str,
    after_col: str,
    output_path: Union[str, None] = None
) -> pd.DataFrame:
    """Return ``df`` with ``col_to_move`` placed immediately after ``after_col``.

    The frame passed in is left untouched; a re-ordered frame is returned.

    Args:
        df: Frame whose columns are re-ordered.
        col_to_move: Name of the column to relocate.
        after_col: Name of the column that ``col_to_move`` should follow.
        output_path: When given (and non-empty), the re-ordered frame is also
            written to this path as CSV without the index.

    Returns:
        The re-ordered frame.

    Raises:
        ValueError: If either column is missing, or if both names are the same.
    """
    # Ensure both columns exist
    if col_to_move not in df.columns:
        raise ValueError(f"Column '{col_to_move}' not found in DataFrame.")
    if after_col not in df.columns:
        raise ValueError(f"Column '{after_col}' not found in DataFrame.")
    # Without this check the anchor disappears together with the moved column
    # and the lookup below fails with an unhelpful "x is not in list" message.
    if col_to_move == after_col:
        raise ValueError(f"Cannot move column '{col_to_move}' after itself.")

    # Create new order: take the column out, then re-insert it after the anchor.
    cols = list(df.columns)
    cols.remove(col_to_move)
    insert_at = cols.index(after_col) + 1
    cols.insert(insert_at, col_to_move)

    # Reorder
    reordered = df[cols]

    # Optionally save
    if output_path:
        reordered.to_csv(output_path, index=False)
        print(f"✅ DataFrame saved to: {output_path}")

    return reordered

# Place 'edit_severity' directly after 'edit_type'; because output_path is given,
# the re-ordered frame is also written over ../output/metadata.csv.
df = move_column_after(df, col_to_move='edit_severity', after_col='edit_type', output_path='../output/metadata.csv')

✅ DataFrame saved to: ../output/metadata.csv
