In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
def bezier_curve(t, P0, P1, P2, P3, P4):
    return (
        (1 - t) ** 4 * P0 +
        4 * (1 - t) ** 3 * t * P1 +
        6 * (1 - t) ** 2 * t ** 2 * P2 +
        4 * (1 - t) * t ** 3 * P3 +
        t ** 4 * P4
    )

def erase_slur(image, bezier_points):
    # Convert to binary using Otsu's thresholding
    _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    row_pixel_count = np.sum(binary > 0, axis=1)  # Compute row-wise histogram
    max_pixel_count = np.max(row_pixel_count)  # Equal to image width in most cases
    staff_lines = set(np.where(row_pixel_count > 0.7 * max_pixel_count)[0])  # PARAMETER

    for row in list(staff_lines):
        for neighbor in range(row - 1, row + 1):  # PARAMETER
            if 0 <= neighbor < binary.shape[0] and row_pixel_count[neighbor] > 0.2 * max_pixel_count:
                staff_lines.add(neighbor)  # Belong to staff line if above the 20% threshold
    staff_lines = sorted(staff_lines)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 35))  # PARAMETER
    stems = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=1)

    mask = np.zeros_like(image, dtype=np.uint8)
    t_vals = np.linspace(0, 1, 200)
    bezier_curve_points = np.array([bezier_curve(t, *bezier_points) for t in t_vals], dtype=np.int32)

    thickness = 12  # radius, not the diameter
    for pt in bezier_curve_points:
        cv2.circle(mask, (pt[0], pt[1]), thickness, 255, -1)

    # Remove slur while keeping staff and stems
    result = image.copy()
    for y in range(mask.shape[0]):
        for x in range(mask.shape[1]):
            if mask[y, x] > 0:
                if y in staff_lines or stems[y, x] > 0:
                    continue  # Keep staff lines and stems
                result[y, x] = 255  # Set slur pixels to white

    _, binary_result = cv2.threshold(result, 64, 255, cv2.THRESH_BINARY)

    return binary_result

In [3]:
csv_path = "control_points.csv"
df = pd.read_csv(csv_path)
output_folder = "Erase Slur"
os.makedirs(output_folder, exist_ok=True)
valid_rows = df[df["Invalid Bezier Curve"] == 0]

for _, row in tqdm(valid_rows.iterrows(), total=len(valid_rows), desc="Erasing Slurs"):
    
    image_path = os.path.join("Use", row["Crop File Name"])
    if not os.path.exists(image_path):
        print(f"Skipping missing image: {image_path}")
        continue
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Error: Could not load {image_path}")
        continue

    bezier_points = np.array([
        eval(row["P0"]), eval(row["P1"]), eval(row["P2"]),
        eval(row["P3"]), eval(row["P4"])
    ])

    binary_result = erase_slur(image, bezier_points)
    output_path = os.path.join(output_folder, f"e_{row['Crop File Name']}")
    cv2.imwrite(output_path, binary_result)

Erasing Slurs: 100%|█████████████████████████████████████████████████████████████████| 636/636 [00:26<00:00, 24.03it/s]
