In [1]:
from paddleocr import PaddleOCR
import cv2
import numpy as np
from PIL import Image

# Step 1: Read the image first so a bad path fails fast, before the OCR model is initialised
img_path = "image_1007.jpg"
image = cv2.imread(img_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Step 2: Initialize OCR and detect text
ocr = PaddleOCR(use_angle_cls=True, lang='en')
result = ocr.ocr(img_path, cls=True)
# Guard against an empty result / a None page entry (reported by some
# PaddleOCR versions when no text is found) so the loop below cannot fail.
text_lines = result[0] if result and result[0] else []

# Step 3: Draw white-filled polygons on a black mask for all detected text boxes
mask = np.zeros(image.shape[:2], dtype=np.uint8)  # same height/width as the image
for line in text_lines:
    points = np.array(line[0]).astype(np.int32)  # line[0] holds the box corner points
    cv2.fillPoly(mask, [points], 255)  # white area marks text

# Optional: visualize where the text is
# cv2.imwrite("text_mask.jpg", mask)

# Step 4: Inpaint (remove text) wherever the mask is white
inpainted = cv2.inpaint(image, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
cv2.imwrite("cleaned_image.jpg", inpainted)


which: no ccache in (/home/sam/Documents/GitHub/Ai_tasks/.venv/bin:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/sam/.pyenv/plugins/pyenv-virtualenv/shims:/home/sam/.pyenv/shims:/home/sam/.pyenv/bin:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/sam/.nvm/versions/node/v22.14.0/bin:/home/sam/.local/bin:/home/sam/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin)


[2025/05/19 10:12:48] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/sam/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/sam/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, ma

True

In [4]:
# Load key from .env
# The first line of the file is used verbatim as the key (surrounding whitespace removed).
# The context manager closes the file handle as soon as the line has been read.
with open(".env", encoding="utf-8") as env_file:
    key = env_file.readline().strip()

if not key:
    # Fail here with a clear message rather than later inside the API client.
    raise ValueError("The first line of .env is empty; expected the API key there.")


In [10]:
import cv2
import numpy as np
from paddleocr import PaddleOCR
# Imported by name because this cell binds the variable `html` to the page markup below.
from html import escape

# === Set up OpenAI client ===
from openai import OpenAI
client = OpenAI(api_key=key)  # `key` is read from .env in an earlier cell

# === Load image ===
img_path = "image_1007.jpg"
image = cv2.imread(img_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# === Run OCR ===
ocr = PaddleOCR(use_angle_cls=True, lang='en')
result = ocr.ocr(img_path, cls=True)
# Guard against an empty result / a None page entry (reported by some
# PaddleOCR versions when no text is found).
ocr_lines = result[0] if result and result[0] else []

# === Extract text for GPT ===
# Each OCR line is (box_points, (text, ...)); only the recognised text is sent to the model.
detected_texts = [line[1][0] for line in ocr_lines]
full_text = " ".join(detected_texts)
print("🧠 Detected:", full_text)

# === Send to GPT-4o ===
comparison_prompt = f"""
You are an assistant that rewrites meme captions to be suitable for children under 13 years old.
Keep the message fun but remove any inappropriate or mature language.
But please keep the original meaning and context of the meme.

Original: {full_text}

Kid-Friendly version:
"""

cleaned_caption = ""
if full_text:
    # Only ask the model when there is something to rewrite; an empty
    # "Original:" would make it invent a caption.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": comparison_prompt}],
        max_tokens=150,
        temperature=0.8
    )
    # message.content may be None; normalise to an empty string.
    cleaned_caption = (response.choices[0].message.content or "").strip()
print("🧼 Cleaned Caption:", cleaned_caption)

# === Create mask from text bounding boxes ===
mask = np.zeros(image.shape[:2], dtype=np.uint8)
for line in ocr_lines:
    print("Removing:", line[1][0])
    points = np.array(line[0]).astype(np.int32)
    points = points.reshape((-1, 1, 2))  # Ensure correct shape for fillPoly
    cv2.fillPoly(mask, [points], 255)

# === Inpaint ===
inpainted = cv2.inpaint(image, mask, 3, cv2.INPAINT_TELEA)
if not cv2.imwrite("cleaned_image.jpg", inpainted):
    # cv2.imwrite returns False on failure instead of raising.
    raise IOError("Could not write cleaned_image.jpg")
print("✅ Inpainted image saved.")

# === Generate HTML output ===
# The caption is model-generated text: escape it so characters such as
# '<' or '&' cannot break (or inject into) the page markup.
html = f"""
<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Meme for Kids</title></head>
<body style="text-align:center; font-family:sans-serif;">
  <h2>Cleaned Meme</h2>
  <img src="cleaned_image.jpg" width="500"/><br/><br/>
  <p><strong>Caption:</strong> {escape(cleaned_caption)}</p>
</body>
</html>
"""

with open("meme_output.html", "w", encoding="utf-8") as f:
    f.write(html)

print("📄 HTML saved as meme_output.html")


[2025/05/19 16:23:27] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/sam/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/sam/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, ma

In [5]:
import base64
import io
# Imported by name because this cell binds the variable `html` to the page markup below.
from html import escape
from PIL import Image

# NOTE: this cell relies on `cv2`, `inpainted` and `cleaned_caption` from the
# caption-rewrite cell; run that cell first.

# Convert inpainted image to PIL and then to base64
inpainted_rgb = cv2.cvtColor(inpainted, cv2.COLOR_BGR2RGB)  # PIL expects RGB channel order
pil_img = Image.fromarray(inpainted_rgb)
buffered = io.BytesIO()
pil_img.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

# The caption is model-generated text: escape it before placing it in markup,
# and render a missing caption as empty rather than the literal "None".
caption_html = escape(cleaned_caption) if cleaned_caption is not None else ""

# Use in HTML (image embedded as a data URI so the page is self-contained)
html = f"""
<!DOCTYPE html>
<html>
<head><meta charset="UTF-8"><title>Meme for Kids</title></head>
<body style="text-align:center; font-family:sans-serif;">
  <h2>Cleaned Meme</h2>
  <img src="data:image/jpeg;base64,{img_str}" width="500"/><br/><br/>
  <p><strong>Caption:</strong> {caption_html}</p>
</body>
</html>
"""

with open("meme_output.html", "w", encoding="utf-8") as f:
    f.write(html)

print("✅ HTML with embedded image saved.")


✅ HTML with embedded image saved.


Below is the latest version. It takes the image, detects the text, and then asks GPT, for each detected line, whether that line is inappropriate; GPT returns either SAFE or a suggested replacement. If a change is needed, we put a black box where the text is and then write the clean version on top of it.

In [None]:
import cv2
import numpy as np
from paddleocr import PaddleOCR
from openai import OpenAI

# === Setup OpenAI client ===
client = OpenAI(api_key=key)  # `key` is read from .env in an earlier cell

# === Load image ===
img_path = "image_1057.jpg"
image = cv2.imread(img_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising;
    # stop here rather than failing later inside a drawing call.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# === Run OCR ===
ocr = PaddleOCR(use_angle_cls=True, lang='en')
result = ocr.ocr(img_path, cls=True)

def black_box_and_replace(image, box, cleaned_text, font_scale=1.0, thickness=2,
                          min_font_scale=0.3, padding=10):
    """Cover a text box with a solid black rectangle and draw replacement text on it.

    The rectangle is the axis-aligned bounding box of ``box``. The text is
    drawn in white, centred in the rectangle, and shrunk in steps of 0.05
    until it fits the box width (minus ``padding``) and the box height, or
    until ``min_font_scale`` is reached.

    Args:
        image: Image array to draw on. It is modified in place.
        box: Sequence of (x, y) corner points of the text region.
        cleaned_text: Replacement text to render.
        font_scale: Starting font scale; it is only ever reduced.
        thickness: Stroke thickness used for measuring and drawing the text.
        min_font_scale: Lower bound for the font scale while shrinking.
        padding: Total horizontal margin (in pixels) kept free inside the box.

    Returns:
        The same ``image`` object that was passed in.
    """
    pts = np.array(box).astype(np.int32).reshape(-1, 2)
    # Hand the drawing calls native Python ints rather than NumPy scalars.
    x1, y1 = int(pts[:, 0].min()), int(pts[:, 1].min())
    x2, y2 = int(pts[:, 0].max()), int(pts[:, 1].max())

    # Draw solid black rectangle (negative thickness means "filled")
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 0), thickness=-1)

    font = cv2.FONT_HERSHEY_SIMPLEX
    max_width = (x2 - x1) - padding
    max_height = y2 - y1
    scale_step = 0.05

    (text_width, text_height), _ = cv2.getTextSize(cleaned_text, font, font_scale, thickness)

    # Shrink until the text fits in both directions. Clamping with max() keeps
    # the scale from drifting below the floor through repeated float subtraction.
    while (text_width > max_width or text_height > max_height) and font_scale > min_font_scale:
        font_scale = max(min_font_scale, font_scale - scale_step)
        (text_width, text_height), _ = cv2.getTextSize(cleaned_text, font, font_scale, thickness)

    # Center text in box; putText takes the bottom-left corner of the text as its origin
    text_x = x1 + (x2 - x1 - text_width) // 2
    text_y = y1 + (y2 - y1 + text_height) // 2

    # Draw white text on black box
    cv2.putText(image, cleaned_text, (text_x, text_y), font, font_scale,
                (255, 255, 255), thickness, cv2.LINE_AA)

    return image

# === Process Each OCR Text Box ===
# Guard against an empty result / a None page entry (reported by some
# PaddleOCR versions when no text is found).
ocr_lines = result[0] if result and result[0] else []

# The context is the same for every line, so build it once. Only the recognised
# text is passed on; box coordinates and scores add nothing for the model.
full_context = " ".join(entry[1][0] for entry in ocr_lines)

for line in ocr_lines:
    box = line[0]
    text = line[1][0].strip()

    if len(text) < 2:
        continue  # skip short garbage

    prompt = f"""
You are a content moderation assistant for meme captions.

Your job is to check if the input text is inappropriate for children under 13.

If the input **contains inappropriate language**, rewrite it into a **natural, clean, and child-appropriate version** that:
- Preserves the original intent or emotion (e.g., frustration, surprise, humor)
- Uses **real and understandable words**
- Avoids made-up substitutes or censored spellings (e.g., no asterisks or partial words)
- Uses normal sentence structure (not dot-separated words)

Keep the new version **similar in length** to the original so it fits visually in the image.
Also, as the OCR detection can be unstable, I provide you with just one OCR'red line, as well as the complete ocr detectio.
This can enable you to use the complete context of the meme for your decision.

The full OCR context: {full_context} <-- the full context is just for meaningful meme context, this does not influence your answer in terms of if its SAFE or not.

Just the current line: {text} <-- if the current does not contain any impolite or inappropriate words (also slang), just reply with SAFE.

Only reply either with "SAFE" or the child-friendly version of the text. Do not include your reasoning or any other context in your answer.
"""

    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=60
        )
        # message.content may be None; normalise to an empty string.
        reply = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Best effort: a failed request leaves this line untouched and moves on.
        print(f"⚠️ GPT failed on '{text}': {e}")
        continue

    print(f"🧠 Original: {text} → GPT Reply: {reply}")

    # === Filter safe results ===
    # Compare the verdict without surrounding quotes/punctuation so replies such as
    # 'SAFE.' or '"Safe"' are not mistaken for replacement text and drawn on the image.
    verdict = reply.strip(" \t\r\n\"'.!").upper()
    if not verdict or verdict == "SAFE":
        continue

    # Drop quotes, turn dots into spaces and collapse all whitespace (including
    # newlines) so the replacement renders as one clean line.
    cleaned_text = " ".join(reply.replace('"', '').replace(".", " ").split())
    if not cleaned_text:
        continue  # nothing left to draw; leave the original text in place

    # Match the original casing: all caps if the original was all caps, otherwise
    # capitalise only the first letter without lowercasing the rest ("I", names).
    if text == text.upper():
        cleaned_text = cleaned_text.upper()
    else:
        cleaned_text = cleaned_text[:1].upper() + cleaned_text[1:]
    print("Capitalized:", cleaned_text)
    image = black_box_and_replace(image, box, cleaned_text)

# === Save Final Output ===
if not cv2.imwrite("partially_cleaned_image2.jpg", image):
    # cv2.imwrite returns False on failure instead of raising.
    raise IOError("Could not write partially_cleaned_image2.jpg")
print("✅ Saved: partially_cleaned_image2.jpg")


[2025/05/19 16:51:05] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/home/sam/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/home/sam/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, ma

[2025/05/19 16:51:06] ppocr DEBUG: dt_boxes num : 10, elapsed : 0.06732916831970215
[2025/05/19 16:51:06] ppocr DEBUG: cls num  : 10, elapsed : 0.03676247596740723
[2025/05/19 16:51:07] ppocr DEBUG: rec_res num  : 10, elapsed : 0.7555530071258545
🧠 Original: INAMEDMYHARD → GPT Reply: SAFE
🧠 Original: DRIVE "DAT ASS" → GPT Reply: DRIVE "THAT CAR"
Capitalized: DRIVE THAT CAR
🧠 Original: SOTHATONCEAMONTH MYCOMPUTER WILL → GPT Reply: SAFE
🧠 Original: ASK ME IF I WANT TO BACK DAT ASS UP. → GPT Reply: ASK ME IF I WANT TO BACK THAT UP.
Capitalized: ASK ME IF I WANT TO BACK THAT UP
✅ Saved: partially_cleaned_image2.jpg
