<a href="https://colab.research.google.com/github/Min1222Ag/Digital-Watermarking/blob/main/model_skeleton.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# YOLO > CRNN : 말풍선 탐지

1. YOLO로 말풍선 탐지 (현재 아래 코드는 torchvision의 Faster R-CNN을 사용)
2. CRNN으로 텍스트 탐지 (현재 아래 코드는 Tesseract OCR(pytesseract)을 사용)

In [None]:
import torch
from PIL import Image
import torchvision.transforms as transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn

def load_model():
    """Create the Faster R-CNN (ResNet-50 FPN) detector used for balloon detection.

    Returns:
        The torchvision detection model, built with ``pretrained=True`` and
        already switched to evaluation mode.
    """
    detector = fasterrcnn_resnet50_fpn(pretrained=True)
    # Put the network in inference mode before handing it to callers.
    detector.eval()
    return detector

def detect_balloons(image_path, model, balloon_id=91, score_threshold=0.5):
    """Detect speech-balloon bounding boxes in an image.

    Args:
        image_path: Path of the image file to analyse.
        model: Detection model that, when called with a batch tensor, returns
            a list of dicts containing ``'boxes'``, ``'labels'`` and
            ``'scores'`` tensors.
        balloon_id: Class label that represents a speech balloon. The default
            (91) is only an example value; the real id depends on the dataset
            the model was trained on.
            NOTE(review): a stock COCO-pretrained head probably never emits
            label 91 -- confirm and pass the real id once a balloon model is
            trained.
        score_threshold: Only detections scoring strictly above this value
            are kept.

    Returns:
        A list of ``(x1, y1, x2, y2)`` tuples of Python floats, one per
        accepted detection.
    """
    # Force three channels so grayscale / RGBA / palette images do not break
    # the detector, and close the file handle as soon as the pixels are read.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    batch = transforms.ToTensor()(image).unsqueeze(0)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = model(batch)[0]

    labels = prediction['labels'].tolist()
    scores = prediction['scores'].tolist()
    boxes = prediction['boxes'].tolist()

    # Boxes, labels and scores are parallel sequences; filter them together.
    return [
        tuple(box)
        for box, label, score in zip(boxes, labels, scores)
        if score > score_threshold and label == balloon_id
    ]

# Build the detector once, then run it on a placeholder image path.
model = load_model()
balloon_boxes = detect_balloons('path_to_image.jpg', model)

In [None]:
import cv2
import pytesseract

def extract_text_from_balloons(image_path, balloon_boxes):
    """Run OCR on every balloon region of an image.

    Args:
        image_path: Path of the image file to read.
        balloon_boxes: Iterable of ``(x1, y1, x2, y2)`` boxes in pixel
            coordinates.

    Returns:
        A list of recognised strings, one per input box and in the same
        order. A box that is empty after clamping to the image yields ``''``
        so the result stays aligned with ``balloon_boxes``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    height, width = image.shape[:2]
    # OpenCV loads BGR, while pytesseract assumes RGB for array input.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    texts = []
    for box in balloon_boxes:
        # Clamp to the image so negative or oversized coordinates cannot wrap
        # around or silently produce an empty slice.
        x1 = min(max(int(box[0]), 0), width)
        y1 = min(max(int(box[1]), 0), height)
        x2 = min(max(int(box[2]), 0), width)
        y2 = min(max(int(box[3]), 0), height)
        if x2 <= x1 or y2 <= y1:
            texts.append('')
            continue
        roi = rgb[y1:y2, x1:x2]
        texts.append(pytesseract.image_to_string(roi, lang='kor+eng'))
    return texts

# OCR each detected balloon of the same placeholder image.
texts = extract_text_from_balloons('path_to_image.jpg', balloon_boxes)


In [None]:
def insert_watermark(texts, balloon_boxes, image_path, output_path='watermarked_image.jpg'):
    """Draw each text in red at the top-left corner of its balloon box and save the image.

    Args:
        texts: Strings to draw, paired positionally with ``balloon_boxes``.
        balloon_boxes: Boxes whose first two items are the x and y
            coordinates at which the paired text is drawn.
        image_path: Path of the image to draw on.
        output_path: Where the result is written. Defaults to
            ``'watermarked_image.jpg'``.
    """
    # The file only imports ``Image`` from PIL; bring in the other helpers here.
    from PIL import ImageDraw, ImageFont

    # JPEG cannot store alpha/palette modes and the fill colour is an RGB
    # triple, so normalise the mode up front.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype("arial.ttf", 14)  # font and size are adjustable
    except OSError:
        # arial.ttf is not installed on every system; fall back to Pillow's
        # built-in font instead of failing.
        font = ImageFont.load_default()

    for text, box in zip(texts, balloon_boxes):
        # float() accepts plain numbers as well as scalar tensors.
        origin = (float(box[0]), float(box[1]))
        draw.text(origin, text, font=font, fill=(255, 0, 0))

    image.save(output_path)

# Draw the recognised texts onto the placeholder image and save the result.
insert_watermark(texts, balloon_boxes, 'path_to_image.jpg')


# YOLO > CRNN : 텍스트 탐지

In [None]:
!pip install opencv-python-headless numpy

In [None]:
import cv2
import numpy as np

def load_pretrained_east_model(model_path='frozen_east_text_detection.pb'):
    """Load the EAST text-detection network through OpenCV's DNN module.

    Args:
        model_path: Path to the EAST model file. Defaults to
            ``'frozen_east_text_detection.pb'`` in the working directory.

    Returns:
        The network object returned by ``cv2.dnn.readNet``.
    """
    return cv2.dnn.readNet(model_path)

def detect_text(image_path, net, confThreshold=0.5, nmsThreshold=0.4):
    """Detect text regions in an image with an EAST network.

    Args:
        image_path: Path of the image file to read.
        net: Network whose ``setInput`` / ``forward`` methods are used to run
            the detector.
        confThreshold: Minimum score for a candidate to be kept.
        nmsThreshold: Overlap threshold passed to rotated-box NMS.

    Returns:
        ``(orig_image, detections)`` where ``orig_image`` is a copy of the
        loaded image and ``detections`` is a list of 4x2 vertex arrays scaled
        back to the original image size.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    orig_image = image.copy()
    (H, W) = image.shape[:2]

    # EAST requires input dimensions that are multiples of 32.
    newW, newH = (320, 320)
    rW = W / float(newW)
    rH = H / float(newH)

    # Pre-process the image and run the two output layers.
    blob = cv2.dnn.blobFromImage(image, 1.0, (newW, newH), (123.68, 116.78, 103.94), True, False)
    net.setInput(blob)
    (scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    # Decode candidates, then suppress overlapping ones.
    (rects, confidences) = decode_predictions(scores, geometry, confThreshold)
    detections = []
    if not rects:
        return orig_image, detections

    # Pass plain Python floats as scores rather than NumPy scalars.
    confidences = [float(c) for c in confidences]
    indices = cv2.dnn.NMSBoxesRotated(rects, confidences, confThreshold, nmsThreshold)

    # Depending on the OpenCV version the result is an Nx1 array, a flat
    # array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).reshape(-1):
        vertices = cv2.boxPoints(rects[int(i)])
        # Map from network input coordinates back to the original image.
        vertices[:, 0] *= rW
        vertices[:, 1] *= rH
        detections.append(vertices)
    return orig_image, detections

def decode_predictions(scores, geometry, confThreshold):
    """Turn raw EAST outputs into rotated rectangles and confidences.

    The decoding follows OpenCV's ``text_detection.py`` sample: geometry
    channels 0-3 are distances from a feature-map cell to the box edges and
    channel 4 is the rotation angle in radians.

    Args:
        scores: Array indexed as ``scores[0, 0, y, x]``.
        geometry: Array indexed as ``geometry[0, c, y, x]`` with ``c`` in 0..4.
        confThreshold: Cells scoring below this value are skipped.

    Returns:
        ``(rects, confidences)``. Each rect is
        ``((cx, cy), (w, h), angle_degrees)`` -- the tuple form OpenCV's
        Python bindings use for rotated rectangles -- and each confidence is
        a Python float.
    """
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        for x in range(numCols):
            score = float(scoresData[x])
            if score < confThreshold:
                continue

            # Each feature-map cell maps to a 4-pixel step in the input image.
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = float(anglesData[x])
            cos = np.cos(angle)
            sin = np.sin(angle)

            # Height pairs channels 0 and 2; width pairs channels 1 and 3.
            h = float(xData0[x] + xData2[x])
            w = float(xData1[x] + xData3[x])

            # Corner reached from the cell via channels 1 and 2.
            endX = offsetX + (cos * xData1[x]) + (sin * xData2[x])
            endY = offsetY - (sin * xData1[x]) + (cos * xData2[x])

            # The two corners adjacent to that corner; their midpoint is the
            # centre of the rotated box.
            p1 = (-sin * h + endX, -cos * h + endY)
            p3 = (-cos * w + endX, sin * w + endY)
            center = (float(0.5 * (p1[0] + p3[0])), float(0.5 * (p1[1] + p3[1])))

            rects.append((center, (w, h), float(-angle * 180.0 / np.pi)))
            confidences.append(score)
    return (rects, confidences)

# Load the model
net = load_pretrained_east_model()

# Detect text in the image
image_path = 'path_to_your_image.jpg'
original_image, detections = detect_text(image_path, net) # `detections` receives the detected text regions



In [None]:
def apply_dct_watermark(image, detections, watermark_text, alpha=0.01):
    """Embed a text watermark into detected regions via the DCT domain.

    For each region the watermark text is rendered to a blank patch, the DCT
    of that patch is scaled by ``alpha`` and added to the region's DCT, and
    the inverse DCT is written back.

    Args:
        image: BGR image array, or an already single-channel array.
        detections: Iterable of point arrays (one per region) accepted by
            ``cv2.boundingRect``.
        watermark_text: Text rendered into each region.
        alpha: Strength applied to the watermark's DCT coefficients.

    Returns:
        A ``uint8`` grayscale image with the watermark applied. Regions that
        are smaller than 2x2 after clamping are left untouched.
    """
    # Convert to grayscale; a single-channel input is used as-is.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    watermarked_image = gray.astype(np.float32)
    img_h, img_w = gray.shape[:2]

    # Insert the watermark text into every text region.
    for vertices in detections:
        # Axis-aligned bounds of the region.
        x, y, w, h = cv2.boundingRect(np.asarray(vertices, dtype=np.float32))

        # Clamp to the image: scaled detections can extend past the borders,
        # and negative slice starts would wrap around.
        x0, y0 = max(x, 0), max(y, 0)
        x1, y1 = min(x + w, img_w), min(y + h, img_h)
        w, h = x1 - x0, y1 - y0

        # cv2.dct only supports even-sized arrays, so drop a trailing odd
        # row/column; skip regions too small to transform.
        w -= w % 2
        h -= h % 2
        if w < 2 or h < 2:
            continue

        # Apply the watermark inside the region.
        roi = gray[y0:y0 + h, x0:x0 + w]
        roi_dct = cv2.dct(roi.astype(np.float32))
        watermark = np.zeros((h, w), dtype=np.float32)
        cv2.putText(watermark, watermark_text, (0, h // 2), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255, 1)
        roi_dct += alpha * cv2.dct(watermark)

        # Copy the watermarked region back into the output image.
        watermarked_image[y0:y0 + h, x0:x0 + w] = cv2.idct(roi_dct)

    # Convert the result back to uint8.
    watermarked_image = np.clip(watermarked_image, 0, 255).astype(np.uint8)
    return watermarked_image

# Apply the watermark
watermarked_image = apply_dct_watermark(original_image, detections, 'Secret', alpha=0.01)

# Save the resulting image
cv2.imwrite('watermarked_image.jpg', watermarked_image)
