---
# Prerequisite: Mount your Google Drive.

In [None]:
# Colab-only: mount Google Drive at /content/drive. Later cells read the SRNet
# code, checkpoint, font, credentials and images from paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

---
# Prerequisite: Install libraries.

In [None]:
# Install the third-party packages used by later cells: easyocr (OCR),
# pygame (text rendering) and the Google Cloud Translation client.
# NOTE(review): versions are not pinned, so results may change over time.
!pip install easyocr
!pip install pygame
!pip install --upgrade google-cloud-translate

---
# Functions for creating gray-scale text image.

In [None]:
import cv2
import numpy as np
import pygame
import pygame.locals
from pygame import freetype


def render_normal(font, text):
    """Render `text` on a single line and return its tightly cropped alpha mask.

    The text is drawn with `font` onto a transparent scratch surface that is
    twice as wide as the text's bounding rect and 1.25x the line spacing high.
    The surface's alpha channel is then cropped to the smallest window that
    contains every pixel whose alpha is greater than 20.

    Returns:
        A tuple `(mask, rect)`. `mask` is the cropped 2-D alpha array indexed
        rows first; `rect` is the rectangle returned by `font.render_to`, with
        `x` offset by the drawing origin and `y` replaced by
        `origin_y - rect.y`.
    """
    spacing = font.get_sized_height() + 1
    text_bounds = font.get_rect(text)
    canvas_size = (round(2.0 * text_bounds.width), round(1.25 * spacing))
    canvas = pygame.Surface(canvas_size, pygame.locals.SRCALPHA, 32)

    origin_x = 0
    origin_y = spacing
    rect = font.render_to(canvas, (origin_x, origin_y), text)
    rect.x = origin_x + rect.x
    rect.y = origin_y - rect.y

    # Swap the two axes of the alpha array so that it is indexed rows first.
    alpha = pygame.surfarray.pixels_alpha(canvas).swapaxes(0, 1)
    rows, cols = np.where(alpha > 20)
    top, left = np.min(rows), np.min(cols)
    bottom, right = np.max(rows), np.max(cols)
    return alpha[top:bottom + 1, left:right + 1], rect

def make_standard_text(font_path, text, shape, padding = 0.1, color = (0, 0, 0), init_fontsize = 25):
    """Render `text` centred on a canvas of `shape` and return it as an RGB image.

    A font size is searched for, one point at a time, so that the text roughly
    fills `shape` minus a border; the rendered alpha mask is shrunk if it is
    still too large, centred on the canvas, and mapped so that the background
    is 127 and fully opaque text is 0.

    Args:
        font_path: Path handed to `freetype.Font`.
        text: The string to render.
        shape: Canvas size as (rows, cols).
        padding: Border size. Values below 1 are a fraction of `min(shape)`;
            values of 1 or more are an absolute size.
        color: Not used by this function.
        init_fontsize: Font size at which the search starts.

    Returns:
        The canvas converted with `cv2.COLOR_GRAY2RGB` (uint8).
    """
    font = freetype.Font(font_path)
    font.antialiased = True
    font.origin = True
    fontsize = init_fontsize
    font.size = fontsize
    pre_remain = None
    # Interpret `padding` as a fraction of the shorter side, or as an absolute border.
    if padding < 1:
        border = int(min(shape) * padding)
    else:
        border = int(padding)
    # Area available for the text once the border is removed on both sides.
    target_shape = tuple(np.array(shape) - 2 * border)
    # Search the font size one step at a time until the slack changes sign.
    while True:
        rect = font.get_rect(text)
        # rect[1:3] takes the 2nd and 3rd rect components - presumably (y, width),
        # with y standing in for the text height because font.origin is True.
        # TODO confirm against the pygame.freetype documentation.
        res_shape = tuple(np.array(rect[1:3]))
        # Smallest slack over both axes; negative means the text overflows the target.
        remain = np.min(np.array(target_shape) - np.array(res_shape))
        if pre_remain is not None:
            # A non-positive product means the slack changed sign (or was/is zero)
            # between the previous size and this one, which ends the search.
            m = pre_remain * remain
            if m <= 0:
                # NOTE(review): the adjustments below change only the local
                # `fontsize`; `font.size` is not updated afterwards because
                # `break` skips the assignment at the end of the loop, so the
                # text is rendered at the size measured in this iteration.
                if m < 0 and remain < 0:
                    fontsize -= 1
                if m == 0 and remain != 0:
                    if remain < 0:
                        fontsize -= 1
                    elif remain > 0:
                        fontsize += 1
                break
        if remain < 0:
            # Text overflows: shrink, but never go below a font size of 2.
            if fontsize == 2:
                break
            fontsize -= 1
        else:
            # Text fits: try the next larger size.
            fontsize += 1
        pre_remain = remain
        font.size = fontsize

    surf, rect = render_normal(font, text)
    # If the rendered mask still exceeds the target on either axis, shrink it
    # uniformly so that it fits.
    if np.max(np.array(surf.shape) - np.array(target_shape)) > 0:
        scale = np.min(np.array(target_shape, dtype = np.float32) / np.array(surf.shape, dtype = np.float32))
        # The (rows, cols) shape is reversed because cv2.resize takes (width, height).
        to_shape = tuple((np.array(surf.shape) * scale).astype(np.int32)[::-1])
        surf = cv2.resize(surf, to_shape)
    canvas = np.zeros(shape, dtype = np.uint8)
    # Top-left corner that centres the mask on the canvas.
    tly, tlx = int((shape[0] - surf.shape[0]) // 2), int((shape[1] - surf.shape[1]) // 2)
    canvas[tly:tly+surf.shape[0], tlx:tlx+surf.shape[1]] = surf
    # Invert and halve the range: alpha 0 (background) -> 127, alpha 255 (text) -> 0.
    canvas = ((1. - canvas.astype(np.float32) / 255.) * 127.).astype(np.uint8)

    return cv2.cvtColor(canvas, cv2.COLOR_GRAY2RGB)
  
# Initialise pygame's freetype module and pygame itself when this cell runs,
# before any font is created by make_standard_text.
freetype.init()
pygame.init()

---
# Utility functions

In [None]:
def draw_boxes(image, bounds, color='yellow', width=2):
    """Outline every detected text region on `image` and return the image.

    Each item of `bounds` must have, as its first element, exactly four
    corner points; a closed polyline through those corners is drawn with the
    given `color` and line `width`. `image` is drawn on in place.

    `ImageDraw` (from PIL) must be in scope when this function is called.
    """
    pen = ImageDraw.Draw(image)
    for entry in bounds:
        corners = entry[0]
        first, second, third, fourth = corners
        # Flatten the corners to x0, y0, x1, y1, ... and repeat the first
        # corner at the end so that the outline is closed.
        outline = [*first, *second, *third, *fourth, *first]
        pen.line(outline, fill=color, width=width)
    return image

# Inclusive Unicode code point ranges that hold Hangul characters.
_HANGUL_RANGES = (
    (0x1100, 0x11FF),  # Hangul Jamo
    (0x3130, 0x318F),  # Hangul Compatibility Jamo
    (0xA960, 0xA97F),  # Hangul Jamo Extended-A
    (0xAC00, 0xD7A3),  # Hangul Syllables
    (0xD7B0, 0xD7FF),  # Hangul Jamo Extended-B
)


def is_hangul_character(char):
    """Return True if the single character `char` is a Hangul character.

    The previous check (`value >= 4352 and value >= 4607`) used `>=` for the
    upper bound as well, so every code point from 4607 upwards (CJK
    ideographs, kana, emoji, ...) was reported as Hangul. The test now covers
    the Unicode Hangul blocks explicitly, including the precomposed
    syllables that make up ordinary Korean text.

    Args:
        char: A string of length 1.

    Returns:
        bool: True when the code point of `char` lies in a Hangul block.

    Raises:
        TypeError: If `char` is not a single character (raised by `ord`).
    """
    value = ord(char)
    return any(low <= value <= high for low, high in _HANGUL_RANGES)

def is_hangul(string):
    """Return True when every character of `string` is a Hangul character.

    Each character is checked with `is_hangul_character`; checking stops at
    the first character that fails. An empty string yields True.
    """
    for symbol in string:
        if not is_hangul_character(symbol):
            return False
    return True

---
# Functions for creating style-preserving translated image

#### def create_image(image, hangul_bounds, target_lang='en'): 
Description: For each bounding box and word in hangul_bounds, crop the corresponding region from the original image, render the translated word as a gray-scale text image, generate a style-preserving translated patch with SRNet, and paste the patch back into the original image.

Parameters : <br/>
  - image : PIL.Image <br/>
    + The original image to which style-preserving translation is applied.
<br/>
<br/>
  - hangul_bounds : list of tuples
    + The output of easyocr. Each tuple consists of a bounding box, the recognized string, and a confidence score.
<br/>
<br/>
  - target_lang (default: 'en') : string
    + The language into which each string in hangul_bounds is translated. The default value 'en' translates the words into English.

Returns : <br/>
  - image : PIL.Image
    + The style-preserving translated image (the input image, modified in place).



In [None]:
import torch
from skimage.transform import resize
import torchvision.transforms.functional as F
import sys
sys.path.insert(1, '/content/drive/My Drive/CS470_Team6/SRNet')
import cfg
from model import Generator


class To_tensor(object):
    """Convert a pair of channel-last image arrays to float torch tensors.

    Calling the instance with `(i_t, i_s)` returns a tuple of two tensors in
    the same order. Each array is rearranged from (H, W, C) to (C, H, W),
    divided by 127.5 and shifted by -1, and then cast to `torch.float32`.
    """

    def __call__(self, sample):
        text_image, style_image = sample

        def _as_tensor(channel_last):
            # Axis 2 (channels) moves to the front, then values are rescaled.
            channel_first = channel_last.transpose((2, 0, 1))
            rescaled = channel_first / 127.5 - 1
            return torch.from_numpy(rescaled).float()

        return (_as_tensor(text_image), _as_tensor(style_image))


def create_image(image, hangul_bounds, target_lang='en'):
    """Replace each detected word in `image` with a style-preserving translation.

    For every OCR result the word's region is cropped and rescaled to the
    generator's input height, the word is translated, a gray-scale rendering
    of the translation is produced with `make_standard_text`, the generator
    `G` synthesises the new patch, and the patch is pasted back over the
    original region.

    Relies on the notebook globals `translator`, `font`, `G` and `cfg`, which
    must be defined before this function is called.

    Args:
        image: PIL image whose crops have three dimensions (e.g. RGB). It is
            modified in place.
        hangul_bounds: OCR results; each item is (bounding box, text,
            confidence). Corners 0 and 2 of the bounding box are used as the
            (left, top) and (right, bottom) of an axis-aligned crop box.
        target_lang: Language code passed to the translator.

    Returns:
        The same `image` object with the translated patches pasted in.
        Regions that are too small to process, or whose translation is
        blank, are left untouched.
    """
    import html  # local import: keeps this cell independent of other cells' imports

    with torch.no_grad():
        for hangul_bound in hangul_bounds:
            bound = hangul_bound[0]
            box = tuple(int(v) for v in (*bound[0], *bound[2]))
            # The generated patch is trimmed by 2 px per axis further down, so
            # a box this small would leave nothing to paste.
            if box[2] - box[0] <= 2 or box[3] - box[1] <= 2:
                continue

            i_s = np.asarray(image.crop(box))
            h, w, c = i_s.shape
            # Scale the crop to the configured height; the width is rounded to
            # a multiple of 8 (presumably required by the generator - TODO confirm).
            scale_ratio = cfg.data_shape[0] / h
            to_h = cfg.data_shape[0]
            to_w = int(round(int(w * scale_ratio) / 8)) * 8
            if to_w == 0:
                # Very narrow crop: a zero-width input cannot be resized or rendered.
                continue
            to_scale = (to_h, to_w)

            # The Translation API returns HTML-escaped text by default (for
            # example "&#39;" for an apostrophe); unescape it so the entities
            # are not rendered literally.
            translated_word = html.unescape(
                translator.translate(hangul_bound[1], target_language=target_lang)['translatedText']
            )
            if not translated_word.strip():
                # Nothing visible to render; render_normal cannot crop an empty mask.
                continue

            i_s = resize(i_s, to_scale, preserve_range=True)
            i_t = np.asarray(make_standard_text(font, translated_word, to_scale))
            i_t, i_s = To_tensor()((i_t, i_s))
            i_s = i_s.unsqueeze(0)
            i_t = i_t.unsqueeze(0)
            # Only the last generator output (o_f) is used below.
            o_sk, o_t, o_b, o_f = G(i_t, i_s, to_scale)

            o_f = o_f.squeeze(0).detach().to('cpu')
            # Hand skimage a NumPy array rather than a torch tensor.
            o_f = torch.from_numpy(resize(o_f.numpy(), (c, h, w), preserve_range=True))
            # Shift the output (presumably in [-1, 1]) to [0, 1] for PIL conversion.
            o_f = F.to_pil_image((o_f + 1)/2)
            # Drop the last 2 px on the right and bottom, then stretch back to
            # the size of the original region.
            o_f = o_f.crop((0, 0, w-2, h-2))
            o_f = o_f.resize((w, h))
            image.paste(o_f, box)
    return image

---
# Run style-preserving translation

Put your images into input_dir and run this cell. The style-preserving translated images will then be created and saved in result_dir.

In [None]:
import os
import easyocr
from PIL import Image
from PIL import ImageDraw
from google.cloud import translate_v2 as translate

# Credentials file read by the Google Cloud client library; replace {PATH}
# with the name of your service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/content/drive/My Drive/CS470_Team6/{PATH}.json" # Path to Google Translation Key
# Project folder on the mounted drive, and the input / output image folders.
# NOTE(review): `root` already ends with '/', so the two paths below contain '//'.
root = '/content/drive/My Drive/CS470_Team6/'
input_dir = root + '/scene_text_test/test_image/'
result_dir = root + '/scene_text_test/result_image/'


# Generator checkpoint to load.
# NOTE(review): this path uses 'MyDrive' while the other paths use 'My Drive' -
# confirm that both spellings resolve on the current Colab runtime.
ckpt_path = '/content/drive/MyDrive/CS470_Team6/SRNet/logs/train_step-30000.model'
# OCR language and translation target language.
source_lang = 'ko'
target_lang = 'en'

# OCR results containing any of these characters are not translated.
error_string = '@#$%^&*=+()<>{}[]'
# Font file used to render the translated text.
font = '/content/drive/My Drive/CS470_Team6/SRNet/datasets/fonts/NanumGothic-Regular.ttf'

# OCR reader for the source language and the Google Cloud Translation client.
reader = easyocr.Reader([source_lang])
translator = translate.Client()

# Build the generator and restore its trained weights.
G = Generator(in_channels = 3)
# G is never moved to a GPU in this notebook, so map the checkpoint tensors to
# the CPU. Without map_location, a checkpoint saved on a GPU fails to load on
# a CPU-only runtime.
checkpoint = torch.load(ckpt_path, map_location='cpu')
G.load_state_dict(checkpoint['generator'])
G.eval()  # switch the generator to evaluation mode for inference

threshold = 0.3  # minimum OCR confidence for a word to be translated
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# image.save() does not create missing directories, so make sure the output
# folder exists before processing anything.
os.makedirs(result_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    # Case-insensitive match so that files such as .PNG or .JPEG are included.
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue
    bounds = reader.readtext(input_dir + filename)
    im = Image.open(input_dir + filename).convert('RGB')
    # Translate a word only if it contains no digits and no characters from
    # error_string, and its OCR confidence exceeds the threshold.
    word_bounds = [
        bound for bound in bounds
        if bound[2] > threshold
        and not any((ch.isdigit() or ch in error_string) for ch in bound[1])
    ]
    image = create_image(im, word_bounds, target_lang=target_lang)
    image.save(result_dir + filename)