<a href="https://colab.research.google.com/github/Jang-KyungWuk/Arknights_Story_Translator/blob/master/Arknights%20Story%20Translator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
version 3.4 
Based on Arknights Story Translator v7.0 py
with Google Cloud Vision API
with Google Translation API
'''

In [None]:
# Name of the story video file to process; it is appended to base_dir when the video is opened
# The pixel coordinates used by the functions below were chosen for a 1480px X 720px video

story_movie_name="arknights_2020-08-25-23-03-09.mp4"

In [None]:
# File name of the Google Cloud Platform credential (.json) file
# base_dir + this name is later stored in the GOOGLE_APPLICATION_CREDENTIALS environment variable
google_credential_file="PUT YOUR GOOGLE CLOUD PLATFORM CREDENTIAL FILE NAME HERE (CREDENTIAL.JSON FILE)"

In [None]:
# Number of cropped dialogue frames that are stacked into one "scene N.jpg" image
# It is also the default number of layers scriptize() splits an OCR result into
merge_script_num=10

In [None]:
# Mount Google Drive inside the Colab runtime
from google.colab import drive
drive.mount('/content/gdrive')

# Base directory of Google Drive
# The video, the credential file, the scene images and the result text file are all addressed as base_dir + file name
base_dir="/content/gdrive/My Drive/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Upgrade google-cloud-vision
! pip install --upgrade google-cloud-vision

In [None]:
# Set parameters

# Tolerance for "black": is_dialogue() treats a brightness of 0~50 (0+black_error) as black-like
# and is_bg_black() requires both of its sample pixels to be darker than this value
black_error=50 

# Tolerance for "white": a brightness of 205~255 (255-white_error and above) is treated as white-like
white_error=50

# Minimum drop of the LAB() value, compared with the previous dialogue frame,
# that is taken as "one dialogue has finished and a new one is being printed"
lab_error=1000

# A pixel only counts as white when the difference between min(BGR) and max(BGR) is under 10
white_gap=10

In [None]:
# Import libraries
import cv2 as cv
import numpy as np

from google.cloud import vision
from google.cloud import translate_v2 as translate
import io
import os
import six
import time

import json

In [None]:
# Define functions

def is_dialogue(frame):
  '''is_dialogue(frame)
  Check whether given frame has dialogue
  by searching for black-like and white-like pixels in a fixed area

  The area is frame[640:720,490:520]. A pixel is black-like when the mean of
  its channels is <= 0+black_error and white-like when it is >= 255-white_error.

  frame: image as a numpy array indexed [row, column, channel]
  Returns True when the area holds at least one black-like and at least one
  white-like pixel, otherwise False (also when the frame is too small for
  the area to contain any pixel)
  '''
  global black_error
  global white_error
  minimum=0+black_error
  maximum=255-white_error

  region=frame[640:720,490:520]
  if region.size==0:
    return False

  # mean() accumulates integer input in floating point, so adding the
  # channels of a uint8 pixel cannot wrap around at 255
  brightness=region.mean(axis=2)

  # The darkest and the brightest pixel are looked up independently of each other,
  # so a bright pixel is never skipped just because it was also the darkest one seen so far
  return bool(brightness.min()<=minimum and maximum<=brightness.max())

def is_bg_black(frame):
  '''is_bg_black(frame)
  Check whether it has black background of dialogue
  by checking that the pixels at two fixed positions are black

  The positions are frame[719,200] and frame[719,1280]; each one is black
  when the mean of its channels is below black_error.

  frame: image as a numpy array indexed [row, column, channel]
  Returns True when both pixels are black, otherwise False
  '''
  global black_error
  # mean() accumulates integer input in floating point; adding uint8 channel
  # values with the built-in sum() can wrap around at 255 on recent NumPy
  # versions and make a bright pixel look dark
  pointA=frame[719,200].mean()
  pointB=frame[719,1280].mean()
  return bool(pointA<black_error and pointB<black_error)

def LAB(frame):
  '''LAB(frame)
  Amplify white pixels, which are the pixels of letters
  Calculate Letter Amplified Brightness: the sum of the brightness of the white pixels

  Only the area frame[640:670,485:545] is inspected. A pixel is white when
  max(channels)-min(channels) is under white_gap and every channel is
  brighter than 255-white_error. Its brightness is the mean of its channels.

  frame: image as a numpy array indexed [row, column, channel]
  Returns the summed brightness as a float (0.0 when no pixel is white)
  '''
  global white_error
  minimum=255-white_error

  # Widen the values first so that adding three channels cannot overflow a uint8
  region=frame[640:670,485:545].astype(int)
  darkest=region.min(axis=2)
  brightest=region.max(axis=2)

  # All three channels are above the threshold exactly when the darkest one is.
  # Both conditions must hold together: a saturated colour such as pure red
  # has one bright channel but is not a letter pixel
  is_letter=(brightest-darkest<white_gap)&(minimum<darkest)

  return float((region.sum(axis=2)[is_letter]/3).sum())

def crop_script(frame):
  '''crop_script(frame)
  Cut out the fixed area of the frame where the dialogue is drawn
  (rows 620 to 719, columns 220 to 1259) and return it
  '''
  dialogue_rows=slice(620,720)
  dialogue_columns=slice(220,1260)
  return frame[dialogue_rows,dialogue_columns]

def merge_script(frames):
  '''merge_script(frames)
  Stack the given frame images on top of each other, in the given order,
  and return the result as one numpy array
  '''
  # Every row of every frame, first frame first
  stacked_rows=[row for image in frames for row in image]
  return np.array(stacked_rows)

In [None]:
# Open the video file base_dir + story_movie_name
# cap is the capture object every later frame is read from
cap=cv.VideoCapture(base_dir+story_movie_name)

In [None]:
# Read first frame and set its values as previous values.
# This first frame will not be used.

ret,frame=cap.read()

# Last frame that was considered to have dialogue, and its LAB() value
prev_frame=frame
prev_lab_value=0
# True right after a dialogue has been saved, so the very next frame can not save another one
continuum_warn=False

# Cropped dialogues that are waiting to be merged into one scene image
frames=[]
# Number used in the file name of the next scene image ("scene N.jpg")
scene_num=1
# Becomes True when the video ends with dialogues that were merged into a final, partly filled scene image
left_over=False

In [None]:
# Collect frames with a finished dialogue and merge every merge_script_num of them into a single jpg file
while cap.isOpened():

  # Read 2nd ~ last frames
  ret,frame=cap.read()

  if not ret:
    # The video has ended: if there are saved frames that are not merged yet, merge them regardless of how many were collected
    if frames:
      script_set=merge_script(frames)
      cv.imwrite(base_dir+"scene %s.jpg"%scene_num,script_set)
      left_over=True
    print("Final frame")
    break

  # If the given frame is considered to have dialogue (it has both a black-like and a white-like pixel)
  if is_dialogue(frame):
    # Sum the brightness of the white pixels (considered as the amount of letters in the frame)
    lab_value=LAB(frame)
    # If the white pixels disappeared dramatically (one dialogue is finished and a new dialogue is being printed)
    if lab_value+lab_error<prev_lab_value:
      # The previous frame (last frame before the dialogue disappears) must have the black dialogue background,
      # and it must not directly follow a frame that was already saved
      # (example: the 1st and the 2nd frame can not contain two different completed dialogues)
      if is_bg_black(prev_frame) and not continuum_warn:

        # Crop the dialogue part only and save it into the list 'frames'
        frames.append(crop_script(prev_frame))

        # If 'frames' contains enough dialogues, merge them and save the result as a jpg file
        if len(frames)==merge_script_num:
          script_set=merge_script(frames)
          cv.imwrite(base_dir+"scene %s.jpg"%scene_num,script_set)
          scene_num+=1
          frames=[]

        # The next frame can not be another completed dialogue
        continuum_warn=True

    # The frame seems to have dialogue but the amount of white pixels is ascending (or descends too little):
    # the dialogue is still being printed, so it is not saved
    else:
      continuum_warn=False

    # Save current frame as previous frame
    prev_frame=frame
    prev_lab_value=lab_value

  # The frame is not considered to contain dialogue, so the next frame is not a continuation
  else:
    continuum_warn=False

# Every frame has been read: free the video file handle held by the capture object
cap.release()

# If the output says "Final frame", cropping the scripts from the video file into jpg image files is completed

Final frame


In [None]:
# Set application credentials for the Google Cloud client libraries
# The environment variable holds the path base_dir + google_credential_file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=base_dir+google_credential_file

In [None]:
# Google Cloud Vision OCR

# Code based on
# https://cloud.google.com/vision/docs/ocr?hl=ko
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/vision/cloud-client/detect/detect.py

def detect_text(path):
  '''detect_text(path)
  Run Google Cloud Vision text detection (language hint zh-Hans) on an image file

  path: path of the image file to read
  Returns (result_text, result_pos): two lists of the same length that hold,
  for every text annotation of the response and in response order, its
  description and the list of (x, y) tuples of its bounding polygon vertices
  Raises Exception when the response carries an error message
  '''
  client=vision.ImageAnnotatorClient()

  with io.open(path,'rb') as image_file:
    content=image_file.read()

  # google-cloud-vision 2.x exposes Image directly on the package and no longer
  # has vision.types; 1.x only has vision.types.Image
  image_type=getattr(vision,'Image',None) or vision.types.Image
  image=image_type(content=content)

  response=client.text_detection(image=image,
                                 image_context={"language_hints":["zh-Hans"]})

  # Report a failed request before its annotations are used
  if response.error.message:
    raise Exception(
        '{}\nFor more info on error messages, check: '
        'https://cloud.google.com/apis/design/errors'.format(
            response.error.message))

  result_text=[]
  result_pos=[]

  for text in response.text_annotations:
    result_text.append(text.description)
    result_pos.append([(vertex.x,vertex.y)
                       for vertex in text.bounding_poly.vertices])

  return result_text, result_pos

In [None]:
# Google Cloud Translation

# Code based on
# https://cloud.google.com/translate/docs/basic/translating-text#translating_text
# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/translate/cloud-client/snippets.py

def translate_text(text, target='ko'):
  '''translate_text(text, target='ko')
  Translate text with the Google Cloud Translation (v2) client

  text: str, or bytes that are decoded as UTF-8 first
  target: language code to translate into
  Returns the 'translatedText' entry of the translation result
  '''
  translate_client=translate.Client()

  if isinstance(text, six.binary_type):
    text=text.decode('utf-8')

  # The input is plain text and the result is written to a plain text file.
  # Without format_ the API treats the input as HTML and the result can
  # contain HTML entities such as &#39; or &quot;
  result=translate_client.translate(text,target_language=target,format_='text')
  return result['translatedText']

In [None]:
# Use the position of every detected text to tell the speaker name apart from the speech
# Returns one string in the format "speaker:speech | speaker:speech | speech | " (a dialogue without a speaker has no "speaker:" part)

def scriptize(script, pos, layers=None):
  '''scriptize(script, pos, layers=None)
  Sort OCR texts into dialogues and join them into one string

  script: list of detected texts; entry 0 is skipped
          (presumably the annotation that covers the whole image - confirm against the OCR response)
  pos: for every entry of script, the list of (x, y) vertices of its bounding box;
       only the first vertex is used
  layers: number of dialogues stacked in the image, 100 px of height each;
          None means the current value of merge_script_num

  A text whose x is under 255 belongs to the speaker, any other text to the speech.
  y//100 selects the dialogue; a value outside 0..layers-1 is moved to the
  nearest valid dialogue instead of raising IndexError or wrapping around.

  Returns "speaker:speech | " or, without a speaker, "speech | " for every
  dialogue, concatenated in order ("" when layers is not positive)
  '''
  if layers is None:
    # Looked up on every call, so changing merge_script_num later takes effect
    layers=merge_script_num
  if layers<=0:
    return ""

  speaker=[[] for _ in range(layers)]
  speech=[[] for _ in range(layers)]

  for text,vertices in zip(script[1:],pos[1:]):
    coord=vertices[0][0]
    layer=min(max(vertices[0][1]//100,0),layers-1)

    if coord<255:
      speaker[layer].append(text)
    else:
      speech[layer].append(text)

  pieces=[]
  for name_parts,speech_parts in zip(speaker,speech):
    name="".join(name_parts)
    line="".join(speech_parts)
    if name=='':
      pieces.append("%s | "%line)
    else:
      pieces.append("%s:%s | "%(name,line))
  return "".join(pieces)

In [None]:
# Google Cloud Vision + Google Cloud Translation

# detect_text() -> scriptize() -> translate_text()
# ["a", "sth", ...],[[1,1], [1,2], ...] -> "a:sth ..." -> "someone:says | ..."

def _align_translation(translated, expected):
  '''Split a translated string on '|' into exactly `expected` dialogues

  The source text ends every dialogue with '|' (which is normally not used in
  dialogues), so the piece after the last '|' is normally empty. The number of
  '|' in the translation does not always match, so:
  - too few pieces: the piece after the last '|' is put back once if it holds
    anything but spaces, then numbered warning lines
    ("번역 결과 병합 발생 경고 N회": translation merged, warning N) are appended
  - too many pieces: the last piece is merged into the one before it together with
    a numbered warning ("번역 결과 개행 발생 경고 N회": translation split, warning N)
  '''
  parts=translated.split('|')
  tail=parts.pop(-1)
  tail_pending=len(tail.replace(' ',''))>0

  warning=1
  while len(parts)!=expected:
    if len(parts)<expected:
      if tail_pending:
        # Putting the tail back is not a warning, so the counter is not advanced
        parts.append(tail)
        tail_pending=False
        continue
      parts.append("번역 결과 병합 발생 경고 %s회"%warning)
    else:
      parts[-2]+=parts[-1]+("번역 결과 개행 발생 경고 %s회"%warning)
      parts.pop(-1)
    warning+=1
  return parts

def _write_scene(out_file, image_path):
  '''OCR one scene image, translate it and write every dialogue followed by its translation to out_file'''
  script, pos=detect_text(image_path)
  scriptized=scriptize(script,pos)
  translated=translate_text(scriptized)

  # Every dialogue ends with '|', so the piece after the last '|' is not a dialogue
  scr_zh=scriptized.split('|')
  scr_zh.pop(-1)
  scr_ko=_align_translation(translated,len(scr_zh))

  for zh,ko in zip(scr_zh,scr_ko):
    out_file.write(zh+"\n")
    out_file.write(ko+"\n\n")

# scene_num is the number of the next scene image that would be written:
# full scene images are 1..scene_num-1 and only a left over image uses scene_num itself.
# scene_num is not modified here, so this cell can be run again with the same result
if left_over==True:
  last_scene=scene_num
else:
  last_scene=scene_num-1

# Swap the file extension for '-tr.txt'; whatever the extension is, the result can never be the video file itself
txt_dir=os.path.splitext(base_dir+story_movie_name)[0]+'-tr.txt'

# save the result as txt file; the file is closed even when OCR or translation fails
with open(txt_dir,'w',encoding='utf-8') as result_txt:
  for file_num in range (1, last_scene+1):
    if file_num>1:
      # Pause between scenes, as before, to space out the API requests
      time.sleep(1.001)
    _write_scene(result_txt,base_dir+("scene %s.jpg"%file_num))