<a href="https://colab.research.google.com/github/Luhito/Labo/blob/master/PutText.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## install

In [6]:
#%pip install --upgrade pip --user
#%pip install moviepy==1.0.3
#%pip install opencv-python
#%pip install google-colab
#%pip install ffmpeg
#%pip install mecab-python3
%pip install unidic-lite



## import

In [7]:
#「動画、字幕の開始地点・終了地点・テキストを与えると字幕を付けてくれるシステム」作るぞ
import cv2
from google.colab.patches import cv2_imshow
import numpy as np

#progress barに必要
from tqdm.notebook import tqdm

#日本語の使用に必要
from PIL import Image, ImageDraw, ImageFont

#PIL型の画像を表示する
from IPython.display import display

#csv
import csv
import pandas as pd
import json

#完成した動画に音声を入れる
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip

from enum import Enum

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## クラス定義

In [9]:
# Sanity check of the Japanese tokenizer. Needs the mecab-python3 package and a
# dictionary (e.g. unidic-lite) installed in the runtime.
# Imported here rather than in the main import cell so that a runtime without
# MeCab only fails in this cell.
import MeCab

tagger = MeCab.Tagger()
print(tagger.parse("pythonが大好きです"))

python	python	python	python	名詞-普通名詞-一般			0
が	ガ	ガ	が	助詞-格助詞			
大好き	ダイスキ	ダイスキ	大好き	形状詞-一般			1
です	デス	デス	です	助動詞	助動詞-デス	終止形-一般	
EOS



### 字幕タイプを定義するクラス

In [10]:
# Where a caption is placed.
class CapPosType(Enum):
  """Caption placement modes."""
  # Bottom of the frame
  BOTTOM = 0
  # Below the person who is speaking
  BOTTOM_PERSON = 1
  # Outside the video picture (the frame loop allocates a canvas 1.3x the
  # video height when this mode is selected)
  OUT = 2

# How a caption's text is shown.
class CapDispType(Enum):
  """Caption display styles."""
  # In order: sequential, all at once, lyric-style
  SEQ = 0
  ALL = 1
  LYRIC = 2

### 字幕を定義するクラス

In [11]:
class Caption:
  """One caption entry: a frame interval, the text to show and its speaker.

  The class-level values are defaults that apply until an instance overwrites
  them; ``__init__`` sets ``start``, ``end``, ``text`` and ``speaker``, while
  ``origin``, ``lefttop`` and ``rightbottom`` stay at ``(0, 0)`` until some
  other code assigns them on the instance.
  """
  # Defaults shared by every instance until overwritten.
  start, end = 0, 0
  text, speaker = "", ""
  origin = lefttop = rightbottom = (0, 0)

  def __init__(self, start, end, text, speaker):
    """Store the interval bounds, the caption text and the speaker label."""
    self.start, self.end = start, end
    self.text, self.speaker = text, speaker

  def __str__(self):
    """Return a one-line summary of the four constructor fields."""
    return f"start:{self.start}, end:{self.end}, text:{self.text}, speaker:{self.speaker}"

  def __repr__(self):
    """Use the same text as ``str()`` so lists of captions print readably."""
    return str(self)

class Captions:
  """
    Collection of captions loaded from a CSV file.

    Every data row must provide four columns in this order:
    start, end, text, speaker. ``start`` and ``end`` are parsed with ``int``.
    A row whose first column is the literal ``start`` is treated as the header
    and skipped, as are completely empty rows.
  """

  # Class-level placeholder only; every instance gets its own list in __init__.
  _captions = []
  def __init__(self, path):
    """
    Read the captions stored in the CSV file at ``path``.

    Raises:
      ValueError: if ``start`` or ``end`` of a data row is not an integer.
      IndexError: if a non-empty data row has fewer than four columns.
    """
    self._captions=[]

    # newline='' is what the csv module requires so that quoted fields with
    # embedded line breaks are read correctly. 'utf-8-sig' reads plain UTF-8
    # and also strips a leading BOM, which would otherwise be glued to the
    # first header cell and defeat the header check below.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
      for line in csv.reader(f):
        if not line:
          # csv.reader yields [] for a blank line; nothing to parse.
          continue
        if line[0] == 'start':
          continue
        self._captions.append(Caption(int(line[0]), int(line[1]), line[2], line[3]))

  def getCaptions(self, frameid):
    """
    Return the captions whose [start, end] interval (both ends inclusive)
    contains ``frameid``, in file order; return None when there is none.
    """
    captions_in_frame = [c for c in self._captions if c.start <= frameid <= c.end]
    return captions_in_frame or None

  def __str__(self):
    return f"captions:{self._captions}"
  def __repr__(self):
    return self.__str__()

### 動画の情報を保存するクラス

In [12]:
class VideoCapture:
  """Open a video with OpenCV and cache its basic properties.

  Attributes:
    path: the path that was passed in.
    cap: the underlying ``cv2.VideoCapture``; the caller reads frames from it
      and is responsible for releasing it.
    frame_count, width, height: the corresponding capture properties as int.
    fps: the frame rate as reported by OpenCV (not converted).
  """
  def __init__(self, path):
    """
    Open ``path`` and read its properties.

    Raises:
      IOError: if OpenCV cannot open ``path``. Without this check a bad path
        would silently report zero frames and produce an empty output video.
    """
    self.path = path
    self.cap = cv2.VideoCapture(path)
    if not self.cap.isOpened():
      raise IOError(f"could not open video: {path}")
    self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
    self.fps = self.cap.get(cv2.CAP_PROP_FPS)
    self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

  def __str__(self):
    return f"path:{self.path}, frame_count:{self.frame_count}, fps:{self.fps}, width:{self.width}, height:{self.height}"
  def __repr__(self):
    return self.__str__()

## メソッド定義

### テキストと背景を描く関数

In [13]:
def create_overlay(img_size, origin, text_bbox, padding, text_bg):
  """
  Build the layer that serves as a caption's background box.

  Returns a new, fully transparent RGBA image of size ``img_size`` with one
  filled rectangle: ``text_bbox`` (left, top, right, bottom) shifted by
  ``origin`` and grown by ``padding`` (x, y) on each side, filled with
  ``text_bg``.
  """
  layer = Image.new('RGBA', img_size, (0,0,0,0))
  pen = ImageDraw.Draw(layer)

  left, top, right, bottom = text_bbox
  base_x, base_y = origin[0], origin[1]
  pad_x, pad_y = padding[0], padding[1]

  upper_left = (base_x + left - pad_x, base_y + top - pad_y)
  lower_right = (base_x + right + pad_x, base_y + bottom + pad_y)
  pen.rectangle((upper_left, lower_right), fill=tuple(text_bg))
  return layer

def draw_text_outline(draw, origin, text, font, font_outline_color):
  """
  Draw the outline of ``text``.

  The text is drawn four times in ``font_outline_color``, each copy shifted by
  one pixel from ``origin``: right, left, down, then up.
  """
  outline = tuple(font_outline_color)
  for shifted in (
      (origin[0]+1, origin[1]),
      (origin[0]-1, origin[1]),
      (origin[0], origin[1]+1),
      (origin[0], origin[1]-1),
  ):
    draw.text(shifted, text, font=font, fill=outline)

def draw_text(draw, origin, text, font, font_color):
  """
  Draw ``text`` at ``origin`` with ``font``, filled with ``font_color``
  (converted to a tuple before it is handed to ``draw.text``).
  """
  fill_color = tuple(font_color)
  draw.text(origin, text, font=font, fill=fill_color)

### 字幕を画面に貼る関数

In [14]:
def PutText(m, video, this_cs, frame, bg_image):
  """
  Draw every caption in ``this_cs`` onto one video frame.

  Args:
    m: settings object; ``font``, ``padding``, ``text_bg``, ``font_color`` and
      ``font_outline_color`` are read from it.
    video: object whose ``width`` and ``height`` give the output frame size.
    this_cs: iterable of captions; each needs ``text`` and ``origin``.
    frame: the frame as a numpy array (as returned by the OpenCV capture).
    bg_image: RGBA PIL canvas; the frame is pasted onto it at (0, 0), so the
      object passed in is modified.

  Returns:
    The composed picture as a 3-channel numpy array resized to
    ``(video.width, video.height)``.
  """
  # Paste the frame onto the canvas. PIL does not reorder channels, so the
  # pixel data keeps whatever channel order ``frame`` came in with.
  # NOTE(review): with frames read by cv2 that order is B, G, R, which means
  # the colour tuples in ``m`` are effectively (B, G, R, A) - confirm intent.
  img = Image.fromarray(frame)
  img.putalpha(255)
  bg_image.paste(img, (0, 0))

  offset = 0 # vertical shift so that simultaneous captions stack upwards

  for c in this_cs:
    left, top, right, bottom = m.font.getbbox(c.text)
    text_height = bottom - top

    # Every placement mode currently uses the same rule: the caption's own
    # origin, moved up by the room taken by the captions drawn before it.
    origin = [c.origin[0], c.origin[1] - offset]

    # Background box first, text on top. The box is composited exactly once
    # and before the text, so it never tints the glyphs.
    overlay = create_overlay(bg_image.size, origin, (left, top, right, bottom), m.padding, m.text_bg)
    bg_image = Image.alpha_composite(bg_image, overlay)
    draw = ImageDraw.Draw(bg_image)
    draw_text_outline(draw, origin, c.text, m.font, m.font_outline_color)
    draw_text(draw, origin, c.text, m.font, m.font_color)

    # Reserve this caption's height plus its vertical padding for the next one.
    offset += text_height + m.padding[1] * 2

  frame = np.array(bg_image) # 4 channels (alpha included)
  frame = cv2.resize(frame, (video.width, video.height))
  frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR) # drop alpha: back to 3 channels
  return frame

### 字幕を生成する関数

In [15]:
def GenerateCaption(m, cs, video):
  """
  Assign a drawing origin to every caption held by ``cs``.

  The line height is measured from a fixed sample string with ``m.font`` and
  enlarged by twice the vertical padding. Each caption then gets
  ``origin = [50, video height - line height - vertical padding]``.
  Nothing is returned; the captions are updated in place.
  """
  pad_y = m.padding[1]

  # Measure a sample string to obtain a representative text height.
  _, sample_top, _, sample_bottom = m.font.getbbox("漢あA　、^,g")
  line_height = (sample_bottom - sample_top) + pad_y * 2

  for caption in cs._captions:
    caption.origin = [50, round(video.height) - line_height - pad_y]

## 字幕全体の設定

In [16]:
class Manager:
  """Settings for caption rendering and for the input/output files.

  ``Manager()`` uses the defaults listed in ``__init__``. Any of those settings
  can be replaced by keyword, e.g. ``Manager(font_size=32, out_name='a.mp4')``;
  overrides are applied before ``font`` and ``out_path`` are derived, so the
  derived values always match the settings actually in use.
  """
  def __init__(self, **overrides):
    """
    Args:
      **overrides: replacement values for the settings below, keyed by
        attribute name.

    Raises:
      TypeError: if a keyword does not name one of the settings.
    """
    # --- settings meant to be edited / overridden ---
    # Colours are 4-tuples that end up as PIL fill values.
    self.font_color=(0, 0, 255, 255)
    self.font_outline_color=(0, 0, 0, 255)
    self.font_size=50
    # Fill colour of the canvas each frame is pasted onto.
    self.bg=(0, 0, 0, 255)
    # Fill colour of the box behind a caption.
    self.text_bg=(32, 32, 32, 64)
    # (horizontal, vertical) padding around the caption text.
    self.padding=(20, 10)
    self.wrapping=20
    self.font_path = '/content/drive/MyDrive/Labo/Fonts/MEIRYO.TTC'
    self.video_path = '/content/drive/MyDrive/Labo/001.mp4'
    self.csv_path = '/content/drive/MyDrive/Labo/vid_data/001.csv'
    # Output directory; joined by plain concatenation, so keep the trailing '/'.
    self.out_dist = '/content/drive/MyDrive/Labo/'
    self.out_name = 'out.mp4'
    self.posType = CapPosType.BOTTOM
    self.dispType = CapDispType.ALL

    # At this point the instance holds exactly the overridable settings, so
    # anything else passed by keyword is a typo rather than a new setting.
    unknown = sorted(set(overrides) - set(vars(self)))
    if unknown:
      raise TypeError(f"unknown Manager setting(s): {', '.join(unknown)}")
    for name, value in overrides.items():
      setattr(self, name, value)

    # --- derived from the settings above ---
    self.font = ImageFont.truetype(self.font_path, self.font_size)
    self.out_path = self.out_dist + self.out_name

  def __str__(self):
    return f"video path={self.video_path}, csv path={self.csv_path}, out path={self.out_path}"
  def __repr__(self):
    return self.__str__()

## メインで動かすところ

### 各種読み込み・動画の処理

In [17]:
m=Manager()
video = VideoCapture(m.video_path) # user-defined wrapper that reads the video's properties
cs = Captions(m.csv_path)

# Prepare the captions for drawing (for now this only sets each origin).
GenerateCaption(m, cs, video)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (video.width, video.height)
print(m.out_path, fourcc, video.fps, frame_size)
out = cv2.VideoWriter(m.out_path, fourcc, video.fps, frame_size)

# The canvas size depends only on the placement mode, so decide it once.
# For OUT the canvas is 1.3x taller than the video.
if m.posType == CapPosType.OUT:
  canvas_size = (video.width, int(video.height*1.3))
else:
  canvas_size = frame_size

try:
  # Process the video frame by frame; the loop index is the frame id.
  for i in tqdm(range(video.frame_count)):
    ret, frame = video.cap.read()
    if not ret:
      break

    this_cs = cs.getCaptions(i)

    # Only frames that carry captions need a canvas and a redraw.
    # PutText hands back a 3-channel frame, which is what gets written.
    if this_cs is not None:
      bg_image = Image.new('RGBA', canvas_size, m.bg)
      frame = PutText(m, video, this_cs, frame, bg_image)
    out.write(frame)
finally:
  # Release both handles even if a frame fails, so the output file is
  # finalized and the input is not left open.
  out.release()
  video.cap.release()

/content/drive/MyDrive/Labo/out.mp4 1983148141 30.0 (1280, 720)


  0%|          | 0/705 [00:00<?, ?it/s]

### 動画に音声を追加

In [18]:
# Combine the captioned (silent) video with the audio track of the source
# video and write the result next to it as out2.mp4.
video_clip = VideoFileClip(m.out_path)
try:
  audio_clip = AudioFileClip(m.video_path)
  try:
    final_audio = CompositeAudioClip([audio_clip])
    final_clip = video_clip.set_audio(final_audio)
    final_clip.write_videofile(m.out_dist + "out2.mp4")
  finally:
    # Close the clips so the readers moviepy opened for them are released.
    audio_clip.close()
finally:
  video_clip.close()

Moviepy - Building video /content/drive/MyDrive/Labo/out2.mp4.
MoviePy - Writing audio in out2TEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/drive/MyDrive/Labo/out2.mp4





Moviepy - Done !
Moviepy - video ready /content/drive/MyDrive/Labo/out2.mp4


## 動画を表示

In [19]:
#from IPython.display import Video
#Video("/content/drive/MyDrive/Labo/out.mp4", embed=True)