In [None]:
%pip install --upgrade google-cloud-speech



### Google Cloudのストレージにあるwavファイルを使って文字起こし

In [None]:
def transcribe_gcs(gcs_uri, sample_rate_hertz=44100, audio_channel_count=2,
                   language_code="ja-JP", timeout=600):
    """Transcribe an audio file stored in Cloud Storage and return word timings.

    The request is submitted as a long-running (asynchronous) operation and
    this function blocks until it finishes. Synchronous recognition is limited
    by the service to short audio, which audio extracted from a video easily
    exceeds.

    Speaker diarization is not requested in the config, so the speaker slot of
    every returned tuple is left empty.

    Args:
        gcs_uri: ``gs://bucket/object`` URI of the audio file.
        sample_rate_hertz: Sample rate declared to the recognizer.
        audio_channel_count: Number of channels declared to the recognizer.
        language_code: BCP-47 language tag of the speech.
        timeout: Maximum number of seconds to wait for the operation.

    Returns:
        A list of ``(start_seconds, end_seconds, word, "", 0)`` tuples, one per
        recognized word, in the order the service returned them. The last two
        items are placeholders for speaker and id.
    """
    from google.cloud import speech_v1p1beta1 as speech

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        sample_rate_hertz=sample_rate_hertz,
        audio_channel_count=audio_channel_count,
        enable_word_confidence=True,
        enable_automatic_punctuation=True,
        enable_word_time_offsets=True,
        language_code=language_code,
    )

    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result(timeout=timeout)

    words = []

    for result in response.results:
        # A result without alternatives carries no transcript; skip it
        # instead of failing on alternatives[0].
        if not result.alternatives:
            continue

        # Only the first (most likely) alternative is used.
        alternative = result.alternatives[0]
        print(f"Transcript: {alternative.transcript}")
        print(f"Confidence: {alternative.confidence}")

        for word_info in alternative.words:
            words.append((
                word_info.start_time.total_seconds(),
                word_info.end_time.total_seconds(),
                word_info.word,
                "",
                0,
            ))

    return words

### 動画をwavに変換

In [None]:
def convert_mp4_to_wav(input_file):
  """Extract the audio of a local video file into a .wav file next to it.

  The output path is the input path with its extension swapped for ``.wav``.
  Only the final extension is touched, so a ``.mp4`` elsewhere in the path is
  left alone and an upper-case ``.MP4`` is handled too.

  Args:
    input_file: Path of the local video file.

  Returns:
    The path of the .wav file that was written.

  Raises:
    ValueError: If ``input_file`` already ends in ``.wav``; the output would
      be written over the file that is being read.
  """
  import os.path
  from moviepy.editor import AudioFileClip

  root, ext = os.path.splitext(input_file)
  if ext.lower() == '.wav':
    raise ValueError(f"input is already a .wav file: {input_file}")
  wav_file = root + '.wav'

  # Build the audio clip from the video file.
  audio = AudioFileClip(input_file)
  try:
    # Save it as a .wav file.
    audio.write_audiofile(wav_file)
  finally:
    # Release the underlying reader even if writing fails.
    audio.close()
  return wav_file

### GCSにファイルをアップロード

In [None]:
from google.cloud import storage
import os

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to a Cloud Storage bucket.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to store the file under.
    """
    target = (
        storage.Client()
        .bucket(bucket_name)
        .blob(destination_blob_name)
    )
    target.upload_from_filename(source_file_name)

## textをまとめる関数

In [None]:
def separate_text(t, max_chars=15):
  """Merge consecutive word entries into chunks of limited text length.

  Words are concatenated in order. A chunk is closed as soon as adding the
  next word would make its text longer than ``max_chars``. A single word that
  is longer than ``max_chars`` is kept whole as its own chunk.

  Args:
    t: Iterable of ``(start, end, word, speaker, id)`` tuples.
    max_chars: Maximum number of characters of text per chunk.

  Returns:
    A list of ``(start, end, text, '', 0)`` tuples, where ``start`` is the
    start time of the chunk's first word and ``end`` is the end time of its
    last word. Speaker and id are not carried over.
  """
  chunks = []
  buffer = ""
  chunk_start = 0.0
  chunk_end = 0.0

  for start, end, word, _speaker, _id in t:
    # Flush only a non-empty buffer, so an oversized first word does not
    # produce an empty chunk.
    if buffer and len(buffer) + len(word) > max_chars:
      chunks.append((float(chunk_start), float(chunk_end), buffer, '', 0))
      buffer = ""
    if not buffer:
      chunk_start = start
    buffer += word
    # Track the end of the last word actually inside the chunk.
    chunk_end = end

  # Emit whatever is left after the last word.
  if buffer:
    chunks.append((float(chunk_start), float(chunk_end), buffer, '', 0))
  return chunks


## csvに保存する関数

In [None]:
import numpy as np

def save_to_CSV(transcribed_array, csv_path, disp_all):
  """Write transcription rows to a CSV file and return them as a structured array.

  Args:
    transcribed_array: Sequence of ``(start_time, end_time, text, speaker, id)``
      tuples.
    csv_path: Path of the CSV file to write.
    disp_all: When true, the rows are first merged into longer text chunks
      with ``separate_text``.

  Returns:
    The NumPy structured array that was written, with fields ``start_time``,
    ``end_time``, ``text``, ``speaker`` and ``id``.

  Note:
    Fields are written unquoted, so a comma inside ``text`` or ``speaker``
    would split that row into extra columns.
  """
  if disp_all:
    transcribed_array = separate_text(transcribed_array)

  # Fixed-width unicode fields silently truncate longer strings, so widen
  # them to the longest value present (never below the original 32).
  text_width = max([32] + [len(row[2]) for row in transcribed_array])
  speaker_width = max([32] + [len(row[3]) for row in transcribed_array])
  dtype = [('start_time', float), ('end_time', float),
           ('text', f'U{text_width}'), ('speaker', f'U{speaker_width}'), ('id', int)]

  structured_array = np.array(transcribed_array, dtype=dtype)
  np.savetxt(csv_path, structured_array, delimiter=',',
             fmt=['%.6f', '%.6f', '%s', '%s', '%d'],  # one format per field
             header='start_time,end_time,text,speaker,id', comments='',  # plain header row
             encoding='utf-8')  # do not depend on the locale for non-ASCII text
  print(f"Saved to {csv_path}")
  return structured_array

# 上記を達成する関数を作る


In [35]:
def transcribe(local_video_path, bucket_name, gcs_key_path=None, wav_name=None,
               csv_path=None, disp_all=False):
  """Run the whole pipeline for one local video.

    local video --> local wav
    local wav --> gcs
    gcs --> transcribe
    transcription --> csv

  Args:
    local_video_path: Path of the local video file.
    bucket_name: Cloud Storage bucket that receives the wav file.
    gcs_key_path: Path of a service-account key file. When given it is
      exported as ``GOOGLE_APPLICATION_CREDENTIALS``; when omitted the
      environment is left untouched.
    wav_name: Object name for the uploaded wav. Defaults to the file name of
      the local wav.
    csv_path: Path of the CSV to write. Defaults to the video path with a
      ``.csv`` extension.
    disp_all: Passed through to ``save_to_CSV``.

  Returns:
    The word-level list returned by ``transcribe_gcs``. This is the
    per-word list even when ``disp_all`` is true.
  """
  if gcs_key_path:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = gcs_key_path

  # Use the path reported by the converter when it returns one; otherwise
  # derive it with the .mp4 -> .wav naming rule.
  wav_path = convert_mp4_to_wav(local_video_path) or local_video_path.replace('.mp4', '.wav')
  if wav_name is None:
    wav_name = os.path.basename(wav_path)
  if csv_path is None:
    csv_path = os.path.splitext(local_video_path)[0] + '.csv'

  upload_blob(bucket_name, wav_path, wav_name)
  t = transcribe_gcs(f"gs://{bucket_name}/{wav_name}")
  save_to_CSV(t, csv_path, disp_all)
  print('transcribe completed.')
  return t

# 動かす

In [None]:
"""t = transcribe(local_video_path="/content/drive/MyDrive/Labo/001.mp4",
               bucket_name="wits-labo-kwmr",
               wav_name="tmp.wav",
               csv_path='/content/drive/MyDrive/Labo/vid_data/001.csv')"""

MoviePy - Writing audio in /content/drive/MyDrive/Labo/001.wav




MoviePy - Done.
Transcript: ふで始まる。さくさくなものといえば、限られてきたら何 書かせたいよね。イメージだけど、割とそうかわかんね。えんだよな。正直かもしれない。
Confidence: 0.4282853901386261
Saved to /content/drive/MyDrive/Labo/vid_data/001.csv
transcribe completed.
