## Test Chirp 2 Speech-to-Text API

In [3]:
! pip install --upgrade google-cloud-speech

Collecting google-cloud-speech
  Downloading google_cloud_speech-2.27.0-py2.py3-none-any.whl.metadata (5.2 kB)
Downloading google_cloud_speech-2.27.0-py2.py3-none-any.whl (292 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.4/292.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hInstalling collected packages: google-cloud-speech
  Attempting uninstall: google-cloud-speech
    Found existing installation: google-cloud-speech 2.26.0
    Uninstalling google-cloud-speech-2.26.0:
      Successfully uninstalled google-cloud-speech-2.26.0
Successfully installed google-cloud-speech-2.27.0


In [1]:
# Test Chirp2 Speech to text API
import os

from google.api_core.client_options import ClientOptions
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

# 8 hours expressed in seconds; run_batch_recognize() waits up to 3x this
# value for the batch operation to finish.
MAX_AUDIO_LENGTH_SECS = 8 * 60 * 60


def run_batch_recognize(
    audio_gcs_uri="gs://test_stt_mp4_to_text/test_audio1.mp3",
    recognizer="projects/tw-maxchens-sandbox/locations/us-central1/recognizers/chirp2-for-chinese",
    language_codes=("cmn-Hans-CN",),
    model="chirp_2",
    api_endpoint="us-central1-speech.googleapis.com",
):
    """Submit one audio file to batch recognition and print the response.

    Every argument defaults to the value that used to be hard-coded in the
    body, so calling ``run_batch_recognize()`` with no arguments sends the
    same request as before.

    Args:
        audio_gcs_uri: ``gs://`` URI of the audio file to transcribe.
        recognizer: Full resource name of the recognizer to use.
        language_codes: Iterable of language codes for the recognition config.
        model: Recognition model name.
        api_endpoint: Regional Speech endpoint the client connects to.
    """
    # Instantiates a client bound to the regional endpoint.
    client = SpeechClient(
        client_options=ClientOptions(api_endpoint=api_endpoint),
    )

    config = cloud_speech.RecognitionConfig(
        # Explicit message instead of `{}`, matching the other cell's config.
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        features=cloud_speech.RecognitionFeatures(
            enable_automatic_punctuation=True,
        ),
        model=model,
        language_codes=list(language_codes),
    )

    files = [cloud_speech.BatchRecognizeFileMetadata(uri=audio_gcs_uri)]

    request = cloud_speech.BatchRecognizeRequest(
        recognizer=recognizer,
        config=config,
        files=files,
        # Ask for the transcript inline in the response.
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
    )
    operation = client.batch_recognize(request=request)

    print("Waiting for operation to complete...")
    # Block until the long-running operation finishes (or the timeout elapses).
    response = operation.result(timeout=3 * MAX_AUDIO_LENGTH_SECS)
    print(response)


# Set the credentials file used by the Google client libraries, then run the request.
# NOTE(review): this is a hard-coded absolute local path to a service-account
# key; consider setting GOOGLE_APPLICATION_CREDENTIALS outside the notebook.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/Users/mai/Desktop/GitHub-Repos/GCP_AI/serviceAccount/tw-maxchens-sandbox-ebfac091c020.json"
run_batch_recognize()

Waiting for operation to complete...
results {
  key: "gs://test_stt_mp4_to_text/test_audio1.mp3"
  value {
    error {
      code: 13
      message: "An internal error occurred."
    }
    metadata {
      total_billed_duration {
      }
    }
    cloud_storage_result {
      uri: "gs://test_stt_mp4_to_text/transcripts/test_audio1_transcript_66c8deaf-0000-2469-883c-883d24f1306c.json"
    }
    uri: "gs://test_stt_mp4_to_text/transcripts/test_audio1_transcript_66c8deaf-0000-2469-883c-883d24f1306c.json"
  }
}
total_billed_duration {
}



In [2]:
import os
import random
from concurrent.futures import ThreadPoolExecutor, as_completed
from google.api_core import retry
from google.api_core import exceptions
from google.api_core.client_options import ClientOptions
from google.cloud.speech_v2 import SpeechClient
from google.cloud.speech_v2.types import cloud_speech

# 8 hours expressed in seconds; transcribe_audio() waits up to 3x this value
# for each batch operation to finish.
MAX_AUDIO_LENGTH_SECS = 8 * 60 * 60


def list_blobs(bucket_name, prefix=""):
    """Return the blobs in ``bucket_name`` whose names end in ``.wav``.

    The extension check is case-insensitive. ``prefix`` is forwarded to the
    Cloud Storage listing call.
    """
    from google.cloud import storage

    wav_blobs = []
    for candidate in storage.Client().list_blobs(bucket_name, prefix=prefix):
        if candidate.name.lower().endswith('.wav'):
            wav_blobs.append(candidate)
    return wav_blobs


def custom_retry_predicate(exc):
    """Tell the retry machinery whether ``exc`` should trigger another attempt.

    Only ``exceptions.ResourceExhausted`` (and its subclasses) qualifies.
    """
    should_retry = isinstance(exc, exceptions.ResourceExhausted)
    return should_retry


def custom_wait_exponential(retry_attempt):
    """Return a backoff delay for the given attempt number.

    The delay is ``2 ** retry_attempt`` capped at 60, plus a random jitter
    between 0 and 1 in steps of one thousandth.
    """
    capped_backoff = min(60, 2 ** retry_attempt)
    jitter = random.randint(0, 1000) / 1000.0
    return capped_backoff + jitter


def _join_transcript(recognition_results):
    """Concatenate the top alternative of each result, one per line.

    Results that carry no alternatives are skipped rather than raising
    ``IndexError`` on ``alternatives[0]``.
    """
    return "".join(
        result.alternatives[0].transcript + "\n"
        for result in recognition_results
        if result.alternatives
    )


# `retry.Retry` has no `wait` parameter; the backoff schedule is configured
# through `initial`, `maximum` and `multiplier`, so the unsupported `wait=`
# argument is dropped here.
@retry.Retry(predicate=custom_retry_predicate,
             initial=1.0,
             maximum=60.0,
             multiplier=2.0,
             deadline=600.0)
def transcribe_audio(gcs_uri, output_path, project_id):
    """Transcribe one audio file from GCS and save the text to ``output_path``.

    Args:
        gcs_uri: ``gs://`` URI of the audio file to transcribe.
        output_path: Local file path the transcript is written to (UTF-8).
            Missing parent directories are created.
        project_id: Google Cloud project used to build the recognizer name.

    Raises:
        RuntimeError: If the batch response carries a per-file error for
            ``gcs_uri``. No transcript file is written in that case.
        Exception: Any other failure is printed and re-raised, so the retry
            decorator and the caller can react to it.
    """
    try:
        client = SpeechClient(
            client_options=ClientOptions(
                api_endpoint="us-central1-speech.googleapis.com",
            ),
        )

        config = cloud_speech.RecognitionConfig(
            auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
            language_codes=["cmn-Hant-TW"],
            model="chirp_2",
            features=cloud_speech.RecognitionFeatures(
                enable_automatic_punctuation=True,
            ),
        )

        file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)

        request = cloud_speech.BatchRecognizeRequest(
            # "_" as the recognizer id, with the config supplied inline below.
            recognizer=f"projects/{project_id}/locations/us-central1/recognizers/_",
            config=config,
            files=[file_metadata],
            recognition_output_config=cloud_speech.RecognitionOutputConfig(
                inline_response_config=cloud_speech.InlineOutputConfig(),
            ),
        )

        print(f"Transcribing {gcs_uri}...")
        operation = client.batch_recognize(request=request)
        response = operation.result(timeout=3 * MAX_AUDIO_LENGTH_SECS)

        file_result = response.results[gcs_uri]  # type: ignore

        # A failed file is reported inside the response instead of being
        # raised; without this check an empty transcript would be saved.
        if file_result.error.code:
            raise RuntimeError(
                f"Recognition failed for {gcs_uri} "
                f"(code {file_result.error.code}): {file_result.error.message}"
            )

        transcript = _join_transcript(file_result.transcript.results)

        print(f"Transcription: {transcript}")

        # dirname() is "" for a bare file name, and os.makedirs("") raises.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(transcript)

        print(f"Transcription saved to {output_path}")
    except Exception as e:
        print(f"Error transcribing {gcs_uri}: {str(e)}")
        raise


def process_audio_files(bucket_name, output_folder, project_id, test_mode=False,
                        max_workers=2):
    """Transcribe every ``.wav`` blob in a bucket using a thread pool.

    Each blob is passed to ``transcribe_audio``. The transcript path is
    ``output_folder`` joined with the blob name, with the extension replaced
    by ``_transcript.txt``. A failure on one blob is printed and does not
    stop the others.

    Args:
        bucket_name: Name of the GCS bucket to scan.
        output_folder: Local folder under which transcripts are written.
        project_id: Google Cloud project forwarded to ``transcribe_audio``.
        test_mode: When true, only the first listed blob is submitted.
        max_workers: Number of worker threads. Defaults to 2, the value that
            was previously hard-coded to keep concurrency low.
    """
    blobs = list_blobs(bucket_name)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_blob = {}
        for blob in blobs:
            transcript_name = blob.name.rsplit('.', 1)[0] + '_transcript.txt'
            output_path = os.path.join(output_folder, transcript_name)
            future = executor.submit(
                transcribe_audio,
                f"gs://{bucket_name}/{blob.name}",
                output_path,
                project_id
            )
            future_to_blob[future] = blob

            if test_mode:
                print("Test mode: Processing only the first file.")
                break

        # In test mode only one future was submitted, so no extra early exit
        # is needed while draining the results.
        for future in as_completed(future_to_blob):
            blob = future_to_blob[future]
            try:
                future.result()
            except Exception as exc:
                print(f'{blob.name} generated an exception: {exc}')


if __name__ == "__main__":
    # Credentials file for the Google client libraries.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/Users/mai/Desktop/GitHub-Repos/NTHURC-MeetingMinutes/serviceAccount/nthurc-aisearch-202406-5681c7d99d70.json"

    project_id = "nthurc-aisearch-202406"
    # Replace with your GCS bucket name.
    bucket_name = "nthurc-speech-to-text-data"
    # Replace with the folder where the transcripts should be saved.
    output_folder = "/Users/mai/Downloads/錄音檔轉錄稿"

    process_audio_files(
        bucket_name=bucket_name,
        output_folder=output_folder,
        project_id=project_id,
        test_mode=False,
    )

Transcribing gs://nthurc-speech-to-text-data/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092359-TRIG_linear16.wav...Transcribing gs://nthurc-speech-to-text-data/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092456-TRIG_linear16.wav...

Transcription: 他就是不能選。 就是到等到你看屋選屋的時候,還剩下什麼方向,就只能選那個方向。 哦,原來如此。 那再來的話,我看他寫說是7月

Transcription saved to /Users/mai/Downloads/錄音檔轉錄稿/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092456-TRIG_linear16_transcript.txt
Transcribing gs://nthurc-speech-to-text-data/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092518-TRIG_linear16.wav...
Transcription: 所以一定要把小孩子的戶口跟我簽,就是一樣。 我們是以申請人為基準,然後看他的戶籍成本,現戶全戶的戶籍成本裡面有哪些人是他的仔細清楚,這些人才能算為家庭成員,那配偶的話是不管有沒有在同一個戶籍都算為家庭成員。 哦,所以一定要,一定要就是同戶仔細清楚就對了。 對。 哦,好,那另外我想再請教,就是說他那個每人平均

Transcription saved to /Users/mai/Downloads/錄音檔轉錄稿/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092359-TRIG_linear16_transcript.txt
Transcribing gs://nthurc-speech-to-text-data/新北住都中心提供ㄧ名承辦之錄音檔_converted/202306/01/20230601092546-TRIG_linear16.wav...
Transcription:

In [10]:
# Count the .txt files under the transcript output folder.
! find /Users/mai/Downloads/錄音檔轉錄稿 -name "*.txt" | wc -l

     925


In [11]:
# Count the .wav files under this local folder (presumably the source
# recordings; compare with the number of transcripts written).
! find "/Users/mai/Downloads/新北住都中心提供ㄧ名承辦之錄音檔 上午11.38.06" -name "*.wav" | wc -l

     926
