## Check Environment

In [1]:
import os

# Query the number of CPU cores; os.cpu_count() returns None when it cannot tell.
cpu_cores = os.cpu_count()

if cpu_cores is None:
    print("無法獲取 CPU 核心數量。")
else:
    print(f"這台機器有 {cpu_cores} 個 CPU 核心。")

這台機器有 2 個 CPU 核心。


In [2]:
import subprocess

def check_ffmpeg():
    """Run `ffmpeg -version` and print either its version banner or a diagnostic.

    Nothing is returned; the outcome is reported on stdout only.
    """
    command = ["ffmpeg", "-version"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        completed = subprocess.run(command, capture_output=True, text=True, check=True)
    except FileNotFoundError:
        # The executable could not be located at all.
        print("FFmpeg is not installed or not found in PATH.")
    except subprocess.CalledProcessError as e:
        # The executable exists but exited with an error status.
        print("Failed to execute ffmpeg. Please check installation.")
        print(e)
    else:
        print("FFmpeg is installed. Here's the version information:")
        print(completed.stdout)

check_ffmpeg()

FFmpeg is installed. Here's the version information:
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-lib

## Select Env

In [3]:
!pip install -U openai-whisper
!apt-get install ffmpeg

Collecting openai-whisper
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.6/798.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->openai-whisper)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->openai-whisper)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.

In [4]:
import ipywidgets as widgets
import os

# Runtime environments the notebook knows how to set up.
mode_list = ['colab', 'pc']

# Let the user pick one; the choice is read from mode_drop.value in a later cell.
mode_drop = widgets.Dropdown(description='Mode:', options=mode_list)
display(mode_drop)

Dropdown(description='Mode:', options=('colab', 'pc'), value='colab')

In [5]:
mode = mode_drop.value

# Build the list of candidate working directories for the selected environment.
# 'other' is a sentinel entry meaning "let me type a path".
if mode == 'pc':
    dir_list = [os.getcwd(), 'other']
elif mode == 'colab':
    # On Colab, mount Google Drive first so a Drive folder can be offered as well.
    from google.colab import drive
    drive.mount('/content/drive')
    dir_list = ["/content/drive/MyDrive/新增資料夾", os.getcwd(), 'other']

Mounted at /content/drive


In [6]:
from ipywidgets import Layout, Box

# Flexbox layout using the 'column wrap' flow for the container below.
layout = Layout(flex_flow='column wrap', display='flex')

# One dropdown per setting: Whisper model size, working directory, language code.
model_size_drop = widgets.Dropdown(description='Model size:', options=['small', 'medium', 'large'])
dir_drop = widgets.Dropdown(description='Dir:', options=dir_list)
language_drop = widgets.Dropdown(description='language:', options=['en', 'zh'])

# Put the dropdowns into a single Box that uses the layout above, then render it.
box = Box(layout=layout, children=[model_size_drop, dir_drop, language_drop])
display(box)


Box(children=(Dropdown(description='Model size:', options=('small', 'medium', 'large'), value='small'), Dropdo…

In [7]:
# 'other' means the directory is typed in by hand; any other entry is used as-is.
if dir_drop.value != 'other':
    select_dir = dir_drop.value
else:
    select_dir = input('Enter other dir: ')
select_language = language_drop.value
select_model_size = model_size_drop.value

# Echo the three choices so they are visible in the cell output.
for label, chosen in (
    ("Selected Directory:", select_dir),
    ("Selected Language:", select_language),
    ("Selected Model Size:", select_model_size),
):
    print(label, chosen)

Enter other dir: /content/drive/MyDrive/Course/self-repo/whisper
Selected Directory: /content/drive/MyDrive/Course/self-repo/whisper
Selected Language: zh
Selected Model Size: medium


In [8]:
import whisper

# Load the Whisper model whose size was picked in the 'Model size:' dropdown.
model = whisper.load_model(name=select_model_size)

100%|█████████████████████████████████████| 1.42G/1.42G [00:16<00:00, 90.1MiB/s]


In [25]:
# Keep only the entries of select_dir that are regular files (sub-directories are dropped).
files = list(filter(
    lambda entry: os.path.isfile(os.path.join(select_dir, entry)),
    os.listdir(select_dir),
))

# Dropdown used to choose which of those files to work on.
file_drop = widgets.Dropdown(description='Name:', options=files)

display(file_drop)

Dropdown(description='Name:', options=('whisper.ipynb', 'README.md', 'temporary_storage.ipynb', '.gitignore', …

## Convert audio type

In [24]:
import ipywidgets as widgets

# Common audio formats offered as conversion targets.
audio_type_list = ['mp3', 'm4a', 'wav', 'ogg', 'flac']

# Dropdown holding the target format; read later through conver_type_drop.value.
conver_type_drop = widgets.Dropdown(description='Convert to:', options=audio_type_list)
display(conver_type_drop)

Dropdown(description='Convert to:', options=('mp3', 'm4a', 'wav', 'ogg', 'flac'), value='mp3')

In [27]:
from moviepy.editor import AudioFileClip

# ffmpeg encoder to request for each target extension offered by the dropdown.
# The extension itself is not always a valid encoder name (e.g. 'wav', 'm4a', 'ogg'),
# so it must be mapped explicitly before being handed to write_audiofile.
FFMPEG_AUDIO_CODECS = {
    'mp3': 'libmp3lame',
    'm4a': 'aac',
    'wav': 'pcm_s16le',
    'ogg': 'libvorbis',
    'flac': 'flac',
}

input_audio_path = os.path.join(select_dir, file_drop.value)
target_ext = conver_type_drop.value

# splitext only removes the final suffix of the file name, so dots in directory
# names (or a file with no extension at all) no longer truncate the path.
output_audio_path = os.path.splitext(input_audio_path)[0] + '.' + target_ext

# Refuse to read from and write to the same file at once.
if os.path.abspath(output_audio_path) == os.path.abspath(input_audio_path):
    raise ValueError(f"Input is already a .{target_ext} file: {input_audio_path}")

# Fall back to the raw extension for any format not listed in the table.
codec = FFMPEG_AUDIO_CODECS.get(target_ext, target_ext)

clip = AudioFileClip(input_audio_path)
try:
    clip.write_audiofile(output_audio_path, codec=codec)
finally:
    # Release the reader held by the clip even when the export fails.
    clip.close()

MoviePy - Writing audio in /content/drive/MyDrive/Course/self-repo/whisper/4-12 2.mp3


                                                                        

MoviePy - Done.




## one-time transcription

In [None]:
# Names of the regular files located directly inside select_dir.
files = [
    entry
    for entry in os.listdir(select_dir)
    if os.path.isfile(os.path.join(select_dir, entry))
]

# Dropdown for choosing the audio file to transcribe in one pass.
file_drop = widgets.Dropdown(description='Name:', options=files)
display(file_drop)

In [None]:
name = file_drop.value

audio_file = os.path.join(select_dir, name)

# Transcribe the selected audio file in a single pass.
result = model.transcribe(audio_file, language=select_language)

# Save the transcription next to the notebook as "<selected file name>.txt".
# UTF-8 is set explicitly so non-ASCII text (e.g. Chinese) is written the same on every platform.
transcript_path = f"{name}.txt"
with open(transcript_path, "w", encoding="utf-8") as file:
    file.write(result["text"])

# Report the path that was actually written.
print(f"Transcription saved to {transcript_path}")


In [None]:
# NOTE: this import rebinds `files`, which an earlier cell used for the directory listing.
from google.colab import files

# Hand the "<name>.txt" transcript to Colab's download helper.
download_name = f"{name}.txt"
files.download(download_name)

## Time-stamp transcription

### Slice Audio

In [11]:
try:
    import pydub
except:
    !pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [28]:
from pydub import AudioSegment
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import math

# 功能函數定義
def segment_audio(audio, start_time, end_time, temp_dir, temp_name, format):
    """Cut one time window out of `audio` and export it to a file.

    Args:
        audio: Object sliceable by milliseconds whose slices provide
            ``export(path, format=...)``.
        start_time: Window start, in seconds.
        end_time: Window end, in seconds.
        temp_dir: Directory the segment file is written into.
        temp_name: File name of the segment, without extension.
        format: Export format; also used as the file extension.

    Returns:
        Path of the exported segment file.
    """
    # The audio object is indexed in milliseconds, the arguments are in seconds.
    begin_ms = start_time * 1000
    finish_ms = end_time * 1000
    piece = audio[begin_ms:finish_ms]

    target_path = os.path.join(temp_dir, f"{temp_name}.{format}")
    piece.export(target_path, format=format)
    return target_path

def try_or_make_dir(path):
    """Create directory `path` (including missing parents); do nothing if it already exists."""
    os.makedirs(name=path, exist_ok=True)

# 主要處理流程
def process_audio_segments(audio, audio_file, total_length, time_stemp, dir, select_format):
    """Split `audio` into fixed-length segments and export them into ./temp concurrently.

    Segment ``i`` covers ``[i * time_stemp, min((i + 1) * time_stemp, total_length)]``
    seconds and is written as ``temp/temp{i}.{select_format}``.

    Args:
        audio: Audio object forwarded to `segment_audio`.
        audio_file: Not used by this function; kept so existing calls keep working.
        total_length: Total duration of `audio`, in seconds.
        time_stemp: Length of each segment, in seconds. Must be positive.
        dir: Not used by this function; kept so existing calls keep working.
        select_format: Export format / file extension of the segment files.

    Returns:
        List of the exported segment paths, in chronological (segment index) order.

    Raises:
        ValueError: If `time_stemp` is not positive.
        Exception: Whatever a segment export raised; failures are no longer ignored,
            because a missing segment would silently shift every later timestamp.
    """
    if time_stemp <= 0:
        raise ValueError("time_stemp must be a positive number of seconds")

    try_or_make_dir("temp")  # make sure the temporary directory exists
    segment_count = math.ceil(total_length / time_stemp)

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        # Submitting is instantaneous, so no progress bar is shown for this step.
        tasks = [
            executor.submit(
                segment_audio,
                audio,
                i * time_stemp,
                min((i + 1) * time_stemp, total_length),
                "temp",
                f"temp{i}",
                select_format,
            )
            for i in range(segment_count)
        ]

        # Track real completion while the workers are still running; result()
        # re-raises any exception that happened inside a worker.
        for future in tqdm(as_completed(tasks), total=len(tasks), desc="Processing segments"):
            future.result()

    # `tasks` is in submission order, i.e. chronological segment order.
    return [task.result() for task in tasks]

In [29]:
# List select_dir and keep the regular files only.
files = list(filter(
    lambda candidate: os.path.isfile(os.path.join(select_dir, candidate)),
    os.listdir(select_dir),
))

# Dropdown for choosing the audio file that will be sliced into segments.
file_drop = widgets.Dropdown(description='Name:', options=files)
display(file_drop)

Dropdown(description='Name:', options=('whisper.ipynb', 'README.md', 'temporary_storage.ipynb', '.gitignore', …

In [31]:
# Read the user's selection and load the audio file.
name = file_drop.value

# Take the export format from the file extension. splitext returns an empty suffix
# for names without an extension, instead of handing back the whole file name.
select_format = os.path.splitext(name)[1].lstrip('.')
if not select_format:
    raise ValueError(f"Cannot infer the audio format: {name!r} has no file extension")

audio_file = os.path.join(select_dir, name)
audio = AudioSegment.from_file(audio_file)  # load the whole file once
total_length = audio.duration_seconds
time_stemp = 300  # segment length in seconds (5 minutes)

# Run the split. The directory argument used to be the builtin `dir` function
# (no variable named `dir` is defined in this notebook); pass the selected directory instead.
process_audio_segments(audio, audio_file, total_length, time_stemp, select_dir, select_format)

### transcription

In [33]:
# Sub-directories of select_dir, followed by the 'other' sentinel for a hand-typed path.
dir_list = list(filter(
    lambda candidate: os.path.isdir(os.path.join(select_dir, candidate)),
    os.listdir(select_dir),
)) + ['other']

# Dropdown for choosing the directory that holds the audio segments.
dir_drop = widgets.Dropdown(description='Dir:', options=dir_list)
display(dir_drop)

Dropdown(description='Dir:', options=('.git', 'other'), value='.git')

In [34]:
# 'other' prompts for an arbitrary path; any other entry is a sub-directory of select_dir.
if dir_drop.value == 'other':
    trans_dir = input('Enter dir: ')
else:
    trans_dir = os.path.join(select_dir, dir_drop.value)
trans_dir

Enter dir: /content/temp


'/content/temp'

In [35]:
# Show the entries of trans_dir. os.listdir returns them in arbitrary order,
# so the listing is not guaranteed to be in segment order.
os.listdir(trans_dir)

['temp0.mp3',
 'temp3.mp3',
 'temp7.mp3',
 'temp6.mp3',
 'temp5.mp3',
 'temp1.mp3',
 'temp4.mp3',
 'temp2.mp3',
 'temp8.mp3',
 'temp10.mp3',
 'temp9.mp3']

In [36]:
import os
import re

from tqdm import tqdm

output_path = '4-12-2.txt'  # Text file the transcription is appended to
time_stemp = 300  # Length of each segment in seconds (5 minutes)
start_time = 0  # Start time (seconds) of the segment currently being written

# Fail with a normal exception: exit() must not be used to abort a notebook cell.
if not os.path.isdir(trans_dir):
    raise FileNotFoundError(f"Directory not found: {trans_dir}")


def _natural_key(filename):
    """Sort key comparing digit runs numerically, so 'temp10' comes after 'temp9'."""
    # re.split with a capturing group alternates text / digits / text ...,
    # so every odd position holds a run of digits.
    parts = re.split(r'(\d+)', filename)
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]


# os.listdir returns entries in arbitrary order, while the timestamps below assume
# the segments are visited chronologically; sort them by their numeric index first.
segment_names = sorted(os.listdir(trans_dir), key=_natural_key)

# Process each file in the directory (a list gives tqdm a known total).
for i, filename in enumerate(tqdm(segment_names, desc="Processing segments")):
    audio_path = os.path.join(trans_dir, filename)  # Full path of the segment
    if not os.path.isfile(audio_path):
        print(f"File not found: {audio_path}")
        continue  # Skip entries that are not regular files

    # Transcribe the audio segment
    result = model.transcribe(audio_path, language=select_language)

    # Append after every segment so finished work survives an interruption.
    # NOTE: because of append mode, re-running this cell adds to an existing file.
    # UTF-8 is explicit so non-ASCII transcripts are written the same on every platform.
    with open(output_path, "a", encoding="utf-8") as file:
        file.write(f"{start_time/60}(min):\n")  # Segment start time in minutes
        file.write(result["text"])
        file.write("\n")

    print(f"Segment {i} transcription saved to {output_path}")
    start_time += time_stemp  # Advance to the start of the next segment


Processing segments: 1it [00:55, 55.33s/it]

Segment 0 transcription saved to 4-12-2.txt


Processing segments: 2it [01:48, 53.96s/it]

Segment 1 transcription saved to 4-12-2.txt


Processing segments: 3it [02:43, 54.51s/it]

Segment 2 transcription saved to 4-12-2.txt


Processing segments: 4it [03:39, 55.16s/it]

Segment 3 transcription saved to 4-12-2.txt


Processing segments: 5it [04:48, 59.93s/it]

Segment 4 transcription saved to 4-12-2.txt


Processing segments: 6it [05:41, 57.78s/it]

Segment 5 transcription saved to 4-12-2.txt


Processing segments: 7it [06:33, 55.76s/it]

Segment 6 transcription saved to 4-12-2.txt


Processing segments: 8it [07:27, 55.36s/it]

Segment 7 transcription saved to 4-12-2.txt


Processing segments: 9it [08:20, 54.46s/it]

Segment 8 transcription saved to 4-12-2.txt


Processing segments: 10it [08:23, 38.65s/it]

Segment 9 transcription saved to 4-12-2.txt


Processing segments: 11it [09:19, 50.88s/it]

Segment 10 transcription saved to 4-12-2.txt





In [37]:
# NOTE: this import rebinds `files`, which earlier cells used for directory listings.
from google.colab import files

# Hand the combined transcript file to Colab's download helper.
files.download(filename=output_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Real-time transcription (Colab)

In [None]:
!pip install pyngrok
from pyngrok import ngrok

# 開始端口轉發
ngrok_tunnel = ngrok.connect(5000)  # 將本地主機的 5000 端口轉發到 ngrok 分配的公共 URL

# 獲取公共 URL
print("Public URL:", ngrok_tunnel.public_url)

# 停止端口轉發
ngrok_tunnel.close()
