## Check Environment

In [42]:
import os

# Ask the OS how many CPU cores it reports; os.cpu_count() yields None when unknown.
cpu_cores = os.cpu_count()

if cpu_cores is None:
    print("無法獲取 CPU 核心數量。")
else:
    print(f"這台機器有 {cpu_cores} 個 CPU 核心。")

這台機器有 4 個 CPU 核心。


In [41]:
import subprocess

def check_ffmpeg():
    """Report whether the ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` and prints the captured version banner on
    success. Prints a diagnostic message instead when the command exits with
    a non-zero status or when the executable cannot be found. Returns None.
    """
    command = ["ffmpeg", "-version"]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        completed = subprocess.run(command, check=True, text=True, capture_output=True)
    except FileNotFoundError:
        # The ffmpeg command could not be located at all.
        print("FFmpeg is not installed or not found in PATH.")
    except subprocess.CalledProcessError as error:
        # The command was found but it reported a failure.
        print("Failed to execute ffmpeg. Please check installation.")
        print(error)
    else:
        print("FFmpeg is installed. Here's the version information:")
        print(completed.stdout)


check_ffmpeg()

FFmpeg is installed. Here's the version information:
ffmpeg version N-114554-g7bf85d2d3a-20240401 Copyright (c) 2000-2024 the FFmpeg developers
built with gcc 13.2.0 (crosstool-NG 1.26.0.65_ecc5e41)
configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-w64-mingw32- --arch=x86_64 --target-os=mingw32 --enable-gpl --enable-version3 --disable-debug --disable-w32threads --enable-pthreads --enable-iconv --enable-libxml2 --enable-zlib --enable-libfreetype --enable-libfribidi --enable-gmp --enable-fontconfig --enable-libharfbuzz --enable-libvorbis --enable-opencl --disable-libpulse --enable-libvmaf --disable-libxcb --disable-xlib --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-lib

## Select Env

In [None]:
!pip install -U openai-whisper
!apt-get install ffmpeg

In [1]:
import ipywidgets as widgets
import os

# Let the user pick the runtime environment; the next cell reads mode_drop.value.
mode_list = ['colab', 'pc']
mode_drop = widgets.Dropdown(description='Mode:', options=mode_list)
display(mode_drop)

Dropdown(description='Mode:', options=('colab', 'pc'), value='colab')

In [2]:
# Build the list of candidate working directories for the selected environment.
mode = mode_drop.value
if mode == 'pc':
    dir_list = [os.getcwd(), 'other']
elif mode == 'colab':
    # On Colab, mount Google Drive first so a Drive folder can be offered as a choice.
    from google.colab import drive
    drive.mount('/content/drive')
    dir_list = ["/content/drive/MyDrive/新增資料夾", os.getcwd(), 'other']

In [3]:
from ipywidgets import Layout, Box

# Flex layout that arranges its children in a wrapping column.
layout = Layout(flex_flow='column wrap', display='flex')

# One dropdown per user choice: model size, working directory and language.
model_size_drop = widgets.Dropdown(description='Model size:', options=['small', 'medium', 'large'])
dir_drop = widgets.Dropdown(description='Dir:', options=dir_list)
language_drop = widgets.Dropdown(description='language:', options=['en', 'zh'])

# Group the dropdowns in a single Box that uses the layout above, then render it.
box = Box(layout=layout, children=[model_size_drop, dir_drop, language_drop])
display(box)


Box(children=(Dropdown(description='Model size:', options=('small', 'medium', 'large'), value='small'), Dropdo…

In [10]:
# Take the directory from the dropdown; when 'other' is chosen, prompt for a path instead.
select_dir = dir_drop.value
if select_dir == 'other':
    select_dir = input('Enter other dir: ')

select_model_size = model_size_drop.value
select_language = language_drop.value

# Echo the choices so the values used by later cells are visible.
print("Selected Directory:", select_dir)
print("Selected Language:", select_language)
print("Selected Model Size:", select_model_size)

Selected Directory: C:\Users\112033645\Videos
Selected Language: en
Selected Model Size: medium


In [None]:
import whisper
# Load the Whisper model named by the size picked in the selection cell
# (select_model_size); later cells call model.transcribe on it.
model = whisper.load_model(select_model_size)

In [11]:
# Keep only regular files (no sub-directories) found directly inside select_dir.
files = list(filter(
    lambda entry: os.path.isfile(os.path.join(select_dir, entry)),
    os.listdir(select_dir),
))

# Offer those file names in a dropdown and render it.
file_drop = widgets.Dropdown(description='Name:', options=files)
display(file_drop)

Dropdown(description='Name:', options=('2024-04-16 13-27-19.mkv', '4-12.mp3', 'desktop.ini', 'port test.ipynb'…

## Convert audio type

In [12]:
import ipywidgets as widgets

# Common audio formats offered as conversion targets.
audio_type_list = ['mp3', 'm4a', 'wav', 'ogg', 'flac']
conver_type_drop = widgets.Dropdown(description='Convert to:', options=audio_type_list)

# Render the dropdown.
display(conver_type_drop)

Dropdown(description='Convert to:', options=('mp3', 'm4a', 'wav', 'ogg', 'flac'), value='mp3')

In [24]:
from moviepy.editor import AudioFileClip
from moviepy.editor import VideoFileClip

# Convert the selected file (audio, or the audio track of a video) to the
# format chosen in conver_type_drop. The result is written relative to the
# current working directory, using the source file's name with the new extension.
input_audio_path = os.path.join(select_dir, file_drop.value)

# splitext keeps dots inside the stem and copes with names that have no extension.
source_stem, source_ext = os.path.splitext(file_drop.value)
target_ext = conver_type_drop.value
output_audio_path = f"{source_stem}.{target_ext}"

video_list = ['mp4', 'mkv', 'avi', 'mov', 'wmv']
# Compare case-insensitively so e.g. "CLIP.MKV" is still treated as a video.
is_video = source_ext.lstrip('.').lower() in video_list

# ffmpeg encoder to use for each target extension. The extension itself is
# not generally a valid encoder name (e.g. 'wav', 'm4a', 'ogg'), so it must
# not be passed as the codec.
codec_by_extension = {
    'mp3': 'libmp3lame',
    'm4a': 'aac',
    'wav': 'pcm_s16le',
    'ogg': 'libvorbis',
    'flac': 'flac',
}
# None lets moviepy infer the codec from the output extension.
codec = codec_by_extension.get(target_ext)

# Refuse to overwrite the file that is being read.
if os.path.abspath(output_audio_path) == os.path.abspath(input_audio_path):
    raise ValueError(f"Input and output are the same file: {input_audio_path}")

clip = VideoFileClip(input_audio_path) if is_video else AudioFileClip(input_audio_path)
try:
    audio_track = clip.audio if is_video else clip
    if audio_track is None:
        raise ValueError(f"No audio track found in {input_audio_path}")
    audio_track.write_audiofile(output_audio_path, codec=codec)
finally:
    # Release the reader held by the clip even if writing fails.
    clip.close()

MoviePy - Writing audio in 2024-04-16 13-27-19.mp3


                                                                          

MoviePy - Done.




In [21]:
# Show the text after the last '.' in the input path (its extension when one exists).
input_audio_path.rsplit('.', 1)[-1]

'mkv'

## one-time transcription

In [None]:
# Keep only regular files (no sub-directories) found directly inside select_dir.
files = list(filter(
    lambda entry: os.path.isfile(os.path.join(select_dir, entry)),
    os.listdir(select_dir),
))

# Offer those file names in a dropdown and render it.
file_drop = widgets.Dropdown(description='Name:', options=files)
display(file_drop)

In [None]:
# Transcribe the file selected in file_drop in a single pass.
name = file_drop.value
audio_file = os.path.join(select_dir, name)

# Run Whisper on the whole file in the selected language.
result = model.transcribe(audio_file, language=select_language)

# Save the transcript next to the notebook as "<file name>.txt".
# UTF-8 is explicit so non-ASCII text does not depend on the platform's
# default encoding.
output_path = f"{name}.txt"
with open(output_path, "w", encoding="utf-8") as file:
    file.write(result["text"])

# Report the path that was actually written.
print(f"Transcription saved to {output_path}")


In [None]:
# Offer the transcript as a browser download. google.colab is only importable
# on Colab, so the import is guarded by the mode picked at the top of the notebook.
transcript_path = f"{name}.txt"
if mode == 'colab':
    # Import under an alias so the global `files` list used by the file
    # dropdown cells is not replaced by the module.
    from google.colab import files as colab_files
    colab_files.download(transcript_path)
else:
    print(f"Not running on Colab; transcript is at {os.path.abspath(transcript_path)}")

## Timestamped transcription

### Slice Audio

In [52]:
from pydub import AudioSegment
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import math

# 功能函數定義
def segment_audio(audio, start_time, end_time, temp_dir, temp_name, format):
    """Export one time slice of ``audio`` to a file and return the file's path.

    ``start_time`` and ``end_time`` are in seconds; they are multiplied by
    1000 because the slice bounds are expressed in milliseconds. The slice is
    exported as ``<temp_dir>/<temp_name>.<format>`` using ``format`` as the
    export format.
    """
    start_ms = start_time * 1000
    end_ms = end_time * 1000
    piece = audio[start_ms:end_ms]

    target_path = os.path.join(temp_dir, "{}.{}".format(temp_name, format))
    piece.export(target_path, format=format)
    return target_path

def try_or_make_dir(path):
    """Create directory ``path`` (with any missing parents); no error if it already exists."""
    os.makedirs(name=path, exist_ok=True)

# 主要處理流程
def process_audio_segments(audio, audio_file, total_length, time_stemp, dir, select_format):
    """Split ``audio`` into consecutive chunks and export each one under ``temp/``.

    Chunks are ``time_stemp`` seconds long (the last one may be shorter) and
    are exported concurrently via ``segment_audio`` as ``temp/temp<i>.<select_format>``,
    where ``temp`` is relative to the current working directory.

    Args:
        audio: Sliceable audio object handed to ``segment_audio``.
        audio_file: Unused; kept so existing callers keep working.
        total_length: Total duration of ``audio`` in seconds.
        time_stemp: Chunk length in seconds; must be positive.
        dir: Unused; kept so existing callers keep working.
        select_format: Export format, also used as the file extension.

    Returns:
        list: Paths of the exported segment files in chronological order.

    Raises:
        ValueError: If ``time_stemp`` is not positive.
        Exception: Any error raised while exporting a segment is re-raised
            instead of being silently dropped with its future.
    """
    if time_stemp <= 0:
        raise ValueError("time_stemp must be a positive number of seconds")

    temp_dir = "temp"
    try_or_make_dir(temp_dir)  # make sure the output directory exists

    tasks = []
    # One worker per CPU core reported by the OS.
    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        start_time = 0
        for i in range(math.ceil(total_length / time_stemp)):
            end_time = min(start_time + time_stemp, total_length)
            seg_name = f"temp{i}"
            tasks.append(executor.submit(
                segment_audio, audio, start_time, end_time, temp_dir, seg_name, select_format
            ))
            start_time = end_time

        # Show progress and surface the first export failure, if any.
        for future in tqdm(as_completed(tasks), total=len(tasks), desc="Processing segments"):
            future.result()

    # tasks is in submission order, so the paths come back chronologically.
    return [task.result() for task in tasks]

In [None]:
# Keep only regular files (no sub-directories) found directly inside select_dir.
files = list(filter(
    lambda entry: os.path.isfile(os.path.join(select_dir, entry)),
    os.listdir(select_dir),
))

# Offer those file names in a dropdown and render it.
file_drop = widgets.Dropdown(description='Name:', options=files)
display(file_drop)

In [None]:
# Read the file picked in file_drop and split it into fixed-length segments.
name = file_drop.value
select_format = name.split('.')[-1]  # export segments in the source file's format
# Join with select_dir, the folder chosen earlier. The bare name `dir` is the
# Python builtin function here, not a path, and makes os.path.join raise TypeError.
audio_file = os.path.join(select_dir, name)
audio = AudioSegment.from_file(audio_file)  # decode the whole file once
total_length = audio.duration_seconds
time_stemp = 300  # segment length in seconds (5 minutes)

# Run the segmentation.
process_audio_segments(audio, audio_file, total_length, time_stemp, select_dir, select_format)

### transcription

In [63]:
# List the sub-directories of select_dir so the folder holding the segments can be picked.
# NOTE(review): this rebinds dir_list and dir_drop, names that earlier cells also assign.
dir_list = list(filter(
    lambda entry: os.path.isdir(os.path.join(select_dir, entry)),
    os.listdir(select_dir),
))

dir_drop = widgets.Dropdown(description='Dir:', options=dir_list)

# Render the dropdown.
display(dir_drop)

Dropdown(description='Dir:', options=('.git', 'temp'), value='.git')

In [64]:
# Full path of the sub-directory picked in dir_drop; the transcription loop lists its contents.
trans_dir = os.path.join(select_dir, dir_drop.value)

In [65]:
# Display the resolved directory as a check before transcribing.
trans_dir

'c:\\Users\\112033645\\Videos\\whisper\\temp'

In [None]:
import re

# Transcribe every segment file in trans_dir in chronological order and append
# each result, preceded by its start time in minutes, to "<name>.txt".
# NOTE: the file is opened in append mode, so re-running this cell adds to
# whatever the file already contains.
output_path = f'{name}.txt'

total_length = audio.duration_seconds

time_stemp = 300  # segment length in seconds (5 minutes)
start_time = 0


def _segment_order(file_name):
    """Sort key that compares embedded numbers numerically (temp2 before temp10)."""
    return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', file_name)]


# os.listdir returns names in arbitrary order, so sort them explicitly;
# otherwise the timestamps written below would not match the audio.
segment_files = sorted(
    (entry for entry in os.listdir(trans_dir) if os.path.isfile(os.path.join(trans_dir, entry))),
    key=_segment_order,
)

# Wrapping the list (not the enumerate object) lets tqdm know the total.
for i, audio_file in enumerate(tqdm(segment_files, desc="Processing segments")):
    end_time = min(start_time + time_stemp, total_length)

    # listdir yields bare names; join with trans_dir so the file is found
    # regardless of the current working directory.
    segment_path = os.path.join(trans_dir, audio_file)
    result = model.transcribe(segment_path, language=select_language)

    # Append this segment's text; UTF-8 is explicit so non-ASCII text does not
    # depend on the platform's default encoding.
    with open(output_path, "a", encoding="utf-8") as file:
        file.write(f"{start_time/60}(min):\n")
        file.write(result["text"])
        file.write("\n")

    print(f"Segment {i} transcription saved to {output_path}")

    start_time = end_time