In [2]:
# video 에서 audio 추출 command: ffmpeg -i files/sample_video.mp4 -vn files/audio.mp3
import subprocess

def extract_audio_from_video(video_path:str, audio_path:str, overwrite:bool=True) -> None:
    """Extract the audio track of a video file with ffmpeg.

    Runs ``ffmpeg -y -i <video_path> -vn <audio_path>``; ``-vn`` drops the
    video stream so only audio is written, and the output format follows the
    extension of ``audio_path``.

    Args:
        video_path: Path of the source video.
        audio_path: Path of the audio file to write. Missing parent
            directories are created.
        overwrite: When True (default) an existing ``audio_path`` is replaced,
            which keeps the cell re-runnable. When False and ``audio_path``
            already exists, ffmpeg is not invoked at all.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable is not on ``PATH``.
    """
    import os  # local import so this cell does not depend on imports from other cells

    if not overwrite and os.path.exists(audio_path):
        return

    # Make sure the output directory exists before ffmpeg writes into it.
    os.makedirs(os.path.dirname(audio_path) or ".", exist_ok=True)

    # "-y" answers ffmpeg's "overwrite?" prompt up front; without it a re-run
    # would wait for input that a notebook kernel cannot provide.
    command = ["ffmpeg", "-y", "-i", video_path, "-vn", audio_path]
    # check=True turns a failed conversion into an exception instead of
    # silently continuing with a missing or stale audio file.
    subprocess.run(command, check=True)

# Pull the audio track out of the sample video and store it as an MP3 file.
extract_audio_from_video('./files/sample_video.mp4', './files/audio.mp3')

In [3]:
# audio 파일 분할
from pydub import AudioSegment
import math

def cut_audio_in_chunks(*, audio_path:str, minutes_in_chunk:int, chunks_dir:str) -> None:
    """Split an MP3 file into consecutive chunks of a fixed duration.

    The chunks are exported as ``chunk_0.mp3``, ``chunk_1.mp3``, ... inside
    ``chunks_dir``. The final chunk holds whatever audio remains and may be
    shorter than the others.

    Args:
        audio_path: Path of the MP3 file to split.
        minutes_in_chunk: Length of each chunk in minutes; must be positive.
        chunks_dir: Directory that receives the chunk files. It is created
            when it does not exist yet.

    Raises:
        ValueError: If ``minutes_in_chunk`` does not describe a positive duration.
    """
    import os  # local import so this cell does not depend on imports from other cells

    # pydub measures track length and slice positions in milliseconds.
    chunk_ms = int(minutes_in_chunk * 60 * 1000)
    if chunk_ms <= 0:
        raise ValueError(f"minutes_in_chunk must be positive, got {minutes_in_chunk!r}")

    track = AudioSegment.from_mp3(audio_path)
    os.makedirs(chunks_dir, exist_ok=True)

    chunk_count = math.ceil(len(track) / chunk_ms)
    for index in range(chunk_count):
        start_ms = index * chunk_ms
        end_ms = start_ms + chunk_ms

        # An end position past the track length is clamped to the end of the
        # track, so the last chunk needs no special handling.
        chunk = track[start_ms:end_ms]
        chunk.export(f'{chunks_dir}/chunk_{index}.mp3', format="mp3")
        

# Split the extracted audio into 10-minute MP3 chunks under ./files/chunks.
cut_audio_in_chunks(audio_path='./files/audio.mp3',  minutes_in_chunk=10, chunks_dir='./files/chunks')

In [1]:
# openai whisper 에 음성 파일을 전달 해서 녹취록 작성 
from openai import OpenAI

import os
api_key = os.getenv('OPENAI_API_KEY')

import glob
def _chunk_sort_key(path:str):
    """Order chunk files by the number after the last underscore in their name."""
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rpartition('_')[2]
    if suffix.isdecimal():
        return (0, int(suffix), stem)
    # Files without a numeric suffix are placed after the numbered ones, by name.
    return (1, 0, stem)


def transcribe_chunks(chunks_dir:str, dest_path:str) -> None:
    """Transcribe every MP3 chunk in a directory into one text file.

    Each ``*.mp3`` file in ``chunks_dir`` is sent to the ``whisper-1`` model
    (English) and the returned text is written to ``dest_path``, one chunk per
    line, in numeric chunk order (``chunk_2`` before ``chunk_10``).

    ``dest_path`` is rewritten from scratch on every call, so re-running the
    cell does not duplicate the transcript. When no chunk files are found the
    function returns without creating a client or touching ``dest_path``.

    Args:
        chunks_dir: Directory containing the ``*.mp3`` chunk files.
        dest_path: Path of the transcript file to write. Missing parent
            directories are created.
    """
    # glob gives no ordering guarantee, and a plain string sort would put
    # chunk_10 ahead of chunk_2, so sort on the numeric index explicitly.
    files = sorted(glob.glob(f'{chunks_dir}/*.mp3'), key=_chunk_sort_key)
    if not files:
        return

    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    client = OpenAI(api_key=api_key)
    # Open the destination once in write mode; utf-8 keeps the output
    # independent of the platform's default encoding.
    with open(dest_path, 'w', encoding='utf-8') as text_file:
        for file in files:
            with open(file, 'rb') as audio_file:
                transcript = client.audio.transcriptions.create(model='whisper-1', language='en', file=audio_file)
            # The newline stops the last word of one chunk from being glued
            # to the first word of the next.
            text_file.write(transcript.text + '\n')
    
# Transcribe the MP3 chunks and write the resulting text to ./files/script/script.txt.
transcribe_chunks('./files/chunks', './files/script/script.txt')