In [1]:
from moviepy.editor import VideoFileClip

def convert_mp4_to_wav(mp4_file_path, wav_file_path):
    # Load the video file
    video = VideoFileClip(mp4_file_path)
    
    # Extract the audio
    audio = video.audio
    
    # Write the audio to a WAV file
    audio.write_audiofile(wav_file_path, codec='pcm_s16le')

# Example usage
mp4_file_path = '20240530會議錄製.mp4'
wav_file_path = 'sound_data/20240530_meeting.wav'
convert_mp4_to_wav(mp4_file_path, wav_file_path)

print(f"Converted {mp4_file_path} to {wav_file_path}")


MoviePy - Writing audio in sound_data/20240530_meeting.wav


                                                                                                                       

MoviePy - Done.
Converted 20240530會議錄製.mp4 to sound_data/20240530_meeting.wav




In [2]:
import os
import torch
import whisper
from tqdm import tqdm  # Import the tqdm library

def transcribe_audio_files(folder_path, model, output_folder):
    # List all the files in the folder that are WAV files
    file_names = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)) and f.endswith('.wav')]

    # Initialize the progress bar
    pbar = tqdm(total=len(file_names), desc="Transcribing audio files")

    for file_name in file_names:
        # Load audio
        audio_path = os.path.join(folder_path, file_name)
        audio = whisper.load_audio(audio_path)

        # Transcribe audio
        text = model.transcribe(audio)

        # Create and save text file
        output_text_path = os.path.join(output_folder, f"{file_name.split('.')[0]}_transcription.txt")
        with open(output_text_path, 'w', encoding='utf-8') as f:
            f.write(text['text'])
        
        # Update the progress bar after each file is processed
        pbar.update(1)

    # Close the progress bar
    pbar.close()

# Check if CUDA is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the whisper model
model_size = "small"  # Chosen for compatibility with the available GPU VRAM
model = whisper.load_model(model_size).to(device)

# Folder paths
input_folder = 'sound_data'
output_folder = 'transcriptions'

# Create the output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Transcribe and save text
transcribe_audio_files(input_folder, model, output_folder)

Transcribing audio files: 100%|█████████████████████████████████████████████████████████| 1/1 [06:18<00:00, 378.56s/it]


In [2]:
from pydub import AudioSegment

# Path to your MP3 file
mp3_file_path = 'sound_data/科專0627_A1.mp3'

# Load the MP3 file
audio_segment = AudioSegment.from_file(mp3_file_path, format="mp3")

# Calculate the midpoint of the audio file
midpoint = len(audio_segment) // 2

# Split the audio into two halves
first_half = audio_segment[:midpoint]
second_half = audio_segment[midpoint:]

# Path to save the first half of the audio as WAV
first_half_wav_path = 'sound_data/first_half_audio.mp3'

# Path to save the second half of the audio as WAV
second_half_wav_path = 'sound_data/second_half_audio.mp3'

# Export the first half to a WAV file
first_half.export(first_half_wav_path, format="mp3")

# Export the second half to a WAV file
second_half.export(second_half_wav_path, format="mp3")


<_io.BufferedRandom name='sound_data/second_half_audio.mp3'>