In [3]:
from transformers import pipeline
import moviepy.editor as mp
import tempfile
import os
import librosa
import soundfile as sf
import pandas as pd
import re

# Load the Whisper model for automatic speech recognition.
# The pipeline is built once at module level and bound to `asr`, so every
# call made by transcribe_and_beautify_audio_segments reuses the same object.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")

def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The ``tempfile.NamedTemporaryFile`` object (created with
        ``delete=False``) whose ``.name`` is the path of the written WAV
        file. The caller is responsible for closing and deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        video = mp.VideoFileClip(video_path)
        try:
            # A clip without an audio track exposes ``audio`` as None; fail
            # with a clear message instead of an AttributeError.
            if video.audio is None:
                raise ValueError(f"No audio track found in {video_path}")
            video.audio.write_audiofile(temp_audio.name)
        finally:
            # Close the VideoFileClip to release resources, even on failure.
            video.close()
    except BaseException:
        # The temp file was created with delete=False, so it must be removed
        # by hand when extraction does not complete.
        temp_audio.close()
        os.unlink(temp_audio.name)
        raise
    return temp_audio

def split_audio(audio_path, segment_duration=60):
    """Split an audio file into consecutive temporary WAV segments.

    Args:
        audio_path: Path of the audio file to split.
        segment_duration: Length of each segment in whole seconds. Must be
            positive.

    Returns:
        A list of ``(temp_file, start, end)`` tuples in chronological order.
        ``temp_file`` is a closed ``tempfile.NamedTemporaryFile`` object
        (created with ``delete=False``) whose ``.name`` is the segment's
        path; ``start`` and ``end`` are offsets in whole seconds. The end of
        the last segment is rounded up to the next whole second. The caller
        is responsible for deleting the files.

    Raises:
        ValueError: If ``segment_duration`` is not positive.
    """
    if segment_duration <= 0:
        raise ValueError("segment_duration must be a positive number of seconds")

    # sr=None asks librosa to keep the file's own sampling rate.
    audio, sr = librosa.load(audio_path, sr=None)

    # Round the duration UP to whole seconds: truncating would silently drop
    # the trailing partial second (and produce no segment at all for clips
    # shorter than one second).
    total_duration = librosa.get_duration(y=audio, sr=sr)
    total_seconds = int(total_duration)
    if total_seconds < total_duration:
        total_seconds += 1

    segments = []
    try:
        for start in range(0, total_seconds, segment_duration):
            end = min(start + segment_duration, total_seconds)
            # Slicing past the end of the array is clamped, so the last
            # segment simply contains whatever samples remain.
            samples = audio[start * sr:end * sr]

            temp_segment = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
            # Register the file first so it is cleaned up if the write fails.
            segments.append((temp_segment, start, end))
            # Close our handle before the file is reopened by name; the file
            # itself stays on disk because delete=False.
            temp_segment.close()
            sf.write(temp_segment.name, samples, sr)
    except BaseException:
        # Do not leave already-written segments behind on failure.
        for temp_segment, _, _ in segments:
            temp_segment.close()
            os.unlink(temp_segment.name)
        raise

    return segments

# Matches the whitespace that follows sentence-ending punctuation.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')
_SENTENCE_TERMINATORS = ('.', '!', '?')


def _split_trailing_fragment(text):
    """Split stripped *text* into ``(complete_sentences, trailing_fragment)``."""
    if not text or text.endswith(_SENTENCE_TERMINATORS):
        return text, ""
    parts = _SENTENCE_BOUNDARY.split(text)
    if len(parts) < 2:
        # No sentence boundary anywhere: keep the text where it was spoken
        # rather than pushing a whole segment into the next row.
        return text, ""
    fragment = parts[-1]
    return text[:-len(fragment)].rstrip(), fragment


def _discard_temp_file(temp_file):
    """Close *temp_file* and delete it from disk if it still exists."""
    temp_file.close()
    try:
        os.unlink(temp_file.name)
    except FileNotFoundError:
        # Already removed; this makes the cleanup safe to repeat.
        pass


def transcribe_and_beautify_audio_segments(segments):
    """Transcribe audio segments and align the text to sentence boundaries.

    Each segment is transcribed with the module-level ``asr`` pipeline. When
    a segment's text ends in the middle of a sentence, the unfinished tail is
    moved to the start of the next segment's text so that it appears exactly
    once. A segment that contains no sentence boundary at all is left
    unchanged.

    Args:
        segments: Iterable of ``(temp_file, start, end)`` tuples, where
            ``temp_file.name`` is the path of the segment's audio file. The
            files are closed and deleted by this function, including when
            transcription fails.

    Returns:
        A list of dicts with keys ``'start_time'``, ``'end_time'`` and
        ``'raw_text'``, one per segment, in input order.
    """
    segments = list(segments)
    transcriptions = []
    carried_fragment = ""

    try:
        for segment, start, end in segments:
            # Transcribe the audio segment using the Whisper model
            transcription = asr(segment.name, return_timestamps=True)
            _discard_temp_file(segment)

            # Strip first so stray whitespace does not hide the final
            # punctuation mark from the end-of-sentence check.
            text = transcription['text'].strip()

            # Prepend the unfinished sentence left over from the previous segment
            if carried_fragment:
                text = f"{carried_fragment} {text}".strip()

            # Keep only complete sentences here; the tail goes to the next row
            text, carried_fragment = _split_trailing_fragment(text)

            transcriptions.append({
                'start_time': start,
                'end_time': end,
                'raw_text': text
            })
    finally:
        # Remove every segment file, including those never reached because
        # an earlier transcription raised.
        for segment, _, _ in segments:
            _discard_temp_file(segment)

    # There is no following segment to receive the last unfinished sentence,
    # so put it back at the end of the final row.
    if carried_fragment:
        last = transcriptions[-1]
        last['raw_text'] = f"{last['raw_text']} {carried_fragment}".strip()

    return transcriptions

# Specify the folder containing your video files
video_folder = "lecture_videos"  # Replace with the path to your video folder

# Supported video file extensions
video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')

# List all video files in the folder. Sorted because os.listdir returns
# entries in arbitrary order and the output rows should be reproducible.
video_files = sorted(
    os.path.join(video_folder, f) for f in os.listdir(video_folder)
    if f.lower().endswith(video_extensions)
)

# Check if any video files are found
if not video_files:
    print(f"No video files found in {video_folder}")
else:
    all_transcriptions = []

    for video_file in video_files:
        print(f"Processing {video_file}...")
        audio_file = None
        try:
            # Extract the video name without extension
            video_name = os.path.splitext(os.path.basename(video_file))[0]

            # Extract audio from the local video file
            audio_file = extract_audio_from_video(video_file)

            # Split the audio into smaller segments of 60 seconds each
            segments = split_audio(audio_file.name)

            # Transcribe and beautify each audio segment
            corrected_transcriptions = transcribe_and_beautify_audio_segments(segments)

            # Add the video filename to each transcription
            for transcription in corrected_transcriptions:
                transcription['video'] = video_name

            # Append the transcriptions to the list
            all_transcriptions.extend(corrected_transcriptions)
        except Exception as e:
            # One broken video must not abort the whole batch.
            print(f"An error occurred while processing {video_file}: {e}")
        finally:
            # Clean up the temporary audio file whether or not processing
            # succeeded; it is created with delete=False.
            if audio_file is not None:
                try:
                    audio_file.close()
                    os.unlink(audio_file.name)
                except OSError as e:
                    print(f"Could not remove temporary file {audio_file.name}: {e}")

    # Create a DataFrame to store the corrected transcriptions with timestamps
    # and video names. Passing `columns` fixes the column order and keeps the
    # frame valid (empty, with headers) when no transcription was collected.
    df = pd.DataFrame(
        all_transcriptions,
        columns=['video', 'start_time', 'end_time', 'raw_text'],
    )

    # Save the DataFrame to an Excel file
    output_excel_path = "transcriptions.xlsx"
    df.to_excel(output_excel_path, index=False)

    # Print the path to the Excel file
    print(f"Transcriptions have been saved to {output_excel_path}")


Processing lecture_videos\GAIT Teams Lecture-20240829_184346-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmpx8vdvdvx.wav


                                                                          

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20240903_184512-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmprtg83nd8.wav


                                                                          

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20240905_184245-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmpond93iwb.wav


                                                                        

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20240910_184443-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmpuftjvnqx.wav


                                                                        

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20241008_184347-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmpx6lgrkji.wav


                                                                        

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20241022_184138-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmpb3cm49gb.wav


                                                                        

MoviePy - Done.
Processing lecture_videos\GAIT Teams Lecture-20241029_184026-Meeting Recording.mp4...
MoviePy - Writing audio in C:\Users\abhis\AppData\Local\Temp\tmp2tc141c5.wav


                                                                        

MoviePy - Done.
Transcriptions have been saved to transcriptions.xlsx
