<a href="https://colab.research.google.com/github/PriyaKumar1717/3d-to-floor/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import moviepy.config as mp_config
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

# Set the path to the ImageMagick binary
mp_config.IMAGEMAGICK_BINARY = r'C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe'  # Update this path to where ImageMagick is installed

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise


def main():
    video_path = 'video.avi'
    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()

ERROR:root:Error extracting audio: Command '['ffmpeg', '-i', 'video.avi', '-q:a', '0', '-map', 'a', 'audio.wav', '-y']' returned non-zero exit status 1.
ERROR:root:An error occurred: Command '['ffmpeg', '-i', 'video.avi', '-q:a', '0', '-map', 'a', 'audio.wav', '-y']' returned non-zero exit status 1.


In [2]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [5]:
!pip install whisper

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=6bf800efe0e6565d8e19d13e5dbb5643ff36a97fcea898cc5bdfba77aeb619a9
  Stored in directory: /root/.cache/pip/wheels/aa/7c/1d/015619716e2facae6631312503baf3c3220e6a9a3508cb14b6
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10


In [7]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import moviepy.config as mp_config
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

# Set the path to the ImageMagick binary
mp_config.IMAGEMAGICK_BINARY = r'C:\Program Files\ImageMagick-7.1.1-Q16-HDRI\magick.exe'  # Update this path to where ImageMagick is installed

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = 'video.avi'
    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    # Check if the video file exists
    if not os.path.exists(video_path):
        logging.error(f"Video file does not exist: {video_path}")
        return

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


ERROR:root:Video file does not exist: video.avi


In [8]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    import tkinter as tk
    from tkinter import filedialog

    root = tk.Tk()
    root.withdraw()

    video_path = filedialog.askopenfilename(title="Select a video file", filetypes=[("Video files", "*.avi *.mp4 *.mov *.mkv")])
    if not video_path:
        logging.error("No video file selected.")
        return

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


TclError: no display name and no $DISPLAY environment variable

In [9]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = input("Enter the path to the video file: ").strip()
    if not os.path.exists(video_path):
        logging.error(f"Video file does not exist: {video_path}")
        return

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


Enter the path to the video file: "C:\Users\JESUS\Dropbox\PC\Downloads\Creating Automatic GIFs from Videos.mp4"


ERROR:root:Video file does not exist: "C:\Users\JESUS\Dropbox\PC\Downloads\Creating Automatic GIFs from Videos.mp4"


In [11]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = input("Enter the path to the video file: ").strip().strip('"')
    if not os.path.exists(video_path):
        logging.error(f"Video file does not exist: {video_path}")
        return

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


Enter the path to the video file: "C:\Users\JESUS\Dropbox\PC\Downloads\gifsauto.mp4"


ERROR:root:Video file does not exist: C:\Users\JESUS\Dropbox\PC\Downloads\gifsauto.mp4


In [12]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = input("Enter the path to the video file: ").strip().strip('"')
    video_path = os.path.normpath(video_path)

    if not os.path.exists(video_path):
        logging.error(f"Video file does not exist: {video_path}")
        return

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


Enter the path to the video file: "C:\Users\JESUS\Dropbox\PC\Downloads\gifsauto.mp4"


ERROR:root:Video file does not exist: C:\Users\JESUS\Dropbox\PC\Downloads\gifsauto.mp4


In [13]:
import os
import subprocess
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import logging
import whisper
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        result = subprocess.run(['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y'], check=True, capture_output=True, text=True)
        logging.info(f"Extracted audio to {audio_path}")
    except subprocess.CalledProcessError as e:
        logging.error(f"Error extracting audio: {e.stderr}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    # Use tkinter to open a file dialog and select the video file
    root = Tk()
    root.withdraw()  # Hide the root window
    video_path = askopenfilename(title="Select the video file", filetypes=[("Video files", "*.mp4 *.avi *.mov")])
    if not video_path:
        logging.error("No video file selected.")
        return

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


TclError: no display name and no $DISPLAY environment variable

In [4]:
import os
import logging
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
from google.colab import files
import whisper

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_path)
        logging.info(f"Extracted audio to {audio_path}")
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    upload_widget = files.upload()
    video_path = next(iter(upload_widget.keys()))

    logging.info(f"Video file uploaded: {video_path}")

    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


ERROR:root:Error extracting audio: MoviePy error: failed to read the duration of file gifsauto.mp4.
Here are the file infos returned by ffmpeg:

ffmpeg version 4.2.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d --en

Saving gifsauto.mp4 to gifsauto.mp4


In [2]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [3]:
!pip install whisper

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=96400ec7419211f70fba50cf658fa21107d1a89ae84087605ea4db6590443f32
  Stored in directory: /root/.cache/pip/wheels/aa/7c/1d/015619716e2facae6631312503baf3c3220e6a9a3508cb14b6
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10


In [5]:
!pip install pydub moviepy whisper-timestamped


Collecting whisper-timestamped
  Downloading whisper_timestamped-1.15.4-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.5/53.5 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting dtw-python (from whisper-timestamped)
  Downloading dtw_python-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (770 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m770.5/770.5 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai-whisper (from whisper-timestamped)
  Downloading openai-whisper-20231117.tar.gz (798 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.6/798.6 kB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper->whisper-timestamped)
  Downloading tiktoken-0.7.0-cp310-c

In [None]:
from google.colab import files
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)

# Upload video file
uploaded = files.upload()

# Get the uploaded video file name
video_file = next(iter(uploaded.keys()))
logging.info(f"Video file uploaded: {video_file}")


In [2]:
import os
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import whisper

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_path)
        logging.info(f"Extracted audio to {audio_path}")
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = video_file
    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


ERROR:root:Error extracting audio: MoviePy error: failed to read the duration of file gifsauto (1).mp4.
Here are the file infos returned by ffmpeg:

ffmpeg version 4.2.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d 

In [3]:
import os
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import whisper
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)

def extract_audio_from_video(video_path, audio_path):
    """Extracts audio from the given video file and saves it to audio_path."""
    try:
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_path)
        logging.info(f"Extracted audio to {audio_path}")
    except Exception as e:
        logging.error(f"Error extracting audio: {e}")
        raise

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    """Detects silence in the audio file and returns intervals of silence."""
    audio = AudioSegment.from_file(audio_path)
    silent_intervals = silence.detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
    silent_intervals = [(start / 1000, stop / 1000) for start, stop in silent_intervals]
    return silent_intervals

def generate_split_intervals(silent_intervals, audio_duration):
    """Generates intervals to split the audio based on detected silence."""
    intervals = []
    prev_end = 0
    for start, end in silent_intervals:
        if prev_end < start:
            intervals.append((prev_end, start))
        prev_end = end
    if prev_end < audio_duration:
        intervals.append((prev_end, audio_duration))
    return intervals

def split_audio(audio, intervals, output_folder):
    """Splits the audio into segments based on the given intervals and saves them."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    audio_segments = []
    for i, (start, end) in enumerate(intervals):
        segment = audio[start * 1000:end * 1000]
        segment_path = os.path.join(output_folder, f'segment_{i + 1}.wav')
        segment.export(segment_path, format="wav")
        audio_segments.append(segment_path)
    return audio_segments

def transcribe_audio_segment_whisper(model, audio_path):
    """Transcribes the given audio segment using the Whisper model."""
    try:
        result = model.transcribe(audio_path, fp16=False)
        text = result['text'].strip()
        if not text:
            text = "No transcription available"
        logging.info(f"Transcribed text: {text}")
        return text
    except Exception as e:
        logging.error(f"Error transcribing audio: {e}")
        return "No transcription available"

def split_video(video_path, intervals, output_folder, texts, gif_output_folder):
    """Splits the video into segments based on the given intervals and adds transcribed text."""
    video = VideoFileClip(video_path)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if not os.path.exists(gif_output_folder):
        os.makedirs(gif_output_folder)

    for i, (start, end) in enumerate(intervals):
        clip = video.subclip(start, end)
        text = texts[i] if texts[i] else "No transcription available"
        logging.info(f"Adding text to video segment: '{text}'")
        clip = add_text_to_clip(clip, text)
        clip_path = os.path.join(output_folder, f'word_{i + 1}.mp4')
        clip.write_videofile(clip_path, codec='libx264')
        logging.info(f"Saved video segment: {clip_path}")
        gif_filename = f'word_{i + 1}.gif'
        convert_to_gif(clip, gif_filename, gif_output_folder)

def add_text_to_clip(clip, text):
    """Adds text to the given video clip."""
    try:
        txt_clip = TextClip(text, fontsize=120, color='red', font='Arial-Bold')  # Adjust fontsize for readability
        txt_clip = txt_clip.set_pos(('center', 'bottom')).set_duration(clip.duration)
        return CompositeVideoClip([clip, txt_clip])
    except Exception as e:
        logging.error(f"Error creating TextClip with text '{text}': {e}")
        raise

def convert_to_gif(clip, gif_filename, gif_output_folder):
    """Converts a video clip to GIF and saves it."""
    try:
        gif_path = os.path.join(gif_output_folder, gif_filename)
        clip.write_gif(gif_path, fps=15, program='imageio', opt='nq', fuzz=10)
        logging.info(f"Saved GIF: {gif_path}")
    except Exception as e:
        logging.error(f"Error converting video to GIF: {e}")
        raise

def main():
    video_path = video_file
    audio_path = 'audio.wav'
    output_folder = 'output_segments'
    gif_output_folder = 'gif_segments'  # New folder for saving GIFs

    try:
        # Step 1: Extract audio from video
        extract_audio_from_video(video_path, audio_path)

        # Step 2: Detect silence in the audio
        silent_intervals = detect_silence(audio_path)

        # Step 3: Generate split intervals
        audio = AudioSegment.from_file(audio_path)
        audio_duration = len(audio) / 1000
        split_intervals = generate_split_intervals(silent_intervals, audio_duration)

        # Step 4: Split audio into segments
        audio_segments = split_audio(audio, split_intervals, output_folder)

        # Step 5: Load Whisper model once
        model = whisper.load_model("base")

        # Step 6: Transcribe each audio segment using Whisper
        texts = [transcribe_audio_segment_whisper(model, segment) for segment in audio_segments]

        # Step 7: Split the video based on intervals and convert to GIF with text
        split_video(video_path, split_intervals, output_folder, texts, gif_output_folder)

    except Exception as e:
        logging.error(f"An error occurred: {e}")

if __name__ == '__main__':
    main()


ERROR:root:Error extracting audio: MoviePy error: failed to read the duration of file gifsauto (1).mp4.
Here are the file infos returned by ffmpeg:

ffmpeg version 4.2.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2019 the FFmpeg developers
  built with gcc 8 (Debian 8.3.0-6)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gmp --enable-libgme --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libdav1d 

In [None]:
from google.colab import files
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)

# Upload video file
uploaded = files.upload()

# Get the uploaded video file name
video_file = next(iter(uploaded.keys()))
logging.info(f"Video file uploaded: {video_file}")
