In [4]:
!pip install --upgrade flask werkzeug






In [5]:
!pip install flask moviepy pydub whisper opencv-python




In [6]:
pip install regex





In [7]:
pip install pytube





In [9]:
import warnings
warnings.filterwarnings("ignore")

In [8]:
pip install numba

Note: you may need to restart the kernel to use updated packages.


In [12]:
# # Set the environment variable to avoid OpenMP runtime error
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

from flask import Flask, render_template, request, redirect, url_for, send_from_directory
import whisper
from pydub import AudioSegment, silence
from moviepy.editor import VideoFileClip
import cv2

app = Flask(__name__, template_folder='te')
UPLOAD_FOLDER = 'uploaded_videos'
PROCESSED_FOLDER = 'processed_gifs'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['PROCESSED_FOLDER'] = PROCESSED_FOLDER

if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
if not os.path.exists(PROCESSED_FOLDER):
    os.makedirs(PROCESSED_FOLDER)

# Load the Whisper model
import os
from moviepy.editor import VideoFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from pydub import AudioSegment, silence
import whisper
import cv2
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load the Whisper model
logger.info("Loading Whisper model...")
model = whisper.load_model("base")  # Load the default model
logger.info("Model loaded.")

def detect_silence(audio_path, silence_thresh=-40, min_silence_len=500):
    audio = AudioSegment.from_file(audio_path)
    logger.info("Analyzing audio for silence...")
    silent_ranges = silence.detect_silence(
        audio, 
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh
    )
    silent_ranges = [(start / 1000, end / 1000) for start, end in silent_ranges]  # Convert to seconds
    return silent_ranges

def get_non_silent_segments(silent_segments, video_duration):
    non_silent_segments = []
    previous_end = 0
    for start, end in silent_segments:
        if start > previous_end:
            non_silent_segments.append((previous_end, start))
        previous_end = end
    if previous_end < video_duration:
        non_silent_segments.append((previous_end, video_duration))
    return non_silent_segments

def cut_video_segment(video_path, start_time, end_time, output_path):
    logger.info(f"Cutting video segment: start_time={start_time}, end_time={end_time}, output_path={output_path}")
    try:
        ffmpeg_extract_subclip(video_path, start_time, end_time, targetname=output_path)
        logger.info("Video segment cut.")
    except Exception as e:
        logger.error(f"Error cutting video segment: {e}")

def convert_video_to_audio(video_path, audio_path):
    try:
        video_clip = VideoFileClip(video_path)
        video_clip.audio.write_audiofile(audio_path)
        video_clip.close()
    except Exception as e:
        logger.error(f"Error converting video to audio: {e}")

def transcribe_audio(audio_path):
    try:
        result = model.transcribe(audio_path)
        return result['text']
    except Exception as e:
        logger.error(f"Error transcribing audio: {e}")
        return ""

def overlay_text_on_video(video_path, output_path, text):
    try:
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            font_scale = 3
            thickness = 10
            color = (255, 255, 255)
            text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)[0]
            text_x = int((width - text_size[0]) / 2)
            text_y = height - 100
            cv2.putText(frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness, cv2.LINE_AA)

            out.write(frame)

        cap.release()
        out.release()
    except Exception as e:
        logger.error(f"Error overlaying text on video: {e}")

def convert_video_to_gif(input_video_path, output_gif_path):
    try:
        clip = VideoFileClip(input_video_path)
        clip.write_gif(output_gif_path)
        clip.close()
    except Exception as e:
        logger.error(f"Error converting video to GIF: {e}")

def process_video(video_path, output_dir, silence_thresh=-40, min_silence_len=500):
    audio_path = "temp_audio.wav"
    
    logger.info(f"Extracting audio from video: {video_path}")
    try:
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_path)
        video.close()
    except Exception as e:
        logger.error(f"Error extracting audio from video: {e}")
        return []
    
    try:
        silent_segments = detect_silence(audio_path, silence_thresh, min_silence_len)
        logger.info(f"Silent segments detected: {silent_segments}")
    except Exception as e:
        logger.error(f"Error detecting silence: {e}")
        return []
    
    os.remove(audio_path)
    
    video_duration = VideoFileClip(video_path).duration  # Reload video to get duration
    non_silent_segments = get_non_silent_segments(silent_segments, video_duration)
    logger.info(f"Non-silent segments: {non_silent_segments}")
    
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    output_files = []
    for i, (start_time, end_time) in enumerate(non_silent_segments):
        video_segment_path = f"{output_dir}/segment_{i}.mp4"
        cut_video_segment(video_path, start_time, end_time, video_segment_path)
        
        segment_audio_path = f"{output_dir}/segment_{i}_audio.wav"
        convert_video_to_audio(video_segment_path, segment_audio_path)
        transcription = transcribe_audio(segment_audio_path)
        video_with_text_path = f"{output_dir}/segment_{i}_with_text.mp4"
        overlay_text_on_video(video_segment_path, video_with_text_path, transcription)
        
        gif_path = f"{output_dir}/segment_{i}.gif"
        convert_video_to_gif(video_with_text_path, gif_path)
        
        try:
            os.remove(video_segment_path)
            os.remove(segment_audio_path)
            os.remove(video_with_text_path)
        except Exception as e:
            logger.error(f"Error removing intermediate files: {e}")
        
        output_files.append(gif_path)
    
    return output_files

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/upload', methods=['POST'])
def upload_video():
    if 'file' not in request.files:
        return redirect(request.url)
    file = request.files['file']
    if file.filename == '':
        return redirect(request.url)
    if file:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
        file.save(file_path)
        process_video(file_path, app.config['PROCESSED_FOLDER'])
        return redirect(url_for('processed_files'))

@app.route('/processed_files')
def processed_files():
    files = os.listdir(app.config['PROCESSED_FOLDER'])
    return render_template('processed_files.html', files=files)

@app.route('/processed_files/<filename>')
def processed_file(filename):
    return send_from_directory(app.config['PROCESSED_FOLDER'], filename)

if __name__ == '__main__':
    app.run(debug=True)


INFO:__main__:Loading Whisper model...
INFO:__main__:Model loaded.


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 21:31:38] "GET / HTTP/1.1" 200 -


SystemExit: 1

In [None]:
from werkzeug.serving import run_simple

def run_flask_app():
    run_simple('localhost', 5000, app)

import threading
threading.Thread(target=run_flask_app).start()


INFO:__main__:Extracting audio from video: uploaded_videos\test_video.mp4


MoviePy - Writing audio in temp_audio.wav


INFO:__main__:Analyzing audio for silence...                     


MoviePy - Done.


INFO:__main__:Silent segments detected: [(0.0, 0.723), (1.689, 2.35)]
INFO:__main__:Non-silent segments: [(0.723, 1.689)]
INFO:__main__:Cutting video segment: start_time=0.723, end_time=1.689, output_path=processed_gifs/segment_0.mp4


Moviepy - Running:
>>> "+ " ".join(cmd)


INFO:__main__:Video segment cut.


Moviepy - Command successful
MoviePy - Writing audio in processed_gifs/segment_0_audio.wav


                                                               

MoviePy - Done.




MoviePy - Building file processed_gifs/segment_0.gif with imageio.


INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 21:33:26] "[32mPOST /upload HTTP/1.1[0m" 302 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 21:33:26] "GET /processed_files HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 21:33:29] "GET /processed_files/segment_0.gif HTTP/1.1" 200 -
