In [None]:
# Core Python packages
# googletrans is pinned to 4.0.0-rc1; the install log for this cell shows pip
# resolving httpx==0.13.3 as a requirement of that release.
!pip install flask flask-cors pyngrok googletrans==4.0.0-rc1 gTTS pydub openai-whisper ffmpeg-python

# System dependency for audio/video
# (the ffmpeg binary; ffmpeg-python above is only a Python wrapper around it)
!apt-get install -y ffmpeg

# Provides the `webvtt` module imported by the server cell for subtitle output.
!pip install webvtt-py

Collecting flask-cors
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gTTS
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting openai-whisper
  Downloading openai_whisper-20250625.tar.gz (803 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m43.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from 

In [None]:
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
import os, uuid, tempfile, shutil, traceback, json, gc
import whisper
from googletrans import Translator
from pyngrok import ngrok, conf
from pydub import AudioSegment
from gtts import gTTS
import ffmpeg
from webvtt import WebVTT, Caption
import logging

# Setup logging
# INFO level on the root logger; `logger` is this module's named logger and is
# used by the request handlers to report translation / TTS failures.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# -------------------- App Setup --------------------
app = Flask(__name__)
# Enhanced CORS configuration
# Applies to every route ("/*"): any origin, GET/POST/OPTIONS, and the two
# request headers listed below (including "ngrok-skip-browser-warning").
CORS(app, resources={
    r"/*": {
        "origins": ["*"],
        "methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["Content-Type", "ngrok-skip-browser-warning"]
    }
})

# Working directories, created relative to the current working directory.
# OUTPUT_DIR holds the finished videos/subtitles served by /outputs/<filename>.
# NOTE(review): UPLOAD_DIR is created here but not referenced by the code in
# this cell (uploads are written to a temporary directory) — confirm it is needed.
UPLOAD_DIR = "uploads"
OUTPUT_DIR = "outputs"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# -------------------- Load Models --------------------
# The Whisper model is loaded once at cell execution time and reused by every
# /dub request through the module-level `asr_model`.
print("Loading Whisper model...")
# Using base model to reduce memory usage
asr_model = whisper.load_model("base")
print("Whisper loaded.")

# Single googletrans client shared by all requests.
translator = Translator()
# Base URL used to build download links in /dub responses; assigned by
# start_ngrok() and None until that function has run.
public_url = None

# Supported languages for the frontend
# Returned verbatim by GET /languages under the "supported_languages" key.
# Keys are language codes; each value carries a display name and a
# "tts_support" flag.
# NOTE(review): /dub forwards the submitted `language` straight to the
# translator and gTTS without checking it against this table — confirm whether
# validation is intended.
SUPPORTED_LANGUAGES = {
    "en": {"name": "English", "tts_support": True},
    "es": {"name": "Spanish", "tts_support": True},
    "fr": {"name": "French", "tts_support": True},
    "de": {"name": "German", "tts_support": True},
    "it": {"name": "Italian", "tts_support": True},
    "pt": {"name": "Portuguese", "tts_support": True},
    "ja": {"name": "Japanese", "tts_support": True},
    "ko": {"name": "Korean", "tts_support": True},
    "zh-cn": {"name": "Chinese (Simplified)", "tts_support": True},
    "ru": {"name": "Russian", "tts_support": True},
    "ar": {"name": "Arabic", "tts_support": True},
    "hi": {"name": "Hindi", "tts_support": True},
    "ur": {"name": "Urdu", "tts_support": True},
}

# Available voices (simplified for this implementation)
# Returned verbatim by GET /voices; only a single gTTS-backed entry is defined.
AVAILABLE_VOICES = [
    {"id": "default", "name": "Default Voice", "language": "en", "type": "gtts"},
]

# -------------------- Helper Functions --------------------
def read_duration(wav_path):
    """Return the length, in seconds, of the audio file at ``wav_path``.

    The file is decoded with ``AudioSegment.from_file`` and the segment's
    ``duration_seconds`` value is returned unchanged.
    """
    audio = AudioSegment.from_file(wav_path)
    return audio.duration_seconds

def extract_audio(input_video_path, out_wav_path, sr=16000):
    """Write the audio of ``input_video_path`` to ``out_wav_path`` via ffmpeg.

    The output is requested with one audio channel (``ac=1``) and a sample
    rate of ``sr`` Hz (``ar=sr``). ffmpeg logging is limited to errors and an
    existing output file is overwritten.
    """
    source = ffmpeg.input(input_video_path)
    target = source.output(out_wav_path, ac=1, ar=sr, loglevel="error")
    target.overwrite_output().run()

def video_duration(input_video_path):
    """Return the duration, in seconds, of the audio track of a video file.

    The audio is extracted to a scratch file next to the input
    (``<input_video_path>_tmp.wav``), measured, and the scratch file is
    removed again.

    Args:
        input_video_path: Path of the video to measure.

    Returns:
        The duration in seconds as reported by ``read_duration``.

    Raises:
        Whatever ``extract_audio`` or ``read_duration`` raise; the scratch
        file is cleaned up in that case as well.
    """
    temp_wav = input_video_path + "_tmp.wav"
    try:
        extract_audio(input_video_path, temp_wav)
        return read_duration(temp_wav)
    finally:
        # Runs on success and on failure, so a failed extraction or decode
        # does not leave a stray scratch file behind. The existence check
        # covers the case where extraction failed before creating the file.
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

def overlay_segments(segments, segment_wavs, total_duration_s, bg_audio_path=None):
    """Build the dubbed audio track by placing speech clips on a silent base.

    Args:
        segments: Sequence of dicts; each dict's ``"start"`` (seconds) gives
            the position of the matching clip.
        segment_wavs: Paths of the speech clips, paired with ``segments`` by
            position (pairing stops at the shorter of the two).
        total_duration_s: Length of the silent base track in seconds.
        bg_audio_path: Optional path to a background track. When given and the
            file exists, it is trimmed or padded with silence to the base
            length, attenuated by 18 dB and mixed in.

    Returns:
        The mixed ``AudioSegment``.
    """
    target_sr = 16000
    total_ms = int(total_duration_s * 1000)
    track = AudioSegment.silent(duration=total_ms, frame_rate=target_sr)

    for segment, clip_path in zip(segments, segment_wavs):
        offset_ms = int(segment["start"] * 1000)
        clip = AudioSegment.from_file(clip_path).set_frame_rate(target_sr)
        # gain_during_overlay=-6 is forwarded to pydub as-is for every clip.
        track = track.overlay(clip, position=offset_ms, gain_during_overlay=-6)

    # No usable background track: the speech-only mix is the result.
    if not bg_audio_path or not os.path.exists(bg_audio_path):
        return track

    background = AudioSegment.from_file(bg_audio_path).set_frame_rate(target_sr)
    if len(background) > total_ms:
        background = background[:total_ms]
    else:
        background = background + AudioSegment.silent(total_ms - len(background))
    background = background - 18
    return track.overlay(background)

def _vtt_timestamp(seconds):
    """Format an offset in seconds as a WebVTT ``HH:MM:SS.mmm`` timestamp."""
    # Work in whole milliseconds so rounding can never yield ".1000";
    # negative offsets are clamped to zero.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def generate_subtitles_file(segments, output_path):
    """Write ``segments`` to ``output_path`` as a WebVTT subtitle file.

    Args:
        segments: Iterable of dicts with ``"start"`` and ``"end"`` offsets in
            seconds and the caption ``"text"``.
        output_path: Destination path handed to ``WebVTT.save``.

    The previous implementation rendered offsets as bare seconds with a comma
    (e.g. ``"1,234"``), which is not a WebVTT timestamp; captions are now
    stamped in ``HH:MM:SS.mmm`` form.
    """
    vtt = WebVTT()
    for seg in segments:
        caption = Caption(
            _vtt_timestamp(seg['start']),
            _vtt_timestamp(seg['end']),
            seg['text']
        )
        vtt.captions.append(caption)

    vtt.save(output_path)

# -------------------- API Endpoints --------------------
@app.route('/languages', methods=['GET', 'OPTIONS'])
def get_languages():
    """Return the language table as JSON under ``supported_languages``.

    OPTIONS requests are answered with an empty 200 response.
    """
    if request.method != 'OPTIONS':
        return jsonify({"supported_languages": SUPPORTED_LANGUAGES})
    return '', 200

@app.route('/voices', methods=['GET', 'OPTIONS'])
def get_voices():
    """Return the list of available voices as a JSON array.

    OPTIONS requests are answered with an empty 200 response.
    """
    if request.method != 'OPTIONS':
        return jsonify(AVAILABLE_VOICES)
    return '', 200

@app.route('/dub', methods=['POST', 'OPTIONS'])
def dub_video():
    """Dub an uploaded MP4 into another language and return links to the result.

    Multipart form fields:
        video: The MP4 upload (required, at most 50 MB).
        language: Target language code handed to the translator and to gTTS
            (default ``"en"``).
        keep_background: ``"true"`` (default) mixes the original audio under
            the synthesized speech.
        generate_subtitles: ``"true"`` also writes a WebVTT file with the
            translated text (default ``"false"``).
        voice_preferences: Optional JSON string. It is parsed, but nothing in
            this handler consumes the result.

    Returns:
        JSON with ``result_url``, ``subtitle_url`` (``None`` unless subtitles
        were requested) and an empty ``speakers`` object. Validation failures
        return ``{"error": ...}`` with status 400; any unexpected exception
        returns ``{"error": ...}`` with status 500.
    """
    if request.method == 'OPTIONS':
        return '', 200

    try:
        if 'video' not in request.files:
            return jsonify({"error": "No video uploaded"}), 400

        # Check file size (max 50MB)
        video = request.files['video']
        video.seek(0, os.SEEK_END)
        file_size = video.tell()
        video.seek(0)

        MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
        if file_size > MAX_FILE_SIZE:
            return jsonify({"error": f"File too large. Max size: {MAX_FILE_SIZE//1024//1024}MB"}), 400

        language = request.form.get('language', 'en').strip()
        keep_bg = request.form.get('keep_background', 'true').lower() == 'true'
        generate_subtitles = request.form.get('generate_subtitles', 'false').lower() == 'true'

        # Parse voice preferences if provided. Only a JSON decoding failure is
        # tolerated (json.JSONDecodeError is a ValueError); the defaults are
        # kept and the problem is logged instead of being silently dropped.
        voice_preferences = {}
        if 'voice_preferences' in request.form:
            try:
                voice_preferences = json.loads(request.form['voice_preferences'])
            except ValueError as e:
                logger.warning(f"Ignoring malformed voice_preferences: {e}")

        # The filename can be missing on a malformed upload; treat that as
        # "not an MP4" (400) rather than failing with an AttributeError (500).
        if not (video.filename or "").lower().endswith('.mp4'):
            return jsonify({"error": "Only MP4 files supported"}), 400

        with tempfile.TemporaryDirectory() as tmp:
            file_id = str(uuid.uuid4())
            input_video = os.path.join(tmp, f"{file_id}.mp4")
            orig_wav = os.path.join(tmp, f"{file_id}_orig.wav")
            dubbed_wav = os.path.join(tmp, f"{file_id}_dubbed.wav")
            output_video = os.path.join(OUTPUT_DIR, f"{file_id}_dubbed.mp4")
            subtitle_file = os.path.join(OUTPUT_DIR, f"{file_id}_subtitles.vtt")

            video.save(input_video)

            # Free memory after saving
            del video
            gc.collect()

            # Extract the audio once and measure that file, instead of
            # extracting a second scratch copy just to read its duration.
            extract_audio(input_video, orig_wav)
            total_dur = read_duration(orig_wav)

            # 1. Transcribe
            result = asr_model.transcribe(orig_wav, fp16=False, word_timestamps=True)
            segments = result.get("segments", [])
            if not segments:
                return jsonify({"error": "No speech detected"}), 400

            # 2. Translate
            translated_segments = []
            for seg in segments:
                text = seg.get("text", "").strip()
                if text:
                    try:
                        tr = translator.translate(text, dest=language, src="auto")
                        translated_segments.append({
                            "start": seg["start"],
                            "end": seg["end"],
                            "text": tr.text,
                            "original_text": text
                        })
                    except Exception as e:
                        logger.error(f"Translation error: {e}")
                        # Fallback to original text if translation fails
                        translated_segments.append({
                            "start": seg["start"],
                            "end": seg["end"],
                            "text": text,
                            "original_text": text
                        })

            # 3. TTS for each segment (with gTTS)
            # `dubbed_segments` and `seg_wavs` grow together, so a segment
            # whose synthesis fails is dropped from both and every remaining
            # clip stays paired with its own start time.
            dubbed_segments = []
            seg_wavs = []
            for i, seg in enumerate(translated_segments):
                seg_id = f"{file_id}_seg{i}"
                # gTTS writes MP3 data, so the clip is named accordingly.
                tts_path = os.path.join(tmp, f"{seg_id}.mp3")

                try:
                    tts = gTTS(text=seg["text"], lang=language)
                    tts.save(tts_path)
                except Exception as e:
                    logger.error(f"TTS error: {e}")
                    # Skip this segment if TTS fails
                    continue

                dubbed_segments.append(seg)
                seg_wavs.append(tts_path)

            # 4. Overlay audio segments
            mixed = overlay_segments(dubbed_segments, seg_wavs, total_duration_s=total_dur,
                                     bg_audio_path=orig_wav if keep_bg else None)
            mixed.export(dubbed_wav, format="wav")

            # 5. Mux video + audio (video stream copied, audio encoded as AAC)
            video_stream = ffmpeg.input(input_video).video
            audio_stream = ffmpeg.input(dubbed_wav).audio
            ffmpeg.output(video_stream, audio_stream, output_video, vcodec='copy', acodec='aac').overwrite_output().run()

            # `public_url` is only set once start_ngrok() has run; fall back to
            # the URL this request arrived on so links never start with "None".
            base_url = (public_url or request.host_url).rstrip('/')

            # 6. Generate subtitles if requested (covers every translated
            # segment, including any whose speech synthesis failed)
            subtitle_url = None
            if generate_subtitles:
                generate_subtitles_file(translated_segments, subtitle_file)
                subtitle_url = f"{base_url}/outputs/{os.path.basename(subtitle_file)}"

            result_url = f"{base_url}/outputs/{os.path.basename(output_video)}"

            # Return response with empty speakers object (frontend expects this)
            return jsonify({
                "result_url": result_url,
                "subtitle_url": subtitle_url,
                "speakers": {}  # Simplified implementation - no speaker detection
            })

    except Exception as e:
        # Log with traceback through the module logger, then report the
        # failure to the client as a JSON 500.
        logger.exception("Dubbing request failed")
        return jsonify({"error": str(e)}), 500

@app.route('/outputs/<path:filename>')
def serve_output(filename):
    """Serve ``filename`` from ``OUTPUT_DIR`` with a wildcard CORS header added.

    NOTE(review): the app-wide CORS configuration also covers this route, so
    the response may end up with two ``Access-Control-Allow-Origin`` headers —
    confirm against the responses actually sent.
    """
    resp = send_from_directory(OUTPUT_DIR, filename)
    resp.headers.add('Access-Control-Allow-Origin', '*')
    return resp

# -------------------- Ngrok --------------------
def start_ngrok():
    """Open an ngrok tunnel to port 5000 and store its address in ``public_url``.

    Configuration is read from the environment so that no credential is kept
    in the notebook:

    * ``NGROK_AUTHTOKEN`` – ngrok auth token; when unset, pyngrok's existing
      default configuration is left untouched.
    * ``NGROK_DOMAIN`` – optional hostname to request for the tunnel; when
      unset, no hostname is passed to ``ngrok.connect``.

    If the tunnel cannot be opened for any reason, the error is printed and
    ``public_url`` falls back to ``http://localhost:5000``.
    """
    global public_url
    try:
        auth_token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
        if auth_token:
            conf.get_default().auth_token = auth_token

        tunnel_options = {"addr": 5000}
        domain = os.environ.get("NGROK_DOMAIN", "").strip()
        if domain:
            tunnel_options["hostname"] = domain

        tunnel = ngrok.connect(**tunnel_options)
        public_url = tunnel.public_url

        print(f" * Ngrok running: {public_url}")
    except Exception as e:
        print(f"Ngrok error: {e}")
        # Fallback to localhost
        public_url = "http://localhost:5000"
        print(f"Using local URL: {public_url}")

# -------------------- Run Server --------------------
if __name__ == "__main__":
    # Open the tunnel first so `public_url` is set before any request is served.
    start_ngrok()
    # Start every launch with an empty output directory: results from earlier
    # runs are deleted, then the directory is recreated.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Serve on all interfaces, port 5000 (the port the tunnel forwards to).
    # This call blocks until the server is stopped.
    app.run(host="0.0.0.0", port=5000, debug=False, threaded=True)

Loading Whisper model...
Whisper loaded.
 * Ngrok running: https://cicada-together-marginally.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:38:54] "OPTIONS /dub HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:39:26] "POST /dub HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:39:27] "[35m[1mGET /outputs/0712dcc1-fe48-4035-bcee-9f97c1dae826_dubbed.mp4 HTTP/1.1[0m" 206 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:39:28] "[35m[1mGET /outputs/0712dcc1-fe48-4035-bcee-9f97c1dae826_dubbed.mp4 HTTP/1.1[0m" 206 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:39:29] "[35m[1mGET /outputs/0712dcc1-fe48-4035-bcee-9f97c1dae826_dubbed.mp4 HTTP/1.1[0m" 206 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:39:30] "[35m[1mGET /outputs/0712dcc1-fe48-4035-bcee-9f97c1dae826_dubbed.mp4 HTTP/1.1[0m" 206 -
INFO:werkzeug:127.0.0.1 - - [30/Aug/2025 04:41:31] "[35m[1mGET /outputs/0712dcc1-fe48-4035-bcee-9f97c1dae826_dubbed

In [None]:
# NOTE(review): presumably works around a numba / openai-whisper version
# mismatch — confirm it is still required.
# This cell comes after the server cell, whose app.run() call blocks, so under
# "Run All" it will not execute until the server has been stopped.
pip install --upgrade numba

