# TikTok Transcription Server (Colab)

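This notebook acts as a remote transcription server for TikTok and YouTube videos. Run all cells in order to start the API; the last cell prints the public ngrok URL to paste into your client.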

In [1]:
# @title 1. Install Dependencies
# Python packages used by the later cells: yt-dlp (YouTube download), Whisper installed from
# GitHub (transcription), FastAPI + uvicorn (HTTP API) and pyngrok (public tunnel).
# nest_asyncio is imported by the server cell. NOTE(review): python-multipart is not referenced
# by the code in this notebook -- confirm it is still needed.
!pip install -q yt-dlp git+https://github.com/openai/whisper.git fastapi uvicorn pyngrok nest_asyncio python-multipart
# ffmpeg (and its companion ffprobe) are invoked via subprocess to extract and verify the audio.
!apt-get install -y ffmpeg

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [2]:
# @title 2. Load Whisper Model
import whisper
import torch

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Loading model on {DEVICE}...")
# `model` is a notebook-level global: download_and_transcribe() in a later cell reads it.
model = whisper.load_model("medium.en")
model = model.to(DEVICE)
print("Model loaded!")

Loading model on cuda...
Model loaded!


In [3]:
import os
import re
import uuid
import requests
import subprocess

# yt-dlp is required for YouTube downloads (installed by the dependency cell above).
import yt_dlp  # requires: pip install yt-dlp


# URLs routed to yt-dlp. The scheme and a leading "www." are optional; accepted hosts are
# youtube.com, the mobile (m.) and music (music.) subdomains, and youtu.be short links.
# The host must be followed by "/" plus at least one character, so look-alike hosts such as
# "youtube.com.evil.example" do not match.
YOUTUBE_RE = re.compile(
    r'^(?:(?:https?:)?//)?(?:www\.)?'
    r'(?:youtube\.com|m\.youtube\.com|music\.youtube\.com|youtu\.be)/.+',
    re.IGNORECASE
)
# URLs routed to TikWM. Same optional scheme / "www." handling; accepted hosts are tiktok.com,
# the mobile site (m.) and the vm. / vt. short-link hosts.
TIKTOK_RE = re.compile(
    r'^(?:(?:https?:)?//)?(?:www\.)?'
    r'(?:tiktok\.com|vm\.tiktok\.com|vt\.tiktok\.com|m\.tiktok\.com)/.+',
    re.IGNORECASE
)


def _extract_audio_ffmpeg(video_file: str, audio_file: str):
    ffmpeg_cmd = [
        "ffmpeg",
        "-i", video_file,
        "-vn",
        "-acodec", "libmp3lame",
        "-q:a", "2",
        "-y",
        audio_file
    ]
    result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {result.stderr}")

    # Verify audio stream exists
    check_audio = subprocess.run(
        ["ffprobe", "-i", audio_file, "-show_streams", "-select_streams", "a", "-loglevel", "error"],
        capture_output=True,
        text=True
    )
    if not check_audio.stdout.strip():
        raise ValueError(f"Extracted audio has no audio stream: {audio_file}")


def _download_tiktok_tikwm(video_url: str, video_file: str, timeout: float = 30.0):
    """
    Resolve a TikTok URL through the TikWM API and save the video to ``video_file``.

    Args:
        video_url: TikTok link to resolve.
        video_file: Path the downloaded video is written to.
        timeout: Seconds passed as the ``requests`` timeout for both the API call and the
            media download, so a stalled connection cannot block the worker indefinitely.

    Raises:
        requests.HTTPError: Either HTTP request returned an error status.
        requests.Timeout: Either request exceeded ``timeout``.
        ValueError: TikWM reported a non-zero ``code`` or returned no download URL.
    """
    api_url = "https://www.tikwm.com/api/"
    response = requests.post(
        api_url,
        data={"url": video_url, "hd": 1},
        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"},
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()

    if data.get("code") != 0:
        raise ValueError(f"TikWM API error: {data.get('msg', 'Unknown error')}")

    # "data" may be missing or null; treat that like "no URL" instead of a KeyError/TypeError.
    payload = data.get("data") or {}
    video_download_url = payload.get("hdplay") or payload.get("play")
    if not video_download_url:
        raise ValueError("No video download URL found in TikWM API response")

    # Context manager releases the streamed connection even if writing fails midway.
    with requests.get(video_download_url, stream=True, timeout=timeout) as video_response:
        video_response.raise_for_status()
        with open(video_file, "wb") as f:
            for chunk in video_response.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)


def _download_youtube_ytdlp(video_url: str, video_file: str):
    """
    Download a single YouTube video to ``video_file`` using yt-dlp.

    Raises:
        FileNotFoundError: yt-dlp finished but ``video_file`` does not exist.
    """
    options = dict(
        # Separate best video + best audio streams, merged by yt-dlp (needs ffmpeg);
        # falls back to the best single file.
        format="bestvideo+bestaudio/best",
        outtmpl=video_file,           # write to exactly this path
        merge_output_format="mp4",    # container to use after the merge
        noplaylist=True,
        quiet=True,
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([video_url])

    if os.path.exists(video_file):
        return
    raise FileNotFoundError(f"yt-dlp did not produce output file: {video_file}")


def download_and_transcribe(video_url: str):
    """
    Download a TikTok (TikWM) or YouTube (yt-dlp) video, extract a verified MP3 with
    FFmpeg and return the stripped text produced by the global Whisper ``model``.

    Temporary files are named ``temp_<uuid>.mp4`` / ``temp_<uuid>.mp3`` in the working
    directory and are removed on exit, whether or not an error occurred.

    Raises:
        ValueError: ``video_url`` matches neither TIKTOK_RE nor YOUTUBE_RE.
        Exception: Whatever the download, extraction or transcription step raises.
    """
    stem = f"temp_{uuid.uuid4()}"
    audio_file = f"{stem}.mp3"
    video_file = f"{stem}.mp4"

    try:
        # TikTok is checked first, then YouTube; anything else is rejected up front.
        is_tiktok = bool(TIKTOK_RE.match(video_url))
        if not is_tiktok and not YOUTUBE_RE.match(video_url):
            raise ValueError("Unsupported URL. Provide a TikTok or YouTube link.")

        if is_tiktok:
            print(f"Detected TikTok URL. Downloading via TikWM: {video_url}...")
            _download_tiktok_tikwm(video_url, video_file)
            print(f"✓ Downloaded (TikWM): {video_file}")
        else:
            print(f"Detected YouTube URL. Downloading via yt-dlp: {video_url}...")
            _download_youtube_ytdlp(video_url, video_file)
            print(f"✓ Downloaded (yt-dlp): {video_file}")

        print("Extracting audio (FFmpeg)...")
        _extract_audio_ffmpeg(video_file, audio_file)
        print(f"✓ Audio extracted + verified: {audio_file}")

        print(f"Transcribing {audio_file}...")
        transcription = model.transcribe(audio_file)
        return transcription["text"].strip()

    finally:
        # Best-effort cleanup: a failed delete must never mask the real result or error.
        for leftover in (audio_file, video_file):
            try:
                if os.path.exists(leftover):
                    os.remove(leftover)
            except Exception:
                pass


In [None]:
# @title 4. Start Server
import nest_asyncio
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
import uvicorn
from pyngrok import ngrok
import uuid
from typing import Dict, Any

# @markdown Enter your Ngrok Authtoken here:
import getpass
import os

# SECURITY: never save a real authtoken in the notebook. Leave the form field blank when
# sharing or committing; the token is then taken from the NGROK_AUTHTOKEN environment
# variable or, failing that, from a hidden interactive prompt.
# NOTE: a token was previously hard-coded on this line -- revoke it in the ngrok dashboard.
NGROK_AUTHTOKEN = "" # @param {type:"string"}
if not NGROK_AUTHTOKEN:
    NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN", "").strip()
if not NGROK_AUTHTOKEN:
    NGROK_AUTHTOKEN = getpass.getpass("Ngrok Authtoken: ").strip()
if not NGROK_AUTHTOKEN:
    raise ValueError("Please provide an Ngrok Authtoken. Get it from https://dashboard.ngrok.com/get-started/your-authtoken")

ngrok.set_auth_token(NGROK_AUTHTOKEN)

# FastAPI application; the route decorators below register their endpoints on it.
app = FastAPI()

# In-memory job store
# Structure: { job_id: { "status": "pending"|"processing"|"completed"|"error", "text": ..., "error": ... } }
# Entries are created by POST /transcribe, updated by process_transcription and read by
# GET /job/{job_id}. The dict lives only in this kernel's memory, and nothing in this cell
# ever removes entries.
JOBS: Dict[str, Dict[str, Any]] = {}

# Request body for POST /transcribe.
class VideoRequest(BaseModel):
    # Link to the video to transcribe; download_and_transcribe accepts only URLs that match
    # TIKTOK_RE or YOUTUBE_RE and raises ValueError for anything else.
    url: str

def process_transcription(job_id: str, url: str):
    """
    Background worker for a single transcription job.

    Marks the job as "processing", runs the blocking download + transcription, then
    stores either the transcript ("completed") or the error message ("error") in the
    global JOBS entry for ``job_id``. Exceptions from the pipeline are recorded there
    rather than propagated.
    """
    try:
        print(f"[Job {job_id}] Starting transcription for {url}")
        JOBS[job_id]["status"] = "processing"

        # Blocking call: download, audio extraction and the Whisper run all happen here.
        transcript = download_and_transcribe(url)

        JOBS[job_id].update(text=transcript, status="completed")
        print(f"[Job {job_id}] Completed successfully.")

    except Exception as exc:
        print(f"[Job {job_id}] Failed: {exc}")
        JOBS[job_id].update(status="error", error=str(exc))

@app.post("/transcribe")
async def start_transcription(req: VideoRequest, background_tasks: BackgroundTasks):
    """
    Register a new transcription job and hand it to a background task.

    Responds right away with the generated ``job_id`` and a "pending" status; the
    result is retrieved later by polling the job endpoint.
    """
    new_job_id = str(uuid.uuid4())
    initial_status = "pending"

    # Create the record before scheduling so the worker always finds its entry.
    JOBS[new_job_id] = dict(status=initial_status, text=None, error=None)
    background_tasks.add_task(process_transcription, new_job_id, req.url)

    return dict(job_id=new_job_id, status=initial_status)

@app.get("/job/{job_id}")
async def get_job_status(job_id: str):
    """
    Return the stored record (status, text, error) for ``job_id``.

    Clients poll this endpoint until the status is "completed" or "error".
    Responds with HTTP 404 when no such job is known.
    """
    record = JOBS.get(job_id)
    if record:
        return record
    raise HTTPException(status_code=404, detail="Job not found")

@app.get("/health_check")
# The route was originally registered with literal parentheses in the path; keep that
# spelling as a hidden alias so existing clients that call it keep working.
@app.get("/health_check()", include_in_schema=False)
async def health_check():
    """Liveness probe: always returns ``{"status": "alive"}`` while the server is up."""
    return {"status": "alive"}

# Open Tunnel
try:
    ngrok_tunnel = ngrok.connect(8000)
    print("
=======================================================")
    print(f"PUBLIC URL: {ngrok_tunnel.public_url}")
    print("Copy this URL and paste it into your local Receiver App")
    print("=======================================================
")
except Exception as e:
    print(f"Ngrok Error: {e}")
    raise e

# Run Uvicorn
config = uvicorn.Config(app, port=8000)
server = uvicorn.Server(config)
await server.serve()


INFO:     Started server process [32255]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)



PUBLIC URL: https://tartily-mensural-elaine.ngrok-free.dev
Copy this URL and paste it into your local Receiver App

Detected YouTube URL. Downloading via yt-dlp: https://www.youtube.com/watch?v=lXUZvyajciY&t=6s...




✓ Downloaded (yt-dlp): temp_8a68703d-36fc-44d3-8d03-47cd4cb8af19.mp4
Extracting audio (FFmpeg)...
✓ Audio extracted + verified: temp_8a68703d-36fc-44d3-8d03-47cd4cb8af19.mp3
Transcribing temp_8a68703d-36fc-44d3-8d03-47cd4cb8af19.mp3...
INFO:     38.210.161.212:0 - "POST /transcribe HTTP/1.1" 200 OK
Detected YouTube URL. Downloading via yt-dlp: https://www.youtube.com/watch?v=lXUZvyajciY&t=6s...




✓ Downloaded (yt-dlp): temp_0cc0b433-7685-4f43-a96f-728d9f12d6af.mp4
Extracting audio (FFmpeg)...
✓ Audio extracted + verified: temp_0cc0b433-7685-4f43-a96f-728d9f12d6af.mp3
Transcribing temp_0cc0b433-7685-4f43-a96f-728d9f12d6af.mp3...
INFO:     38.210.161.212:0 - "POST /transcribe HTTP/1.1" 200 OK
Detected TikTok URL. Downloading via TikWM: https://www.tiktok.com/@therealkingofseans/video/7536354460180827422?is_from_webapp=1&sender_device=pc...
✓ Downloaded (TikWM): temp_880cade7-e688-4503-8b4a-a5f17c182d45.mp4
Extracting audio (FFmpeg)...
✓ Audio extracted + verified: temp_880cade7-e688-4503-8b4a-a5f17c182d45.mp3
Transcribing temp_880cade7-e688-4503-8b4a-a5f17c182d45.mp3...
INFO:     38.210.161.212:0 - "POST /transcribe HTTP/1.1" 200 OK
Detected YouTube URL. Downloading via yt-dlp: https://www.youtube.com/watch?v=lXUZvyajciY&t=6s...




✓ Downloaded (yt-dlp): temp_569db8df-9d84-475b-b76c-7dbdaa1478a1.mp4
Extracting audio (FFmpeg)...
✓ Audio extracted + verified: temp_569db8df-9d84-475b-b76c-7dbdaa1478a1.mp3
Transcribing temp_569db8df-9d84-475b-b76c-7dbdaa1478a1.mp3...
