<a href="https://colab.research.google.com/github/MLegkovskis/whisper-russian-mp4-transcribe/blob/main/russian_mp4_whisper_audio_transcribe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 🔧 System + Python dependencies (Whisper uses the GPU automatically if available).
# ffmpeg is the system package this notebook needs for reading the video files.
# `apt-get` is used instead of `apt`: `apt` is meant for interactive use and
# does not guarantee a stable command-line interface for scripted runs.
!apt-get -qq update
!apt-get -qq install -y ffmpeg

# `%pip` (rather than `!pip`) installs into the environment of the running
# kernel, so the imports below see the freshly installed packages.
# NOTE(review): versions are unpinned — pin them once a known-good combination
# is identified so that Restart & Run All stays reproducible.
%pip install -q --upgrade pip
%pip install -q openai-whisper regex tqdm

import os, re
from pathlib import Path
from tqdm import tqdm
import whisper
import torch

# Quick sanity check of the runtime: False means transcription will run on CPU.
print("CUDA available:", torch.cuda.is_available())

35 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mSkipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)[0m
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
CUDA available: True


In [5]:
# ⚙️ Parameters — edit these before running the cells below.
VIDEOS_DIR = Path("stepik_videos")  # input folder; the run cell picks up .mp4/.mkv/.mov/.m4v files from it
TEXTS_DIR  = Path("texts")         # output folder: one .txt file per video
MODEL_NAME = "large-v3"            # Whisper checkpoint name: tiny/base/small/medium/large-v3
LANGUAGE   = "ru"                  # language code passed to Whisper (Russian transcription only)

# Create the output folder up front; this is a no-op when it already exists.
TEXTS_DIR.mkdir(parents=True, exist_ok=True)

In [6]:
# Minimal detection of spoken shell commands: a transcript segment is treated
# as containing a command when it mentions one of the CLI tools listed here.
# Each entry is a regex fragment (note "python3?"), and the order is kept
# exactly as it appears in the resulting alternation.
_COMMAND_NAMES = (
    "kubectl", "helm", "minikube", "kubeadm", "docker", "podman", "ctr",
    "crictl", "systemctl", "journalctl", "apt", "yum", "dnf", "pacman",
    "brew", "snap", "pip", "python3?", "node", "npm", "npx", "curl", "wget",
    "git", "ssh", "scp", "ls", "cd", "cat", "nano", "vim", "vi", "less",
    "tail", "head", "echo", "tee", "sed", "awk", "grep", "tar", "zip",
    "unzip", "make", "cmake", "kubens", "kubectx", "kind", "kustomize",
    "terraform", "istioctl", "k9s",
)

# One capturing group that alternates over every tool name above.
COMMAND_VERBS = "(" + "|".join(_COMMAND_NAMES) + ")"

# Case-insensitive match that starts at the beginning of the text or at a
# whitespace character, allows an optional leading "sudo", requires a tool
# name ending on a word boundary, and then runs to the end of the line.
CMD_RE = re.compile(
    r"(?:^|\s)(?:sudo\s+)?" + COMMAND_VERBS + r"\b.*",
    flags=re.IGNORECASE,
)

def load_model(name: str):
    """Load the Whisper checkpoint ``name`` on CUDA when available, else on CPU.

    Prints which checkpoint and device are being used, then returns whatever
    ``whisper.load_model`` returns for that name and device.
    """
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"
    print(f"→ loading whisper {name} on {target}")
    return whisper.load_model(name, device=target)

def transcribe_ru(model, video_path: Path):
    """Transcribe one video with ``model`` using the notebook-wide LANGUAGE.

    The path is handed to ``model.transcribe`` as a string together with
    ``task="transcribe"``; the call's result is returned unchanged.
    """
    options = {"task": "transcribe", "language": LANGUAGE}
    return model.transcribe(str(video_path), **options)

def segments_to_timestamped_text(segments):
    """Render segments as ``[start–end] text`` lines joined by newlines.

    Each segment is read with ``.get``: a missing "start" or "end" falls back
    to 0.0, and a missing or empty "text" becomes an empty string. Times are
    formatted with two decimals in a 7-character field; text is stripped.
    An empty ``segments`` yields an empty string.
    """
    def _render(seg):
        begin = seg.get("start", 0.0)
        finish = seg.get("end", 0.0)
        spoken = (seg.get("text") or "").strip()
        return f"[{begin:7.2f}–{finish:7.2f}] {spoken}"

    return "\n".join(_render(seg) for seg in segments)

def detect_commands_from_segments(segments):
    """Collect the distinct command-like phrases found in the segments' text.

    For each segment with non-empty text, the first ``CMD_RE`` match is taken,
    trimmed, and has its whitespace runs collapsed to single spaces. Phrases
    are de-duplicated case-insensitively; the first spelling seen is kept and
    the result preserves first-occurrence order.
    """
    # Maps lowercased phrase -> first spelling seen; dict order gives output order.
    unique = {}
    for seg in segments:
        spoken = (seg.get("text") or "").strip()
        # Segments without text are skipped without running the regex.
        hit = CMD_RE.search(spoken) if spoken else None
        if hit is None:
            continue
        command = re.sub(r"\s+", " ", hit.group(0).strip())
        unique.setdefault(command.lower(), command)
    return list(unique.values())


In [None]:
# Batch run: transcribe every video in VIDEOS_DIR and write one report per
# video into TEXTS_DIR (transcript with timestamps + detected commands).

# Only regular files count as videos; a directory whose name happens to end in
# a video suffix is ignored. The suffix check is case-insensitive.
VIDEO_SUFFIXES = {".mp4", ".mkv", ".mov", ".m4v"}
videos = sorted(
    p for p in VIDEOS_DIR.glob("*")
    if p.is_file() and p.suffix.lower() in VIDEO_SUFFIXES
)
assert videos, f"No videos found in {VIDEOS_DIR}. Put your files there."

# Load the model once and reuse it for every video.
model = load_model(MODEL_NAME)

for vid in videos:
    print(f"\n====== {vid.name} ======")
    res = transcribe_ru(model, vid)
    segs = res.get("segments", [])

    transcript_ru = segments_to_timestamped_text(segs)
    commands = detect_commands_from_segments(segs)

    # The report is named after the video's stem, so videos that differ only
    # in extension write to the same file (the later one wins).
    out_txt = TEXTS_DIR / f"{vid.stem}.txt"
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write(f"# {vid.name}\n")
        f.write(f"Model: {MODEL_NAME}\n\n")
        f.write("## Transcript (RU)\n")
        f.write(transcript_ru)
        f.write("\n\n## Commands (detected)\n")
        if commands:
            for c in commands:
                f.write(f"- {c}\n")
        else:
            f.write("- (none detected)\n")

    print(f"→ wrote {out_txt}")

# Report the configured output folder rather than a hard-coded path, so the
# message stays correct when TEXTS_DIR is changed in the params cell.
print(f"\n✅ done. One .txt per video is in {TEXTS_DIR}/")


→ loading whisper large-v3 on cuda


100%|█████████████████████████████████████| 2.88G/2.88G [01:14<00:00, 41.3MiB/s]



→ wrote texts/Шаг 1 Autocomplete Stepik.txt

→ wrote texts/Шаг 1 Build Stepik.txt

→ wrote texts/Шаг 1 ConfigMap Монтирование фаилов Stepik.txt

→ wrote texts/Шаг 1 CronJob Stepik.txt

→ wrote texts/Шаг 1 DaemonSet Stepik.txt

→ wrote texts/Шаг 1 Deploy Stepik.txt

→ wrote texts/Шаг 1 Deployment Stepik.txt

→ wrote texts/Шаг 1 Helm изнутри Stepik.txt

→ wrote texts/Шаг 1 Ingress Stepik.txt

→ wrote texts/Шаг 1 Job Stepik.txt

→ wrote texts/Шаг 1 Kubernetes Stepik.txt

→ wrote texts/Шаг 1 Multistage Stepik.txt

→ wrote texts/Шаг 1 Namespace Stepik.txt

→ wrote texts/Шаг 1 Pod Stepik.txt

→ wrote texts/Шаг 1 Secret Переменные окружения Stepik.txt

→ wrote texts/Шаг 1 UI для Kubernetes Stepik.txt

→ wrote texts/Шаг 1 Values Системные переменные Stepik.txt

→ wrote texts/Шаг 1 alias Stepik.txt

→ wrote texts/Шаг 1 kns ktx Stepik.txt

→ wrote texts/Шаг 1 kubeconfig Stepik.txt

→ wrote texts/Шаг 1 kubectl debug и nsenter Stepik.txt

→ wrote texts/Шаг 1 kubespray Stepik.txt

→ wrote texts/Ша