In [25]:
import subprocess

In [26]:
import subprocess

def cut_video(input_path, output_path, start_time, end_time):
    """
    Cut a time range out of a video, re-encoding the result.

    Video is encoded with libx264 (preset "fast") and audio with AAC.
    ``-ss`` / ``-to`` are placed before ``-i``, so ffmpeg treats them as
    options of the input file.

    :param input_path: source video file
    :param output_path: destination file
    :param start_time: start of the range; converted with str() and passed to ``-ss``
    :param end_time: end of the range; converted with str() and passed to ``-to``
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    cmd = [
        'ffmpeg',
        '-ss', str(start_time),
        '-to', str(end_time),
        '-i', input_path,
        '-c:v', 'libx264',
        '-c:a', 'aac',
        '-preset', 'fast',
        output_path
    ]

    # check=True: surface ffmpeg failures instead of silently ignoring the
    # exit code (same convention as the VideoProcessor methods in this file).
    subprocess.run(cmd, check=True)

def scale_video(path, output, width=900, height=1600):
    """
    Rescale a video to ``width`` x ``height`` with ffmpeg's ``scale`` filter.

    :param path: source video file
    :param output: destination file
    :param width: target width, interpolated into ``scale=width:height``
    :param height: target height, interpolated into ``scale=width:height``
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    cmd = [
        "ffmpeg",
        "-i", path,
        "-vf", f"scale={width}:{height}",
        output,
    ]
    # check=True: a failed ffmpeg run must not pass silently.
    subprocess.run(cmd, check=True)

def add_border_ffmpeg(input_path, output_path, border_hd=0, border_lr=0, border_color="black"):
    """
    Add a solid border around a video using ffmpeg's ``pad`` filter.

    The audio stream is copied without re-encoding.

    :param input_path: source video file
    :param output_path: destination file
    :param border_hd: border size above and below the original video
        (total height grows by ``border_hd * 2``)
    :param border_lr: border size left and right of the original video
        (total width grows by ``border_lr * 2``)
    :param border_color: color name or hex code (#RRGGBB)
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    # The original frame is placed at (border_lr, border_hd) inside the
    # enlarged canvas, which leaves equal borders on opposite sides.
    pad_filter = (
        f'pad=width=iw+{border_lr*2}:height=ih+{border_hd*2}'
        f':x={border_lr}:y={border_hd}:color={border_color}'
    )
    cmd = [
        'ffmpeg',
        '-i', input_path,
        '-vf', pad_filter,
        '-c:a', 'copy',
        output_path
    ]

    # check=True: a failed ffmpeg run must not pass silently.
    subprocess.run(cmd, check=True)

def blur_video(input_path, output_path):
    """
    Blur a video with ffmpeg's ``boxblur=10:5`` filter, copying the audio stream.

    :param input_path: source video file
    :param output_path: destination file
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    cmd = [
        'ffmpeg',
        '-i', input_path,
        '-vf', 'boxblur=10:5',
        '-c:a', 'copy',
        output_path
    ]
    # check=True: a failed ffmpeg run must not pass silently.
    subprocess.run(cmd, check=True)

def compose_center(background_path, overlay_path, output_path):
    """
    Overlay one video centered on top of another.

    The first input is the background and the second is drawn over it at
    ``((W-w)/2, (H-h)/2)``, i.e. centered both horizontally and vertically.
    Audio is copied without re-encoding.

    :param background_path: video used as the background (ffmpeg input 0)
    :param overlay_path: video drawn on top of the background (ffmpeg input 1)
    :param output_path: destination file
    :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    cmd = [
        'ffmpeg',
        '-i', background_path,
        '-i', overlay_path,
        '-filter_complex',
        '[0:v][1:v]overlay=(W-w)/2:(H-h)/2',
        '-c:a', 'copy',
        output_path
    ]
    # check=True: a failed ffmpeg run must not pass silently.
    subprocess.run(cmd, check=True)


In [27]:
# blur_video("input.mp4", "output.mp4")
# scale_video("blur.mp4", "background.mp4", width=1920, height=1080*3)
# compose_center("background.mp4", "input.mp4", "output.mp4")
# cut_video("input.mp4", "cutted.mp4", 5, 10)

In [28]:
# clip = VideoFileClip("output.mp4")
# clip.size

In [30]:
# model = Model("../models/vosk-model-ru-0.22")

In [32]:
import os
import whisper
import torch
from pydub import AudioSegment
from pydub.silence import split_on_silence
from tqdm import tqdm
import ffmpeg

# Configuration for VideoProcessor
CONFIG = dict(
    # Whisper model to load
    model_size="small",
    # Minimum chunk length (seconds)
    min_chunk_duration=30,
    # Maximum chunk length (seconds)
    max_chunk_duration=60,
    # Silence threshold in dB
    silence_thresh=-40,
    # Minimum pause length (ms)
    min_silence_len=800,
    # Silence kept at chunk edges (ms)
    keep_silence=300,
    # Folder for the results
    output_folder="video_chunks",
    # Recognition language
    language="ru",
)

class VideoProcessor:
    """
    Split a video into chunks on silence and transcribe each chunk with Whisper.

    ``process`` runs the whole pipeline: extract the audio track with ffmpeg,
    split it on silence with pydub, merge the pieces into chunks bounded by the
    configured min/max duration, cut the video and write one transcript per chunk.
    """

    def __init__(self, config):
        """
        Load the Whisper model and make sure the output folder exists.

        :param config: dict providing the keys read by this class:
            "model_size", "output_folder", "min_silence_len", "silence_thresh",
            "keep_silence", "min_chunk_duration", "max_chunk_duration", "language"
        """
        self.config = config
        self.model = self._load_model()
        os.makedirs(config["output_folder"], exist_ok=True)

    def _load_model(self):
        """Load the configured Whisper model on CUDA when available, else on CPU."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Загружаем модель Whisper ({self.config['model_size']}) на {device}...")
        return whisper.load_model(self.config["model_size"], device=device)

    def _extract_audio(self, video_path):
        """
        Extract the audio of ``video_path`` as mono 16 kHz into the output folder.

        :return: path of the written file (``temp_audio.wav`` in the output folder)
        :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
        """
        audio_path = os.path.join(self.config["output_folder"], "temp_audio.wav")
        subprocess.run([
            "ffmpeg",
            "-i", video_path,
            "-ac", "1",
            "-ar", "16000",
            "-y",
            audio_path
        ], check=True)
        return audio_path

    def _process_audio(self, audio_path):
        """
        Split the audio on silence and merge the pieces into chunks.

        Consecutive pieces are concatenated while the combined length stays
        below "max_chunk_duration"; a finished chunk is kept only if it is at
        least "min_chunk_duration" long (shorter ones are dropped).

        :return: list of audio chunks; empty if no non-silent piece was found
        """
        audio = AudioSegment.from_file(audio_path)
        audio = audio.normalize()

        print("Разделение аудио на фрагменты...")
        chunks = split_on_silence(
            audio,
            min_silence_len=self.config["min_silence_len"],
            silence_thresh=self.config["silence_thresh"],
            keep_silence=self.config["keep_silence"]
        )

        # Nothing but silence: there is no first chunk to seed the merge with.
        if not chunks:
            return []

        max_duration = self.config["max_chunk_duration"]
        min_duration = self.config["min_chunk_duration"]

        # Merge short pieces
        combined = []
        current = chunks[0]

        for chunk in tqdm(chunks[1:], desc="Объединение фрагментов"):
            duration = (len(current) + len(chunk)) / 1000
            if duration < max_duration:
                current += chunk
            else:
                if len(current) / 1000 >= min_duration:
                    combined.append(current)
                current = chunk

        # Flush the chunk that was still being accumulated.
        if len(current) / 1000 >= min_duration:
            combined.append(current)

        return combined

    def _split_video(self, video_path, chunks):
        """
        Cut ``video_path`` into ``chunk_NNN.mp4`` files, one per audio chunk.

        Each cut starts where the previous one ended and lasts ``len(chunk)``
        milliseconds; streams are copied without re-encoding.

        NOTE(review): offsets are the running sum of the chunk lengths, i.e.
        this assumes the chunks are contiguous in the source timeline starting
        at 0. ``_process_audio`` builds them with split_on_silence and drops
        short ones, so cut points may drift from the audio that is
        transcribed -- confirm, or track the original offsets.

        :raises subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
        """
        # Running offset in ms; avoids re-concatenating all previous audio
        # for every chunk just to measure its length.
        start_ms = 0
        for i, chunk in enumerate(chunks):
            end_ms = start_ms + len(chunk)
            output_path = os.path.join(self.config["output_folder"], f"chunk_{i:03d}.mp4")

            subprocess.run([
                "ffmpeg",
                "-i", video_path,
                "-ss", str(start_ms / 1000),
                "-to", str(end_ms / 1000),
                "-c", "copy",
                "-y",
                output_path
            ], check=True)
            start_ms = end_ms

    def process(self, video_path):
        """
        Main entry point: split ``video_path`` into chunks and transcribe them.

        Writes ``chunk_NNN.mp4`` and ``chunk_NNN.txt`` (UTF-8) into the output
        folder. Temporary audio files are removed even when a step fails.
        """
        # Defined before the try so the cleanup below never hits an unbound
        # name when audio extraction itself fails.
        audio_path = None
        temp_chunk_path = os.path.join(self.config["output_folder"], "temp_chunk.wav")
        try:
            # 1. Extract the audio
            audio_path = self._extract_audio(video_path)

            # 2. Build the chunks
            chunks = self._process_audio(audio_path)

            # 3. Cut the video
            self._split_video(video_path, chunks)

            # 4. Transcription (optional)
            if self.model:
                for i, chunk in enumerate(chunks):
                    chunk.export(temp_chunk_path, format="wav")
                    result = self.model.transcribe(
                        temp_chunk_path,
                        language=self.config["language"],
                        word_timestamps=True
                    )
                    transcript_path = os.path.join(
                        self.config["output_folder"], f"chunk_{i:03d}.txt"
                    )
                    # Explicit UTF-8: the platform default encoding may not be
                    # able to represent the transcribed text.
                    with open(transcript_path, "w", encoding="utf-8") as f:
                        f.write(result["text"])

            print(f"\nГотово! Сохранено {len(chunks)} фрагментов в {self.config['output_folder']}")

        finally:
            # Clean up temporary files
            for temp_path in (audio_path, temp_chunk_path):
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)

# Entry point: build a processor from CONFIG and run the pipeline on one video.
if __name__ == "__main__":
    processor = VideoProcessor(CONFIG)
    processor.process("../data/input1.mp4")  # Set this to your own file

Загружаем модель Whisper (small) на cuda...


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

Разделение аудио на фрагменты...


Объединение фрагментов: 100%|██████████| 87/87 [00:00<00:00, 8958.89it/s]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enabl


Готово! Сохранено 10 фрагментов в video_chunks


/tmp/tmpn_g_0msa/main.c:5:10: fatal error: Python.h: No such file or directory
    5 | #include <Python.h>
      |          ^~~~~~~~~~
compilation terminated.
/tmp/tmp12ueg9i9/main.c:5:10: fatal error: Python.h: No such file or directory
    5 | #include <Python.h>
      |          ^~~~~~~~~~
compilation terminated.
