In [1]:
import os
import tempfile

import numpy as np
import torch
from moviepy.editor import VideoFileClip
from pydub import AudioSegment
from transformers import Wav2Vec2Processor, Wav2Vec2Model

In [7]:
class AudioExtractor:
    """Turn the audio track of a video into one pooled Wav2Vec2 embedding.

    Construction decodes the audio track, optionally truncates it, exports it
    to a temporary WAV file, loads it back as a ``pydub.AudioSegment`` and
    loads the Wav2Vec2 processor and model. ``extract_features`` then runs the
    whole waveform through the model in a single forward pass and averages the
    hidden states over time.

    Attributes:
        video_clip: The opened ``VideoFileClip``.
        audio_clip: The (possibly truncated) audio clip resampled to
            ``SAMPLE_RATE``.
        audio_segment: The exported audio loaded as an ``AudioSegment``.
        processor: The ``Wav2Vec2Processor`` used to build model inputs.
        model: The ``Wav2Vec2Model`` producing the hidden states.
    """

    # Rate used both for exporting the audio and for the processor call. The
    # two must agree: exporting at a lower rate while declaring 16 kHz to the
    # processor makes the model see time-compressed audio.
    SAMPLE_RATE = 16000
    # Default cap, in seconds, on how much of the audio track is analysed.
    MAX_DURATION = 420
    # Checkpoint loaded when the caller does not choose one.
    DEFAULT_MODEL = "facebook/wav2vec2-base"

    def __init__(self, video_path, max_duration=MAX_DURATION, model_name=DEFAULT_MODEL):
        """Decode the audio of ``video_path`` and load the model.

        Args:
            video_path: Path of the video file to read.
            max_duration: Keep at most this many seconds of audio, counted
                from the start. ``None`` disables the cap. Because the whole
                waveform goes through the model in one pass, lowering this
                value is the way to bound memory use on long clips.
            model_name: Name or path of the Wav2Vec2 checkpoint to load.

        Raises:
            ValueError: If the video has no audio track.
        """
        self.video_clip = VideoFileClip(video_path)

        source_audio = self.video_clip.audio
        if source_audio is None:
            # Fail with a clear message instead of an AttributeError on None,
            # and do not leave the clip open behind us.
            self.video_clip.close()
            raise ValueError(f"No audio track found in {video_path!r}")

        self.audio_clip = source_audio.set_fps(self.SAMPLE_RATE)
        if max_duration is not None and self.video_clip.duration > max_duration:
            self.audio_clip = self.audio_clip.subclip(0, max_duration)

        # Export to a unique temporary file so that parallel or repeated runs
        # do not overwrite each other. The descriptor is closed right away so
        # the writer can reopen the path on every platform.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            self.audio_clip.write_audiofile(audio_path, fps=self.SAMPLE_RATE)
            # Load the exported audio back as an AudioSegment.
            self.audio_segment = AudioSegment.from_wav(audio_path)
        finally:
            # Best-effort cleanup: a leftover temp file must not mask the
            # real error (or a successful load).
            try:
                os.remove(audio_path)
            except OSError:
                pass

        # Initialise the processor and the model.
        self.processor = Wav2Vec2Processor.from_pretrained(model_name)
        self.model = Wav2Vec2Model.from_pretrained(model_name)

    def close(self):
        """Close the underlying moviepy video clip."""
        self.video_clip.close()

    def extract_features(self) -> torch.Tensor:
        """Compute the time-averaged Wav2Vec2 embedding of the loaded audio.

        Returns:
            A tensor of shape ``(1, hidden_size)``: the model's
            ``last_hidden_state`` averaged over the time dimension.

        Raises:
            ValueError: If the audio segment contains no samples.
        """
        # Convert the raw samples to a float32 numpy array.
        audio_samples = np.array(self.audio_segment.get_array_of_samples()).astype(np.float32)
        if audio_samples.size == 0:
            raise ValueError("Audio track contains no samples")

        # Down-mix to mono. The reshape assumes the samples of the channels
        # are interleaved frame by frame, as the original stereo code did.
        channels = self.audio_segment.channels
        if channels > 1:
            audio_samples = audio_samples.reshape((-1, channels)).mean(axis=1)

        # Peak-normalise; a silent track has peak 0 and is left untouched to
        # avoid a division by zero that would turn every value into NaN.
        peak = np.max(np.abs(audio_samples))
        if peak > 0:
            audio_samples = audio_samples / peak

        # Build the model inputs.
        inputs = self.processor(
            audio_samples,
            sampling_rate=self.SAMPLE_RATE,
            return_tensors="pt",
            padding=True,
        )

        # Inference only: no gradients are needed for embeddings.
        with torch.no_grad():
            embeddings = self.model(inputs.input_values).last_hidden_state

        # Average over the time axis to get one vector per clip.
        return embeddings.mean(dim=1)

In [8]:
# Video to analyse. Set the VIDEO_PATH environment variable to point at a
# different file; the fallback is the path used when this notebook was recorded.
path = os.environ.get(
    "VIDEO_PATH",
    r"C:\Users\Darya\Downloads\videos\3ec7c2b092514dc4ebeaa3036fe9857c.mp4",
)

audio = AudioExtractor(path)  # decode the audio track and load the model


MoviePy - Writing audio in temp_audio.wav


                                                                      

MoviePy - Done.


In [9]:
# Run the loaded audio through the model and keep the pooled embedding.
res = audio.extract_features() # compute the embeddings

In [10]:
# Shape of the pooled embedding; the recorded run shows torch.Size([1, 768]).
res.shape

torch.Size([1, 768])