In [10]:
import pandas as pd
import torch
import os
from tqdm import tqdm

# Импортируем экстракторы
from video_extractor.VideoFeatureExtractor import VideoFeatureExtractor
from speech_to_text.SpeechToTextFeatureExtractor import SpeechToTextFeatureExtractor
from text_extractor.TextFeatureExtractor import TextFeatureExtractor

# Paths to the metadata CSV and to the directory containing the video files.
# The CSV is read for its 'video_id', 'title' and 'description' columns; the
# videos are looked up in the directory as "<video_id>.mp4".
# NOTE(review): both are absolute, machine-specific paths — adjust before
# running on another machine.
csv_file_path = '/home/glooma/Code/Python/ML/Hakatons/train_dataset_tag_video/baseline/train_data_categories.csv'
videos_directory = '/home/glooma/Code/Python/ML/Hakatons/train_dataset_tag_video/videos/'

def extract_and_save_embeddings(video_id):
    """Extract video, text and audio embeddings for one video and save them.

    Looks up ``video_id`` in the CSV at ``csv_file_path``, runs the video,
    text and speech-to-text extractors, concatenates their outputs along
    dimension 1 and writes the result with ``torch.save`` to
    ``"<video_id>.mp4.pt"`` (a relative path, so it lands in the current
    working directory).

    Args:
        video_id: Value to match against the ``video_id`` column of the CSV.

    Raises:
        ValueError: If no CSV row has the given ``video_id``.
        FileNotFoundError: If ``<videos_directory>/<video_id>.mp4`` does not
            exist.
    """
    # The CSV is re-read on every call.
    df = pd.read_csv(csv_file_path)

    # Select the metadata row(s) for this video; only the first match is used.
    video_data = df[df['video_id'] == video_id]
    if video_data.empty:
        raise ValueError(f"Video ID {video_id} not found in CSV.")

    # Format instead of concatenating so a non-string id column also works.
    video_file_name = f"{video_data['video_id'].values[0]}.mp4"

    # A missing title/description is read by pandas as NaN; skip such fields
    # so the literal text "nan" is not fed to the text extractor.
    text_fields = (
        video_data['title'].values[0],
        video_data['description'].values[0],
    )
    text_input = " ".join(str(field) for field in text_fields if not pd.isna(field))

    # Full path to the video file.
    video_file_path = os.path.join(videos_directory, video_file_name)

    # Fail early, before loading any models, if the video file is missing.
    if not os.path.exists(video_file_path):
        raise FileNotFoundError(f"Video file {video_file_path} not found.")

    vosk_model_path = "/home/glooma/Code/Python/ML/Hakatons/hackathon_video_tagging/model/extractors/speech_to_text/vosk-model-small-ru-0.22"

    # Instantiate the extractors (new instances are created on every call).
    video_extractor = VideoFeatureExtractor()
    audio_extractor = SpeechToTextFeatureExtractor(vosk_model_path=vosk_model_path)
    text_extractor = TextFeatureExtractor()

    output_file = f"{video_file_name}.pt"

    # The context manager closes the progress bar even if a step raises.
    with tqdm(total=4, desc="Extracting features") as pbar:
        pbar.set_description("Extracting video features")
        video_embeddings = video_extractor.extract_features(video_file_path)
        pbar.update(1)

        pbar.set_description("Extracting text features")
        text_embeddings = text_extractor.extract_features(text_input)
        pbar.update(1)

        pbar.set_description("Extracting audio features")
        audio_embeddings = audio_extractor.extract_features(video_file_path)
        pbar.update(1)

        # Concatenate along dim=1.
        # NOTE(review): assumes all three outputs are tensors of rank >= 2
        # that agree on every dimension except dim 1 — confirm against the
        # extractor implementations.
        pbar.set_description("Combining embeddings")
        combined_embeddings = torch.cat((video_embeddings, text_embeddings, audio_embeddings), dim=1)
        pbar.update(1)

        # Persist the combined tensor.
        torch.save(combined_embeddings, output_file)

    print(f"Embeddings saved to {output_file}")

# Example usage: run the extraction for a single video and report any failure
# as a one-line message instead of a traceback.
input_video_id = '1e0a5151efc26a3a8e038e132f6b80f4'  # Replace with the desired video_id
try:
    extract_and_save_embeddings(input_video_id)
except Exception as err:
    # Top-level boundary of the script: print the message and carry on.
    print(f"An error occurred: {err!s}")

LOG (VoskAPI:ReadDataFiles():model.cc:213) Decoding params beam=10 max-active=3000 lattice-beam=2
LOG (VoskAPI:ReadDataFiles():model.cc:216) Silence phones 1:2:3:4:5:6:7:8:9:10
LOG (VoskAPI:RemoveOrphanNodes():nnet-nnet.cc:948) Removed 0 orphan nodes.
LOG (VoskAPI:RemoveOrphanComponents():nnet-nnet.cc:847) Removing 0 orphan components.
LOG (VoskAPI:ReadDataFiles():model.cc:248) Loading i-vector extractor from /home/glooma/Code/Python/ML/Hakatons/hackathon_video_tagging/model/extractors/speech_to_text/vosk-model-small-ru-0.22/ivector/final.ie
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:183) Computing derived variables for iVector extractor
LOG (VoskAPI:ComputeDerivedVars():ivector-extractor.cc:204) Done.
LOG (VoskAPI:ReadDataFiles():model.cc:282) Loading HCL and G from /home/glooma/Code/Python/ML/Hakatons/hackathon_video_tagging/model/extractors/speech_to_text/vosk-model-small-ru-0.22/graph/HCLr.fst /home/glooma/Code/Python/ML/Hakatons/hackathon_video_tagging/model/extractors

MoviePy - Writing audio in temp_audio.wav


Extracting audio features:  50%|█████     | 2/4 [00:02<00:01,  1.09it/s]

MoviePy - Done.


Combining embeddings: 100%|██████████| 4/4 [00:09<00:00,  2.31s/it]     


Embeddings saved to 1e0a5151efc26a3a8e038e132f6b80f4.mp4.pt
