In [None]:
import pandas as pd
import subprocess
import os
from pydub import AudioSegment


In [20]:
def log(message):
    print(f"[LOG] {message}")

In [21]:
class Segment:
    def __init__(self, name, start, end):
        self.name = name
        self.start = start
        self.end = end

In [22]:
class Data:
    def __init__(self, uri, speaker, verification, identification, segments=None):
        self.segments = segments if segments is not None else []
        self.uri = uri
        self.speaker = speaker
        self.verification = verification
        self.identification = identification

    def AddSegment(self, segment):
        self.segments.append(segment)
    
    def Print(self):
        print("SPEAKER: ", self.speaker)
        print("URI: ", self.uri)
        print("SEGMENTS: ")
        for segment in self.segments:
            print('\t name: ', segment.name)
            print('\t start: ', segment.start)
            print('\t end: ', segment.end)

In [23]:
def DecodeFromCSV(csv_file):
    data = pd.read_csv(csv_file)

    data_objects = {}

    for index, row in data.iterrows():
        segment = Segment(row['segment'], row['start'], row['end'])
        uri = row['uri']

        if uri not in data_objects:
            data_objects[uri] = Data(uri, row['speaker'], row['verification'], row['identification'])
        
        data_objects[uri].AddSegment(segment)

    return list(data_objects.values())

In [24]:
data_objects = DecodeFromCSV('./datasets/voxceleb/data/v1/voxceleb1.csv')


In [None]:
def download_youtube_video(youtube_id, save_directory='./data_video/temp'):
    output_path = os.path.join(save_directory, f'{youtube_id}.mp4')
    if not os.path.exists(output_path):
        log(f"Скачивание видео {youtube_id}...")
        command = [
            'yt-dlp',
            '-f', 'bestvideo+bestaudio',
            '--merge-output-format', 'mp4',
            f'https://www.youtube.com/watch?v={youtube_id}',
            '--output', output_path
        ]
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

        for output in process.stdout:
            print(output, end='')
            
        log(f"Видео {youtube_id} скачано!")

In [26]:
def convert_video_gpu(input_file, output_file):
    log(f"Конвертация видео {input_file} в {output_file} с использованием GPU...")
    command = [
        'ffmpeg', '-threads', '1', '-y', '-hwaccel', 'cuda', '-i', input_file, 
        '-async', '1', '-qscale:v', '5', '-r', '25', 
        '-vf', 'yadif,scale=640:360', '-c:v', 'h264_nvenc', '-c:a', 'aac',
        output_file
    ]
    result = subprocess.run(command, text=True, capture_output=True)

    if result.returncode != 0:
        log(f"Ошибка при конвертации {input_file}: {result.stderr}")
        with open("error_log.txt", "a") as log_file:
            log_file.write(f"Ошибка с {input_file}: {result.stderr}\n")
    else:
        log(f"Видео {input_file} сконвертировано в {output_file}!")

In [27]:
def convert_video_cpu(input_file, output_file):
    log(f"Конвертация видео {input_file} в {output_file}...")
    command = [
        'ffmpeg', '-threads', '1', '-y', '-i', input_file, 
        '-async', '1', '-qscale:v', '5', '-r', '25', 
        '-vf', 'yadif,scale=trunc(iw/2)*2:-1', '-c:v', 'libx264', '-c:a', 'aac',
        output_file
    ]
    result = subprocess.run(command, text=True, capture_output=True)

    if result.returncode != 0:
        log(f"Ошибка при конвертации {input_file}: {result.stderr}")
        with open("error_log.txt", "a") as log_file:
            log_file.write(f"Ошибка с {input_file}: {result.stderr}\n")
    else:
        log(f"Видео {input_file} сконвертировано в {output_file}!")

In [28]:
def slice_video(input_file, start_time, end_time, output_file):
    log(f"Нарезка видео {input_file} с {start_time} до {end_time} в {output_file}...")
    
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    
    command = [
        'ffmpeg', '-y', '-i', input_file, '-ss', str(start_time), 
        '-to', str(end_time), '-c:v', 'libx264', '-c:a', 'aac', output_file
    ]
    result = subprocess.run(command, text=True, capture_output=True)
    
    if result.returncode != 0:
        log(f"Ошибка при нарезке {input_file}: {result.stderr}")
        with open("error_log.txt", "a") as log_file:
            log_file.write(f"Ошибка с {input_file}: {result.stderr}\n")
    else:
        log(f"Фрагмент сохранен в {output_file}!")

In [None]:
MAX_VIDEO_BY_PERSON = 100
MAX_PERSON = 5

In [30]:
counter_video_map = {}
counter_video = 0
counter_person = 0

In [31]:
missing_videos_log = './missing_videos.txt'
for data in data_objects:

    if counter_person >= MAX_PERSON + 1:
        break

    if data.speaker not in counter_video_map:
        counter_video_map[data.speaker] = 0
        counter_person += 1
    counter_video = counter_video_map[data.speaker]
    if counter_video >= MAX_VIDEO_BY_PERSON:
        continue

    print('PERSON: ',  data.speaker, 'CURRENT VIDEO: ', counter_video)

    youtube_id = data.uri.split('/')[-1]
    input_file = f'./data_video/temp/{youtube_id}.mp4'
    converted_file = f'./data_video/temp/converted_{youtube_id}.mp4'

    if not os.path.exists(input_file):
        download_youtube_video(youtube_id)

    if os.path.exists(input_file):

        if not os.path.exists(converted_file):
            convert_video_gpu(input_file, converted_file)

        if os.path.exists(converted_file):
            for segment in data.segments:
                if counter_video < MAX_VIDEO_BY_PERSON:
                    log("Segmentation is started")
                    start_time = segment.start
                    end_time = segment.end
                    output_file = f'./data_video/{segment.name}.mp4'
                    slice_video(converted_file, start_time, end_time, output_file)
                    log("Segmentation is ended")
                    counter_video += 1
                else:
                    counter_video_map[data.speaker] = counter_video
                    break
    else:
        log(f"Файл {input_file} не найден, пропуск...")
        with open(missing_videos_log, 'a') as log_file:
            log_file.write(f"{youtube_id}\n")

    counter_video_map[data.speaker] = counter_video
    

PERSON:  Aaron_Yoo CURRENT VIDEO:  0
[LOG] Segmentation is started
[LOG] Нарезка видео ./data_video/temp/converted_1geDB-I2TjE.mp4 с 2.8 до 16.1 в ./data_video/Aaron_Yoo/1geDB-I2TjE_0000001.mp4...
[LOG] Фрагмент сохранен в ./data_video/Aaron_Yoo/1geDB-I2TjE_0000001.mp4!
[LOG] Segmentation is ended
PERSON:  Aaron_Yoo CURRENT VIDEO:  1
[LOG] Segmentation is started
[LOG] Нарезка видео ./data_video/temp/converted_38_N-A-QOUo.mp4 с 34.7 до 40.0 в ./data_video/Aaron_Yoo/38_N-A-QOUo_0000001.mp4...
[LOG] Фрагмент сохранен в ./data_video/Aaron_Yoo/38_N-A-QOUo_0000001.mp4!
[LOG] Segmentation is ended
[LOG] Segmentation is started
[LOG] Нарезка видео ./data_video/temp/converted_38_N-A-QOUo.mp4 с 90.1 до 94.6 в ./data_video/Aaron_Yoo/38_N-A-QOUo_0000002.mp4...
[LOG] Фрагмент сохранен в ./data_video/Aaron_Yoo/38_N-A-QOUo_0000002.mp4!
[LOG] Segmentation is ended
[LOG] Segmentation is started
[LOG] Нарезка видео ./data_video/temp/converted_38_N-A-QOUo.mp4 с 101.4 до 108.4 в ./data_video/Aaron_Yoo/38

In [None]:

def extract_and_split_audio(source_dir, train_dir, test_dir):
    for target_dir in [train_dir, test_dir]:
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    for person_name in os.listdir(source_dir):
        person_path = os.path.join(source_dir, person_name)
        
        if os.path.isdir(person_path):
            files = [f for f in os.listdir(person_path) if f.endswith('.mp4')]
            train_index = int(len(files) * 0.8)

            train_files = files[:train_index]
            test_files = files[train_index:]

            process_files(person_name, person_path, train_files, train_dir)
            process_files(person_name, person_path, test_files, test_dir)

def process_files(person_name, person_path, files, target_base_dir):
    target_person_path = os.path.join(target_base_dir, person_name)
    if not os.path.exists(target_person_path):
        os.makedirs(target_person_path)

    for file_name in files:
        video_path = os.path.join(person_path, file_name)
        audio_file_name = os.path.splitext(file_name)[0] + '.wav'
        audio_path = os.path.join(target_person_path, audio_file_name)


        video = AudioSegment.from_file(video_path, format='mp4')
        video.export(audio_path, format='wav')

source_directory = 'data_video'
train_directory = 'data_audio_train'
test_directory = 'data_audio_test'

extract_and_split_audio(source_directory, train_directory, test_directory)

IndexError: list index out of range