In [1]:
import os
import librosa
import random

import noisereduce as nr
import soundfile as sf
import numpy as np

from pathlib import Path


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_speakers(meta_data, n_per_gender=50, subset_name="train-clean-100", seed=42):
    """Draw a reproducible, gender-balanced sample of speaker IDs.

    The metadata file is read as pipe-separated rows. Blank lines and lines
    starting with ';' are ignored, as are rows with fewer than five fields.
    The first three fields of a row are taken as reader id, gender and subset.

    Args:
        meta_data: Path to the speaker metadata file.
        n_per_gender: Maximum number of speakers to draw for each gender. If
            fewer are available, all of them are returned (in shuffled order).
        subset_name: Only rows whose subset field equals this value are used.
        seed: Seed for the sampling, so repeated calls give the same result.

    Returns:
        A list of reader-id strings: the sampled male speakers followed by the
        sampled female speakers.
    """
    male_speakers = []
    female_speakers = []

    with open(meta_data, 'r') as file:
        for line in file:
            stripped = line.strip()

            # Skip blank lines and comment lines (which start with a semicolon).
            if not stripped or stripped.startswith(";"):
                continue

            fields = [field.strip() for field in stripped.split('|')]

            # Ignore malformed rows that do not carry all expected columns.
            if len(fields) < 5:
                continue

            reader_id, gender, subset = fields[:3]
            if subset != subset_name:
                continue

            if gender == "M":
                male_speakers.append(reader_id)
            elif gender == "F":
                female_speakers.append(reader_id)

    # A private generator yields the same draw as seeding the module-level RNG
    # would, but leaves the global `random` state untouched for other code.
    rng = random.Random(seed)
    male_speakers_sample = rng.sample(male_speakers, min(n_per_gender, len(male_speakers)))
    female_speakers_sample = rng.sample(female_speakers, min(n_per_gender, len(female_speakers)))

    return male_speakers_sample + female_speakers_sample

In [33]:
def clean_and_combine(
    speakers,
    data_root="../../../data/LibriSpeech/train-clean-100/",
    output_dir="../../../data/cleaned_combined_v2/",
    prop_decrease=0.8,
):
    """Denoise every .flac recording of each speaker and merge them into one .wav.

    For each speaker, the directory `<data_root>/<speaker>` is walked
    recursively. Every `.flac` file is loaded at its native sample rate, passed
    through `nr.reduce_noise`, and the results are concatenated and written to
    `<output_dir>/<speaker>.wav`.

    Speakers whose output file already exists are skipped, so an interrupted
    run can be resumed. Speakers with no `.flac` files are reported and skipped.

    Args:
        speakers: Iterable of speaker ids (directory names under `data_root`).
        data_root: Directory holding one sub-directory per speaker.
        output_dir: Directory that receives the combined files. Created if
            it does not exist.
        prop_decrease: Forwarded to `nr.reduce_noise`.

    Returns:
        None. Progress is printed, one line per speaker.
    """
    source_root = Path(data_root)
    target_root = Path(output_dir)

    for speaker in speakers:
        output_path = target_root / f"{speaker}.wav"
        if output_path.exists():
            print(f"file for {speaker} already exists skipping")
            continue

        denoised_files = []
        sample_rate = None
        for root, dirs, files in os.walk(source_root / str(speaker)):
            # os.walk yields entries in filesystem order; sorting in place makes
            # the traversal, and therefore the concatenation order, reproducible.
            dirs.sort()
            for file in sorted(files):
                if not file.endswith('.flac'):
                    continue
                input_file_path = Path(root) / file
                # sr=None keeps the file's native sample rate.
                audio_signal, sample_rate = librosa.load(input_file_path, sr=None)
                denoised_audio_signal = nr.reduce_noise(
                    y=audio_signal, sr=sample_rate, prop_decrease=prop_decrease
                )
                denoised_files.append(denoised_audio_signal)

        # Without any audio there is nothing to concatenate and no valid sample
        # rate to write with, so skip this speaker.
        if not denoised_files:
            print(f"no .flac files found for {speaker} skipping")
            continue

        target_root.mkdir(parents=True, exist_ok=True)
        combined_audio = np.concatenate(denoised_files)
        # The sample rate of the last loaded file is used for the output.
        sf.write(str(output_path), combined_audio, sample_rate)
        print(f"speaker {speaker} completed")


In [31]:
# Location of the speaker metadata table that get_speakers parses.
file_path = "../../../data/LibriSpeech/SPEAKERS.TXT"
# Seeded sample of train-clean-100 speakers: up to 50 male ids followed by up to 50 female ids.
samples = get_speakers(file_path)

print(samples)

['5456', '374', '78', '6529', '1624', '1334', '1040', '445', '8747', '332', '5808', '8425', '4406', '307', '4898', '3242', '118', '8770', '8095', '1034', '1081', '3857', '5104', '7447', '4788', '909', '6367', '5678', '6181', '8098', '3240', '8609', '3486', '8088', '8630', '26', '460', '8063', '2384', '6925', '458', '7367', '6019', '8419', '7402', '2893', '311', '2514', '2436', '1455', '7059', '87', '6000', '2910', '3983', '250', '8123', '2007', '198', '4088', '1447', '7148', '4680', '4640', '7517', '1970', '4195', '587', '5778', '125', '8468', '5339', '1069', '6385', '7800', '8014', '6818', '211', '8051', '1263', '8324', '4853', '7511', '328', '1992', '1963', '696', '5393', '1246', '5022', '150', '4362', '8465', '403', '8312', '1098', '5789', '2989', '6147', '5514']


In [34]:
# Denoise and merge the recordings of every sampled speaker; speakers whose
# combined output file already exists are skipped.
clean_and_combine(samples)

file for 5456 already exists skipping
file for 374 already exists skipping
file for 78 already exists skipping
file for 6529 already exists skipping
file for 1624 already exists skipping
speaker 1334 completed
speaker 1040 completed
speaker 445 completed
speaker 8747 completed
speaker 332 completed
speaker 5808 completed
speaker 8425 completed
speaker 4406 completed
speaker 307 completed
speaker 4898 completed
speaker 3242 completed
speaker 118 completed
speaker 8770 completed
speaker 8095 completed
speaker 1034 completed
speaker 1081 completed
speaker 3857 completed
speaker 5104 completed
speaker 7447 completed
speaker 4788 completed
speaker 909 completed
speaker 6367 completed
speaker 5678 completed
speaker 6181 completed
speaker 8098 completed
speaker 3240 completed
speaker 8609 completed
speaker 3486 completed
speaker 8088 completed
speaker 8630 completed
speaker 26 completed
speaker 460 completed
speaker 8063 completed
speaker 2384 completed
speaker 6925 completed
speaker 458 comp