In [42]:
import os
import shutil
import numpy as np
import soundfile as sf
from sklearn.model_selection import train_test_split
import scipy.io.wavfile as wav

import librosa
from pydub import AudioSegment, effects  
from pydub.silence import split_on_silence

In [43]:
def change_speed(audio, speed_factor=1.0):
    """Time-stretch a signal with ``librosa.effects.time_stretch``.

    Args:
        audio: Audio samples, passed straight through to librosa.
        speed_factor: Forwarded as librosa's ``rate`` argument; the default of
            1.0 requests no change in speed.

    Returns:
        The stretched signal returned by librosa. Its length generally differs
        from the input whenever ``speed_factor`` is not 1.0.
    """
    stretched = librosa.effects.time_stretch(audio, rate=speed_factor)
    return stretched

In [44]:
def add_echo(audio, sr, delay=0.1, decay=0.5):
    """Mix a single delayed, attenuated copy of the signal into itself.

    Args:
        audio: 1-D array of samples.
        sr: Sampling rate in Hz, used to convert ``delay`` to a sample count.
        delay: Echo delay in seconds.
        decay: Gain applied to the delayed copy.

    Returns:
        A float64 array with the same length as ``audio``; the part of the echo
        that would extend past the end of the input is discarded.
    """
    n_samples = len(audio)
    lag = int(sr * delay)
    padding = np.zeros(lag)

    # Line up the dry signal and its delayed copy on a common, longer timeline.
    dry = np.concatenate([audio, padding])
    wet = np.concatenate([padding, decay * audio])

    # Trim the echo tail so the output length matches the input length.
    return (dry + wet)[:n_samples]

In [45]:
def add_noise(audio, noise_factor=0.005, rng=None):
    """Return ``audio`` with additive Gaussian white noise.

    Args:
        audio: 1-D array of samples.
        noise_factor: Scale applied to the standard-normal noise before it is
            added to the signal.
        rng: Optional ``numpy.random.Generator``. When supplied, the noise is
            drawn from it, which makes the augmentation reproducible for a
            given seed. When ``None`` (default) the global NumPy random state
            is used, exactly as before; seed it with ``np.random.seed`` if
            repeatable output is required.

    Returns:
        A new array ``audio + noise_factor * noise``; the input is not modified.
    """
    n_samples = len(audio)
    if rng is None:
        # Legacy path: draws from NumPy's global random state.
        noise = np.random.randn(n_samples)
    else:
        noise = rng.standard_normal(n_samples)
    return audio + noise_factor * noise

In [46]:
def pitch_shift(audio, sr, n_steps=2):
    """Shift the pitch of a signal with ``librosa.effects.pitch_shift``.

    Args:
        audio: Audio samples, passed straight through to librosa.
        sr: Sampling rate of ``audio`` in Hz.
        n_steps: Forwarded as librosa's ``n_steps`` argument (size of the shift).

    Returns:
        The pitch-shifted signal returned by librosa.
    """
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)

In [47]:
def split_audio(audio, sr, fragment_length=5):
    """Cut a signal into consecutive, non-overlapping fragments of equal length.

    Args:
        audio: Sliceable sequence of samples.
        sr: Sampling rate in Hz.
        fragment_length: Fragment duration in seconds.

    Returns:
        A list of slices of ``audio``, each exactly ``int(sr * fragment_length)``
        samples long. A trailing remainder shorter than one fragment is dropped,
        so the list is empty when the signal is shorter than one fragment.
    """
    window = int(sr * fragment_length)
    # Only start offsets that leave room for a complete window are visited,
    # so every slice taken below has the full fragment length.
    last_start = len(audio) - window

    fragments = []
    for start in range(0, last_start + 1, window):
        fragments.append(audio[start:start + window])
    return fragments

In [48]:
# Locate the raw recordings relative to the directory the notebook runs from:
# PD and HC each have their own sub-folder under Data/ReadText.
cur_dir = os.getcwd()
pd_folder, hc_folder = (
    os.path.join(cur_dir, relative_path)
    for relative_path in ("../Data/ReadText/PD", "../Data/ReadText/HC")
)
print(pd_folder)

d:\Python\Pipeline\PythonScripts\../Data/ReadText/PD


In [49]:

def remove_silence(input_file, output_file, silence_thresh=-30, min_silence_len=700, keep_silence=500):
    """Strip long silent stretches from a WAV file and save the result.

    The recording is converted to 16-bit samples, normalized with pydub's
    ``effects.normalize``, split wherever ``split_on_silence`` detects silence,
    and the remaining chunks are concatenated and exported as a 16-bit,
    44.1 kHz WAV file.

    Args:
        input_file: Path of the WAV file to read.
        output_file: Path the processed WAV file is written to.
        silence_thresh: Forwarded to ``split_on_silence`` as ``silence_thresh``
            (the level below which audio is treated as silence).
        min_silence_len: Forwarded to ``split_on_silence`` as
            ``min_silence_len`` (minimum silence duration that causes a split).
        keep_silence: Forwarded to ``split_on_silence`` as ``keep_silence``
            (silence retained around each chunk).
    """
    # Load the audio file and bring it to a known sample format before analysis.
    audio = AudioSegment.from_wav(input_file)
    audio = audio.set_sample_width(2)  # 2 bytes = 16 bits
    audio = effects.normalize(audio)

    # Split the audio where silence is longer than min_silence_len and quieter
    # than silence_thresh.
    chunks = split_on_silence(audio,
                              min_silence_len=min_silence_len,
                              silence_thresh=silence_thresh,
                              keep_silence=keep_silence)

    # Combine the chunks back together.
    non_silent_audio = AudioSegment.empty()
    for chunk in chunks:
        non_silent_audio += chunk

    # AudioSegment setters return a NEW segment instead of modifying in place,
    # so the result must be rebound; previously the converted segments were
    # discarded and the export kept whatever format the chunks happened to have.
    non_silent_audio = non_silent_audio.set_sample_width(2)
    non_silent_audio = non_silent_audio.set_frame_rate(44100)

    # Export the result.
    non_silent_audio.export(output_file, format="wav")

# Example usage
# input_file = "../Data/out16.wav"
# output_file = "../Data/output_no_silence.wav"
# remove_silence(input_file, output_file)

In [50]:
# os.listdir returns entries in arbitrary, platform-dependent order. Sorting the
# names makes these lists deterministic, so any seeded shuffling or splitting done
# on them later yields the same result on every machine and every run.
PD_path = sorted(os.listdir(pd_folder))
HC_path = sorted(os.listdir(hc_folder))
print(PD_path)
print(HC_path)

['ID02_pd_2_0_0 (mp3cut.net).wav', 'ID02_pd_2_0_0_S (mp3cut.net).wav', 'ID04_pd_2_0_1 (mp3cut.net).wav', 'ID04_pd_2_0_1_S (mp3cut.net).wav', 'ID06_pd_3_1_1 (mp3cut.net) (1).wav', 'ID06_pd_3_1_1_S (mp3cut.net).wav', 'ID07_pd_2_0_0 (mp3cut.net) (1).wav', 'ID07_pd_2_0_0_S (mp3cut.net).wav', 'ID13_pd_3_2_2 (mp3cut.net) (1).wav', 'ID13_pd_3_2_2_S (mp3cut.net).wav', 'ID16_pd_2_0_0 (mp3cut.net) (1).wav', 'ID16_pd_2_0_0_S (mp3cut.net).wav', 'ID17_pd_2_1_0 (mp3cut.net) (1).wav', 'ID17_pd_2_1_0_S (mp3cut.net) (1).wav', 'ID17_pd_2_1_0_S (mp3cut.net).wav', 'ID18_pd_4_3_3 (mp3cut.net) (1).wav', 'ID20_pd_3_0_1 (mp3cut.net) (1).wav', 'ID20_pd_3_0_1_S (mp3cut.net) (1).wav', 'ID20_pd_3_0_1_S (mp3cut.net).wav', 'ID24_pd_2_0_0 (mp3cut.net) (1).wav', 'ID24_pd_2_0_0_S (mp3cut.net).wav', 'ID27_pd_4_1_1 (mp3cut.net) (1).wav', 'ID27_pd_4_1_1_S (mp3cut.net) (1).wav', 'ID27_pd_4_1_1_S (mp3cut.net) (2).wav', 'ID27_pd_4_1_1_S (mp3cut.net).wav', 'ID29_pd_3_1_2 (mp3cut.net) (1).wav', 'ID29_pd_3_1_2_S (mp3cut.net).w

In [51]:
# Split each class's file list into train and test subsets (80/20, fixed seed).
# NOTE(review): file names appear to begin with a speaker ID (e.g. "ID17_...") and
# some IDs have several recordings; a per-file split may place the same speaker in
# both train and test. Confirm whether a grouped, per-speaker split is required.
split_options = dict(test_size=0.2, random_state=42)
PD_train, PD_test = train_test_split(PD_path, **split_options)
HC_train, HC_test = train_test_split(HC_path, **split_options)

In [52]:
# Destination folders for the silence-trimmed recordings: one sub-folder per class
# (HC / PD) under each of the train and test roots.
train_folder = "../Data/Train"
test_folder = "../Data/Test"
hc_train_folder, pd_train_folder = (os.path.join(train_folder, label) for label in ("HC", "PD"))
hc_test_folder, pd_test_folder = (os.path.join(test_folder, label) for label in ("HC", "PD"))

# Create the whole tree up front; exist_ok=True keeps re-running this cell harmless.
for directory in (hc_train_folder, pd_train_folder, hc_test_folder, pd_test_folder):
    os.makedirs(directory, exist_ok=True)

In [36]:
# Strip silence from every recording and write the result into the train/test
# folder of its class. Each job is (file names, source folder, destination folder);
# jobs run in the order PD train, PD test, HC train, HC test.
silence_jobs = [
    (PD_train, pd_folder, pd_train_folder),
    (PD_test, pd_folder, pd_test_folder),
    (HC_train, hc_folder, hc_train_folder),
    (HC_test, hc_folder, hc_test_folder),
]

for file_names, source_dir, target_dir in silence_jobs:
    for file in file_names:
        # The output keeps the original file name, only the folder changes.
        remove_silence(os.path.join(source_dir, file), os.path.join(target_dir, file))

In [53]:
# Destination folders for the fragmented / augmented audio: one sub-folder per
# class (HC / PD) under each of the augmented train and test roots.
augment_train_folder = "../Data/Augment_audio/Train"
augment_test_folder = "../Data/Augment_audio/Test"
hc_aug_train_folder, pd_aug_train_folder = (
    os.path.join(augment_train_folder, label) for label in ("HC", "PD")
)
hc_aug_test_folder, pd_aug_test_folder = (
    os.path.join(augment_test_folder, label) for label in ("HC", "PD")
)

# Create the whole tree up front; exist_ok=True keeps re-running this cell harmless.
for directory in (hc_aug_train_folder, pd_aug_train_folder, hc_aug_test_folder, pd_aug_test_folder):
    os.makedirs(directory, exist_ok=True)

In [54]:
# Augmentation parameter grids. Each value produces one extra file per training
# fragment, and the value itself is embedded in the output file name.
speed_factors = [0.9, 1.1]        # passed to change_speed
pitch_shifts = [-2, -1, 1, 2]     # passed to pitch_shift as n_steps
noise_factors = [0.001, 0.005, 0.01]  # passed to add_noise

In [55]:
# Source folder -> (destination folder, crop-only flag).
# Test subsets are only cut into fragments; train subsets additionally get every
# augmentation (noise, pitch shift, speed change, echo) applied to each fragment.
augmentation_plan = {
    pd_train_folder: (pd_aug_train_folder, False),
    pd_test_folder: (pd_aug_test_folder, True),
    hc_train_folder: (hc_aug_train_folder, False),
    hc_test_folder: (hc_aug_test_folder, True),
}

for folder, (save_folder, only_crop) in augmentation_plan.items():
    for filename in os.listdir(folder):
        if not filename.endswith(".wav"):
            continue

        audio, sr = librosa.load(os.path.join(folder, filename), sr=None)

        for i, fragment in enumerate(split_audio(audio, sr)):
            # Common tail of every file produced from this fragment. `filename`
            # still carries its ".wav" extension, so names look like
            # "x.wav_fragment_0.wav"; kept unchanged in case later steps rely on it.
            fragment_filename = f"{filename}_fragment_{i}.wav"
            sf.write(os.path.join(save_folder, fragment_filename), fragment, sr, subtype='PCM_16')

            if only_crop:
                continue

            for noise_factor in noise_factors:
                noisy = add_noise(fragment, noise_factor)
                noisy_path = os.path.join(save_folder, f"noise_{noise_factor}_{fragment_filename}")
                sf.write(noisy_path, noisy, sr, subtype='PCM_16')

            for n_steps in pitch_shifts:
                shifted = pitch_shift(fragment, sr, n_steps)
                shifted_path = os.path.join(save_folder, f"pitch_shift_{n_steps}_{fragment_filename}")
                sf.write(shifted_path, shifted, sr, subtype='PCM_16')

            for speed_factor in speed_factors:
                stretched = change_speed(fragment, speed_factor)
                stretched_path = os.path.join(save_folder, f"speed_{speed_factor}_{fragment_filename}")
                sf.write(stretched_path, stretched, sr, subtype='PCM_16')

            echoed = add_echo(fragment, sr)
            sf.write(os.path.join(save_folder, f"echo_{fragment_filename}"), echoed, sr, subtype='PCM_16')

In [56]:
# Total number of entries across the four augmented output folders.
num_augmented_audio = sum(
    len(os.listdir(directory))
    for directory in (hc_aug_train_folder, hc_aug_test_folder, pd_aug_train_folder, pd_aug_test_folder)
)

In [57]:
# Report the total file count; the message is in Russian ("Number of augmented audio files").
print("Количество аугментированных аудио:", num_augmented_audio)

Количество аугментированных аудио: 10619
