In [9]:
# Importing the libraries
import numpy as np
import pandas as pd
import librosa
import matplotlib.pyplot as plt
import os
import soundfile as sf
import random

In [10]:
# Load the SEP-28k label table.
# Every backslash is escaped: the previous literal relied on '\S' and '\P'
# being passed through unchanged, which Python reports as invalid escape
# sequences. The resulting path string is unchanged.
df = pd.read_csv('D:\\SEM 4\\Project\\SEP-28k_labels.csv')

In [11]:
# Preview the first rows of the label table.
df.head()

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,WordRep,DifficultToUnderstand,Interjection,NoStutteredWords,NaturalPause,Music,NoSpeech
0,HeStutters,0,0,31900320,31948320,0,0,0,0,0,0,0,0,3,1,0,0
1,HeStutters,0,1,31977120,32025120,0,0,0,0,0,0,0,0,3,1,0,0
2,HeStutters,0,2,34809760,34857760,0,0,0,0,0,0,0,0,3,0,0,0
3,HeStutters,0,3,35200640,35248640,0,0,1,0,0,0,0,0,2,0,0,0
4,HeStutters,0,4,35721920,35769920,0,0,0,0,0,0,0,0,3,0,0,0


In [12]:
# List the distinct values found in the 'Show' column.
df['Show'].unique()

array(['HeStutters', 'HVSA', 'IStutterSoWhat', 'MyStutteringLife',
       'StrongVoices', 'StutterTalk', 'StutteringIsCool',
       'WomenWhoStutter'], dtype=object)

In [13]:
# Root folder of the audio clips; the directory-walking cells that follow
# are all pointed at this path.
main_folder = 'D:\\SEM 4\\Project\\sep28k-final'
def list_files(directory):
    """Lazily yield the full path of every file found anywhere under ``directory``."""
    for parent, _subdirs, names in os.walk(directory):
        # Hand out one joined path per file name in this directory.
        yield from (os.path.join(parent, name) for name in names)

In [17]:
def count_files(directory):
    """Return how many files (of any type) exist under ``directory``, recursively."""
    return sum(len(names) for _parent, _subdirs, names in os.walk(directory))

In [18]:
# Count every file (regardless of extension) found under main_folder.
num_files = count_files(main_folder)
print("Number of files in main directory:", num_files)

Number of files in main directory: 21836


In [8]:
def remove_short_clips(directory, min_duration=3.0, dry_run=False):
    """Delete every audio file under ``directory`` that is shorter than ``min_duration``.

    Each file is decoded at its native sampling rate and its duration is
    compared against the threshold.

    Args:
        directory: Root folder that is searched recursively.
        min_duration: Minimum clip length in seconds; files strictly shorter
            than this are removed. Defaults to 3.0.
        dry_run: When True, nothing is deleted; the files that would be
            removed are only reported. Defaults to False.

    Returns:
        The number of files removed (or, with ``dry_run``, the number that
        would have been removed).
    """
    removed_count = 0
    for file_path in list_files(directory):
        # Load audio at its native rate and measure its duration
        audio, sr = librosa.load(file_path, sr=None)
        duration = librosa.get_duration(y=audio, sr=sr)
        if duration >= min_duration:
            continue
        if dry_run:
            # Preview only: the deletion below is irreversible
            print(f"Would remove {file_path} (duration: {duration:.2f} seconds)")
        else:
            os.remove(file_path)
            print(f"Removed {file_path} (duration: {duration:.2f} seconds)")
        removed_count += 1
    return removed_count


In [9]:
# Destructive step: deletes clips shorter than 3 seconds from main_folder on disk.
removed_count = remove_short_clips(main_folder)
print("Total files removed:", removed_count)

Removed D:\SEM 4\Project\sep28k-final\HeStutters\17\HeStutters_17_0.wav (duration: 2.90 seconds)
Removed D:\SEM 4\Project\sep28k-final\HeStutters\2\HeStutters_2_41.wav (duration: 2.70 seconds)
Removed D:\SEM 4\Project\sep28k-final\HVSA\0\HVSA_0_0.wav (duration: 2.63 seconds)
Removed D:\SEM 4\Project\sep28k-final\HVSA\3\HVSA_3_37.wav (duration: 2.86 seconds)
Removed D:\SEM 4\Project\sep28k-final\IStutterSoWhat\2\IStutterSoWhat_2_37.wav (duration: 2.88 seconds)
Removed D:\SEM 4\Project\sep28k-final\MyStutteringLife\0\MyStutteringLife_0_10.wav (duration: 2.56 seconds)
Removed D:\SEM 4\Project\sep28k-final\MyStutteringLife\18\MyStutteringLife_18_1.wav (duration: 2.69 seconds)
Removed D:\SEM 4\Project\sep28k-final\MyStutteringLife\23\MyStutteringLife_23_3.wav (duration: 2.69 seconds)
Removed D:\SEM 4\Project\sep28k-final\MyStutteringLife\7\MyStutteringLife_7_13.wav (duration: 2.73 seconds)
Removed D:\SEM 4\Project\sep28k-final\StutterTalk\5\StutterTalk_5_8.wav (duration: 2.83 seconds)
Remov

In [10]:
# Recount the files under main_folder after the short-clip removal step.
num_files_after = count_files(main_folder)
print("Number of files after removal:", num_files_after)

Number of files after removal: 21836


In [11]:
def check_sampling_rate(directory, target_sr=16000):
    """Find the files under ``directory`` whose sampling rate differs from ``target_sr``.

    Args:
        directory: Root folder that is searched recursively.
        target_sr: Expected sampling rate in Hz. Defaults to 16000.

    Returns:
        A list of ``(file_path, sampling_rate)`` tuples, one per file whose
        native sampling rate is not ``target_sr``. Empty when all files match.
    """
    non_matching_files = []
    for file_path in list_files(directory):
        # Read the rate from the file's metadata instead of decoding the
        # whole signal; only the rate is needed here.
        sr = librosa.get_samplerate(file_path)
        if sr != target_sr:
            non_matching_files.append((file_path, sr))
    return non_matching_files

In [12]:
# Collect (path, rate) pairs for files under main_folder whose sampling rate
# differs from the function's default target of 16000 Hz.
non_matching_files = check_sampling_rate(main_folder)

In [13]:
# Report the outcome of the sampling-rate check: a confirmation when nothing
# was flagged, otherwise one line per offending file.
if not non_matching_files:
    print("All files have the desired sampling rate (16000)")
else:
    print("Files with non-matching sampling rates:")
    for file_path, sr in non_matching_files:
        print(f"{file_path}: Sampling rate = {sr}")

All files have the desired sampling rate (16000)


In [14]:
# Recount the files under main_folder after the sampling-rate check.
num_files_after = count_files(main_folder)
print("Number of files after checking:", num_files_after)

Number of files after checking: 21836


In [15]:
def augment_all_files_music(input_folder, output_folder, music_folder):
    """Mix a randomly chosen music excerpt into every ``.wav`` clip under ``input_folder``.

    The directory layout of ``input_folder`` is mirrored under
    ``output_folder``; a clip ``name.wav`` is written as ``name_aug.wav``.
    At most the first 3 seconds of each clip are used, and the music is added
    at a signal-to-noise ratio drawn uniformly from 5-15 dB.

    Args:
        input_folder: Root directory that is walked recursively for clips.
        output_folder: Root directory for the augmented clips (created if
            missing).
        music_folder: Directory searched recursively for ``.wav`` music files.

    Raises:
        ValueError: If no ``.wav`` file is found under ``music_folder``.
    """
    # Build the pool of candidate music files once, instead of re-listing the
    # folder for every clip. Only .wav files are kept so that sub-folders and
    # stray non-audio files can never be picked.
    music_files = sorted(
        os.path.join(music_root, name)
        for music_root, _, names in os.walk(music_folder)
        for name in names
        if name.lower().endswith('.wav')
    )
    if not music_files:
        raise ValueError(f"No .wav files found in music folder: {music_folder}")

    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder and its subdirectories
    for root, _, files in os.walk(input_folder):
        # Create corresponding output subdirectory structure
        output_subfolder = os.path.join(output_folder, os.path.relpath(root, input_folder))
        os.makedirs(output_subfolder, exist_ok=True)

        for file_name in files:
            if not file_name.endswith('.wav'):  # Only WAV clips are augmented
                continue

            input_file_path = os.path.join(root, file_name)
            output_file_path = os.path.join(output_subfolder, f"{os.path.splitext(file_name)[0]}_aug.wav")

            # Load the speech clip at its native rate, capped at 3 seconds
            audio, sr = librosa.load(input_file_path, sr=None, duration=3.0)
            if audio.size == 0:
                print(f"Skipped {input_file_path} (no audio samples)")
                continue

            # Load the music at the clip's sampling rate so both signals share
            # one time base before they are summed.
            music_file = np.random.choice(music_files)
            music, _ = librosa.load(music_file, sr=sr, duration=len(audio) / sr)
            # Trim, or cyclically repeat, the music to exactly the clip length;
            # a shorter music file would otherwise break the element-wise sum.
            music = np.resize(music, len(audio))

            snr = np.random.uniform(5, 15)
            music_power = np.var(music)
            if music_power > 0:
                scaling_factor = np.sqrt(np.var(audio) / (music_power * 10 ** (snr / 10)))
                augmented_audio = audio + scaling_factor * music
            else:
                # Silent excerpt: scaling would divide by zero, keep the clip as is
                augmented_audio = np.copy(audio)

            # Keep samples within [-1, 1] so the mix is not clipped when it is
            # stored as integer PCM.
            peak = np.max(np.abs(augmented_audio))
            if peak > 1.0:
                augmented_audio = augmented_audio / peak

            # Write augmented audio to the specified output path
            sf.write(output_file_path, augmented_audio, sr)


In [16]:
def augment_all_files_noise(input_folder, output_folder, noise_folder, snr_range=(0, 15)):
    """Add a randomly chosen noise recording to every ``.wav`` clip under ``input_folder``.

    The directory layout of ``input_folder`` is mirrored under
    ``output_folder``; a clip ``name.wav`` is written as ``name_noise_aug.wav``.
    At most the first 3 seconds of each clip are used. The clip is processed in
    consecutive one-second segments, each with its own signal-to-noise ratio
    drawn uniformly from ``snr_range``.

    Args:
        input_folder: Root directory that is walked recursively for clips.
        output_folder: Root directory for the augmented clips (created if
            missing).
        noise_folder: Directory searched recursively for ``.wav`` noise files.
        snr_range: ``(low, high)`` bounds in dB for the per-segment SNR.

    Raises:
        ValueError: If no ``.wav`` file is found under ``noise_folder``.
    """
    # Build the pool of candidate noise files once, instead of re-listing the
    # folder for every clip. Only .wav files are kept so that sub-folders and
    # stray non-audio files can never be picked.
    noise_files = sorted(
        os.path.join(noise_root, name)
        for noise_root, _, names in os.walk(noise_folder)
        for name in names
        if name.lower().endswith('.wav')
    )
    if not noise_files:
        raise ValueError(f"No .wav files found in noise folder: {noise_folder}")

    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder and its subdirectories
    for root, _, files in os.walk(input_folder):
        # Create corresponding output subdirectory structure
        output_subfolder = os.path.join(output_folder, os.path.relpath(root, input_folder))
        os.makedirs(output_subfolder, exist_ok=True)

        for file_name in files:
            if not file_name.endswith('.wav'):  # Only WAV clips are augmented
                continue

            input_file_path = os.path.join(root, file_name)
            output_file_path = os.path.join(output_subfolder, f"{os.path.splitext(file_name)[0]}_noise_aug.wav")

            # Load the speech clip at its native rate, capped at 3 seconds
            audio, sr = librosa.load(input_file_path, sr=None, duration=3.0)
            if audio.size == 0:
                print(f"Skipped {input_file_path} (no audio samples)")
                continue

            augmented_audio = np.copy(audio)

            # Load the noise at the clip's sampling rate so both signals share
            # one time base before they are summed.
            noise_file = np.random.choice(noise_files)
            noise, _ = librosa.load(noise_file, sr=sr, duration=3.0)
            # Trim, or cyclically repeat, the noise to exactly the clip length.
            # np.resize also copes with an empty recording (zero fill), where a
            # manual "repeat until long enough" loop would never terminate.
            noise = np.resize(noise, len(audio))
            noise_power = np.var(noise)

            # A silent noise recording would make the scaling divide by zero;
            # in that case the clip is written unchanged.
            if noise_power > 0:
                segment_len = int(sr)  # one second worth of samples
                # Start from 0 and clamp the end so that the final (possibly
                # partial) second is processed as well.
                for start in range(0, len(audio), segment_len):
                    stop = min(start + segment_len, len(audio))

                    # Draw the SNR for this segment
                    snr = np.random.uniform(snr_range[0], snr_range[1])

                    # Scale the noise against this segment's speech power
                    scaling_factor = np.sqrt(np.var(audio[start:stop]) / (noise_power * 10 ** (snr / 10)))

                    augmented_audio[start:stop] += scaling_factor * noise[start:stop]

            # Keep samples within [-1, 1] so the mix is not clipped when it is
            # stored as integer PCM.
            peak = np.max(np.abs(augmented_audio))
            if peak > 1.0:
                augmented_audio = augmented_audio / peak

            # Write augmented audio to the specified output path
            sf.write(output_file_path, augmented_audio, sr)


In [17]:
def augment_all_files_babble(input_folder, output_folder, speech_folder, snr_range=(13, 20), num_speakers_range=(3, 7)):
    """Add multi-speaker babble to every ``.wav`` clip under ``input_folder``.

    The directory layout of ``input_folder`` is mirrored under
    ``output_folder``; a clip ``name.wav`` is written as
    ``name_babble_aug.wav``. At most the first 3 seconds of each clip are
    used. The babble is the sum of several randomly chosen speech recordings
    and is added at a signal-to-noise ratio drawn uniformly from ``snr_range``.

    Args:
        input_folder: Root directory that is walked recursively for clips.
        output_folder: Root directory for the augmented clips (created if
            missing).
        speech_folder: Directory searched recursively for ``.wav`` speech files.
        snr_range: ``(low, high)`` bounds in dB for the SNR.
        num_speakers_range: ``(min, max)`` number of speech recordings summed
            into the babble; both ends are inclusive.

    Raises:
        ValueError: If no ``.wav`` file is found under ``speech_folder``.
    """
    # Build the pool of candidate speech files once, instead of re-listing the
    # folder for every speaker of every clip. Only .wav files are kept so that
    # sub-folders and stray non-audio files can never be picked.
    speech_files = sorted(
        os.path.join(speech_root, name)
        for speech_root, _, names in os.walk(speech_folder)
        for name in names
        if name.lower().endswith('.wav')
    )
    if not speech_files:
        raise ValueError(f"No .wav files found in speech folder: {speech_folder}")

    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder and its subdirectories
    for root, _, files in os.walk(input_folder):
        # Create corresponding output subdirectory structure
        output_subfolder = os.path.join(output_folder, os.path.relpath(root, input_folder))
        os.makedirs(output_subfolder, exist_ok=True)

        for file_name in files:
            if not file_name.endswith('.wav'):  # Only WAV clips are augmented
                continue

            input_file_path = os.path.join(root, file_name)
            output_file_path = os.path.join(output_subfolder, f"{os.path.splitext(file_name)[0]}_babble_aug.wav")

            # Load the speech clip at its native rate, capped at 3 seconds
            audio, sr = librosa.load(input_file_path, sr=None, duration=3.0)
            if audio.size == 0:
                print(f"Skipped {input_file_path} (no audio samples)")
                continue

            # Sum several background speakers into one babble track
            babble_audio = np.zeros_like(audio)
            num_speakers = random.randint(num_speakers_range[0], num_speakers_range[1])
            for _speaker in range(num_speakers):
                speech_file = np.random.choice(speech_files)

                # Load at the clip's sampling rate so all signals share one
                # time base before they are summed.
                speech, _speech_sr = librosa.load(speech_file, sr=sr, duration=3.0)

                # Trim, or cyclically repeat, the speech to exactly the clip
                # length. Without the trim, a clip shorter than 3 seconds makes
                # the in-place sum fail on mismatched shapes.
                babble_audio += np.resize(speech, len(audio))

            # Draw the SNR for this clip
            snr = np.random.uniform(snr_range[0], snr_range[1])

            babble_power = np.var(babble_audio)
            if babble_power > 0:
                scaling_factor = np.sqrt(np.var(audio) / (babble_power * 10 ** (snr / 10)))
                augmented_audio = audio + scaling_factor * babble_audio
            else:
                # Silent babble: scaling would divide by zero, keep the clip as is
                augmented_audio = np.copy(audio)

            # Keep samples within [-1, 1] so the mix is not clipped when it is
            # stored as integer PCM.
            peak = np.max(np.abs(augmented_audio))
            if peak > 1.0:
                augmented_audio = augmented_audio / peak

            # Write augmented audio to the specified output path
            sf.write(output_file_path, augmented_audio, sr)

In [14]:
def augment_all_files_rirs(input_folder, output_folder, rirs_folder):
    """Convolve every ``.wav`` clip under ``input_folder`` with a random room impulse response.

    The directory layout of ``input_folder`` is mirrored under
    ``output_folder``; a clip ``name.wav`` is written as ``name_rirs_aug.wav``.
    At most the first 3 seconds of each clip are used, and the output has the
    same number of samples as the loaded clip.

    Args:
        input_folder: Root directory that is walked recursively for clips.
        output_folder: Root directory for the augmented clips (created if
            missing).
        rirs_folder: Directory searched recursively for ``.wav`` impulse
            responses.

    Raises:
        ValueError: If no ``.wav`` file is found under ``rirs_folder``.
    """
    # Build the pool of impulse responses once, instead of re-listing the
    # folder for every clip. Only .wav files are kept so that sub-folders and
    # stray non-audio files can never be picked.
    rirs_files = sorted(
        os.path.join(rirs_root, name)
        for rirs_root, _, names in os.walk(rirs_folder)
        for name in names
        if name.lower().endswith('.wav')
    )
    if not rirs_files:
        raise ValueError(f"No .wav files found in RIR folder: {rirs_folder}")

    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder and its subdirectories
    for root, _, files in os.walk(input_folder):
        # Create corresponding output subdirectory structure
        output_subfolder = os.path.join(output_folder, os.path.relpath(root, input_folder))
        os.makedirs(output_subfolder, exist_ok=True)

        for file_name in files:
            if not file_name.endswith('.wav'):  # Only WAV clips are augmented
                continue

            input_file_path = os.path.join(root, file_name)
            output_file_path = os.path.join(output_subfolder, f"{os.path.splitext(file_name)[0]}_rirs_aug.wav")

            # Load the speech clip at its native rate, capped at 3 seconds
            audio, sr = librosa.load(input_file_path, sr=None, duration=3.0)
            if audio.size == 0:
                print(f"Skipped {input_file_path} (no audio samples)")
                continue

            # Load the impulse response at the clip's sampling rate; librosa
            # returns a mono signal by default, so no channel handling is needed.
            rirs_file = np.random.choice(rirs_files)
            rirs, _ = librosa.load(rirs_file, sr=sr)

            if rirs.size == 0:
                # Nothing to convolve with: keep the clip as is
                augmented_audio = np.copy(audio)
            else:
                # Take the full convolution and keep its first len(audio)
                # samples. mode='same' would return the centred slice, which
                # drops the start of the reverberated signal and changes the
                # output length whenever the RIR is longer than the clip.
                augmented_audio = np.convolve(audio, rirs, mode='full')[:len(audio)]

            # Keep samples within [-1, 1] so the result is not clipped when it
            # is stored as integer PCM.
            peak = np.max(np.abs(augmented_audio))
            if peak > 1.0:
                augmented_audio = augmented_audio / peak

            # Write augmented audio to the specified output path
            sf.write(output_file_path, augmented_audio, sr)

In [15]:
# Source clips to augment.
input_folder = "D:\\SEM 4\\Project\\sep28k-final"
# One output root per augmentation type.
output_folder_music = "D:\\SEM 4\\Project\\augmented\\music"
output_folder_noise = "D:\\SEM 4\\Project\\augmented\\noise"
output_folder_babble = "D:\\SEM 4\\Project\\augmented\\babble"
output_folder_rirs = "D:\\SEM 4\\Project\\augmented\\rirs"
# Folders the augmentation functions draw their background material from.
music_folder = "D:\\SEM 4\\Project\\musan\\music"
noise_folder="D:\\SEM 4\\Project\\musan\\noise"
speech_folder="D:\\SEM 4\\Project\\musan\\speech"
rirs_folder = "D:\\SEM 4\\Project\\musan\\rirs"

In [20]:
# Run the music augmentation over input_folder; results go under output_folder_music.
augment_all_files_music(input_folder, output_folder_music, music_folder)

In [21]:
# Run the noise augmentation over input_folder; results go under output_folder_noise.
augment_all_files_noise(input_folder, output_folder_noise, noise_folder)

In [22]:
# Run the babble augmentation over input_folder; results go under output_folder_babble.
augment_all_files_babble(input_folder,output_folder_babble,speech_folder)

In [16]:
# Run the RIR augmentation over input_folder; results go under output_folder_rirs.
augment_all_files_rirs(input_folder,output_folder_rirs,rirs_folder)

In [18]:
def count_wav_files(folder):
    """Return the number of files under ``folder`` (recursively) whose name ends in '.wav'."""
    return sum(
        1
        for _parent, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith('.wav')
    )

In [42]:
# Count the WAV files present under the music output folder.
number_of_files = count_wav_files(output_folder_music)
print("Number of WAV files:", number_of_files)

Number of WAV files: 20091


In [20]:
# Count the WAV files present under the noise output folder.
number_of_files = count_wav_files(output_folder_noise)
print("Number of WAV files:", number_of_files)

Number of WAV files: 21836


In [21]:
# Count the WAV files present under the babble output folder.
number_of_files = count_wav_files(output_folder_babble)
print("Number of WAV files:", number_of_files)

Number of WAV files: 21836


In [19]:
# Count the WAV files present under the RIR output folder.
number_of_files = count_wav_files(output_folder_rirs)
print("Number of WAV files:", number_of_files)   

Number of WAV files: 21836
