# Data Augmentation 

[x] Define the augmentation functions  
[x] Apply the 4 types of augmentation to the training set  
[x] Save the augmented files and the metadata file  

#   Define the 4 augmentation functions

In [16]:
import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio
import soundfile as sf
import warnings
import csv

# Silence only the warning whose message starts with
# "PySoundFile failed. Trying audioread instead."; every other warning
# is still shown.
warnings.filterwarnings('ignore', message='PySoundFile failed. Trying audioread instead.')


def noise(data):
    """Return ``data`` with random Gaussian noise added to every sample.

    The noise scale is a random fraction (uniform in [0, 0.035)) of the
    largest sample value in ``data`` (``np.amax``, not the absolute peak).
    The input array is not modified; a new array is returned.
    """
    # Draw the random fraction first, then scale it by the signal's maximum.
    scale = 0.035 * np.random.uniform() * np.amax(data)
    # One standard-normal value per sample along the first axis.
    gaussian = np.random.normal(size=data.shape[0])
    return data + scale * gaussian

def stretch(data, rate=0.8):
    """Time-stretch ``data`` without changing its pitch.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to stretch.
    rate : float, default 0.8
        Stretch factor passed to ``librosa.effects.time_stretch``:
        values below 1 slow the audio down (longer output), values above 1
        speed it up (shorter output).

    Returns
    -------
    np.ndarray
        The stretched signal; its length differs from the input's.
    """
    # Forward the caller's rate; it was previously hard-coded to 0.8, which
    # silently ignored the argument.
    return librosa.effects.time_stretch(data, rate=rate)
    
def shift(data):
    """Circularly shift ``data`` by a random number of samples.

    The offset is drawn uniformly from [-5, 5), scaled by 1000 and truncated
    to an integer, so it lies between -5000 and 5000 *samples* (not
    milliseconds). ``np.roll`` wraps samples pushed past one end around to
    the other, so the output has the same length as the input.
    """
    offset = int(1000 * np.random.uniform(low=-5, high=5))
    rolled = np.roll(data, offset)
    return rolled

def pitch(data, sampling_rate=16000):
    """Shift the pitch of ``data`` up by three quarter-tones.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to pitch-shift.
    sampling_rate : int, default 16000
        Sample rate of ``data``. It is an explicit parameter so the function
        no longer depends on a global ``sampling_rate`` left behind by
        another cell; the default matches the ``sr=16000`` used when the
        training files are loaded in this notebook.

    Returns
    -------
    np.ndarray
        The pitch-shifted signal.
    """
    # Earlier trials, kept for reference:
    #   Trial 1: shift up by a major third (n_steps=4 with bins_per_octave=12)
    #   Trial 2: shift down by a tritone (n_steps=-6 with bins_per_octave=12)
    # Trial 3 (current): shift up by 3 quarter-tones, i.e. 3 steps with
    # 24 bins per octave.
    data_pitch = librosa.effects.pitch_shift(
        data, sr=sampling_rate, n_steps=3, bins_per_octave=24
    )
    return data_pitch


# def pitch(data, sampling_rate, pitch_factor=0.7):      
#     # data_pitch=librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)
#     return data_pitch

In [11]:
# Metadata CSV listing every audio file; the 'renamed_file_path' column holds
# each file's path after the train/test split.
csv_file_path = "dataset_info_combined_v4.csv"  # replace your csv file name
df = pd.read_csv(csv_file_path)

# Keep only rows whose path lies under the training folder.
# na=False treats rows with a missing path as non-matching; without it the
# mask would contain NaN and the boolean indexing below would raise.
is_train = df['renamed_file_path'].str.startswith('./dataset/splitted/train/', na=False)
train_file_path = df.loc[is_train, 'renamed_file_path'].tolist()

for file_path in train_file_path:
    print(file_path)

print("The total amount of train dataset:", len(train_file_path))

#The total amount of train dataset:10982


./dataset/splitted/train/CREMA-D_1022_ITS_ANG_XX_Anger_-1.wav
./dataset/splitted/train/CREMA-D_1037_ITS_ANG_XX_Anger_-1.wav
./dataset/splitted/train/CREMA-D_1060_ITS_NEU_XX_Neutrality_0.wav
./dataset/splitted/train/CREMA-D_1075_ITS_NEU_XX_Neutrality_0.wav
./dataset/splitted/train/CREMA-D_1073_IOM_DIS_XX_Disgust_-1.wav
./dataset/splitted/train/CREMA-D_1066_IOM_DIS_XX_Disgust_-1.wav
./dataset/splitted/train/CREMA-D_1078_IWL_SAD_XX_Sadness_-1.wav
./dataset/splitted/train/CREMA-D_1029_TAI_FEA_XX_Fear_-1.wav
./dataset/splitted/train/CREMA-D_1039_IEO_SAD_MD_Sadness_-1.wav
./dataset/splitted/train/CREMA-D_1008_TAI_HAP_XX_Happiness_1.wav
./dataset/splitted/train/CREMA-D_1018_TSI_FEA_XX_Fear_-1.wav
./dataset/splitted/train/CREMA-D_1028_IEO_SAD_HI_Sadness_-1.wav
./dataset/splitted/train/CREMA-D_1039_TSI_HAP_XX_Happiness_1.wav
./dataset/splitted/train/CREMA-D_1064_ITS_HAP_XX_Happiness_1.wav
./dataset/splitted/train/CREMA-D_1071_ITS_HAP_XX_Happiness_1.wav
./dataset/splitted/train/CREMA-D_1045_ITS_

#   1- Noise Injection on 10982 train data

In [12]:

# Directory that receives the noise-augmented copies of the training set
new_directory = "./dataset/splitted/Train_Data_Augmentation/NoiseInjection"    # change to your own directory

# Create the directory if needed; exist_ok makes re-running this cell safe
os.makedirs(new_directory, exist_ok=True)

# Add noise to each training WAV file and save the result under a new name
for file_path in train_file_path:
    try:
        # Load the WAV file, resampled to 16 kHz
        data, sampling_rate = librosa.load(file_path, sr=16000)

        # Add random noise
        noisy_data = noise(data)

        # Same base name with a "_NoiseInjection" suffix, in the new directory
        file_name = os.path.basename(file_path)
        new_file_path = os.path.join(new_directory, file_name.replace('.wav', '_NoiseInjection.wav'))  # change your new wav file name

        # Save the processed audio data as a new WAV file
        sf.write(new_file_path, noisy_data, sampling_rate)
    except FileNotFoundError:
        print(f"File not found: {file_path}. Skipping...")
    except Exception as e:
        # Report and continue so a single unreadable file does not abort the
        # whole run (same handling as the Shift and Pitch cells).
        print(f"An error occurred while processing {file_path}: {e}")
        



#   2- Time Stretch on 10982 train data

In [13]:

# Directory that receives the time-stretched copies of the training set
new_directory = "./dataset/splitted/Train_Data_Augmentation/Stretch"    # change to your own directory

# Create the directory if needed; exist_ok makes re-running this cell safe
os.makedirs(new_directory, exist_ok=True)

for file_path in train_file_path:
    try:
        # Load the WAV file, resampled to 16 kHz
        data, sampling_rate = librosa.load(file_path, sr=16000)

        # Time stretch (changes the duration without changing the pitch)
        x = stretch(data)

        # Same base name with a "_Stretch" suffix, in the new directory
        file_name = os.path.basename(file_path)
        new_file_path = os.path.join(new_directory, file_name.replace('.wav', '_Stretch.wav'))

        # Save the processed audio data as a new WAV file
        sf.write(new_file_path, x, sampling_rate)
    except FileNotFoundError:
        print(f"File not found: {file_path}. Skipping...")
    except Exception as e:
        # Report and continue so a single unreadable file does not abort the
        # whole run (same handling as the Shift and Pitch cells).
        print(f"An error occurred while processing {file_path}: {e}")


#   3- Time Shift on 10982 train data

In [14]:

# Output folder for the time-shifted copies of the training audio
new_directory = "./dataset/splitted/Train_Data_Augmentation/Shift"

# Make the folder on the first run; leave it alone if it is already there
if not os.path.exists(new_directory):
    os.makedirs(new_directory)

# Walk through the training files and write one shifted copy per file
for file_path in train_file_path:
    try:
        # Decode the audio, resampled to 16 kHz
        data, sampling_rate = librosa.load(file_path, sr=16000)

        # Roll the samples by a random offset of up to 5000 samples either way
        shifted = shift(data)

        # Same base name with a "_Shift" suffix, placed in the output folder
        shifted_name = os.path.basename(file_path).replace('.wav', '_Shift.wav')
        target_path = os.path.join(new_directory, shifted_name)

        # Write the shifted signal at the sample rate it was loaded with
        sf.write(target_path, shifted, sampling_rate)
    except FileNotFoundError:
        print(f"File not found: {file_path}. Skipping...")
    except Exception as e:
        print(f"An error occurred while processing {file_path}: {e}")


#   4 - Pitch Shift on 10982 train data

In [17]:
# Output folder for the pitch-shifted copies of the training audio
new_directory = "./dataset/splitted/Train_Data_Augmentation/Pitch" 

# Make the folder on the first run; leave it alone if it is already there
if not os.path.exists(new_directory):
    os.makedirs(new_directory)

# Walk through the training files and write one pitch-shifted copy per file
for file_path in train_file_path:
    try:
        # Decode the audio, resampled to 16 kHz
        data, sampling_rate = librosa.load(file_path, sr=16000)

        # Raise the pitch with the pitch() helper
        pitched = pitch(data)

        # Same base name with a "_Pitch" suffix, placed in the output folder
        pitched_name = os.path.basename(file_path).replace('.wav', '_Pitch.wav')
        target_path = os.path.join(new_directory, pitched_name)

        # Write the shifted signal at the sample rate it was loaded with
        sf.write(target_path, pitched, sampling_rate)
    except FileNotFoundError:
        print(f"File not found: {file_path}. Skipping...")
    except Exception as e:
        print(f"An error occurred while processing {file_path}: {e}")



# manually replace \ with /

#   Export all generated augmented WAV files  ----- path + attributes (augmentation type, sentiment value & emotional category)

In [25]:
import os
import csv
import pandas as pd

# Root folder that holds one subfolder per augmentation type
folder_path = './dataset/splitted/Train_Data_Augmentation'

# Subfolders containing augmented WAV files. Sorted so the CSV row order is
# reproducible (os.listdir returns entries in arbitrary order).
subfolders = sorted(
    os.path.join(folder_path, f)
    for f in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, f))
)

# Collect the paths of all augmented WAV files, subfolder by subfolder
wav_files = []
for subfolder in subfolders:
    wav_files.extend(
        os.path.join(subfolder, f)
        for f in sorted(os.listdir(subfolder))
        if f.endswith('.wav')
    )

# CSV file that stores the generated metadata
csv_file_path = 'train_data_augmentation_attitudes_final.csv'

# newline='' is what the csv module expects; the explicit encoding keeps the
# file identical across platforms.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    # Header row
    writer.writerow(['File_Name', 'Augmentation_Type', 'Sentiment_Value', 'Emotional_Category'])

    for wav_file in wav_files:
        file_name = os.path.basename(wav_file)
        parts = file_name.split('_')

        # The last three underscore-separated fields are read as
        # <emotion>_<sentiment>_<augmentation>.wav; skip names too short
        # to carry them instead of failing with an IndexError.
        if len(parts) < 3:
            print(f"Unexpected file name, skipping: {wav_file}")
            continue

        augmentation_type = parts[-1].split('.')[0]  # Remove the file extension
        sentiment_value = parts[-2]
        emotional_category = parts[-3]

        # Store the path with forward slashes so the CSV is the same on
        # Windows and POSIX (no manual "\" -> "/" replacement afterwards).
        writer.writerow([wav_file.replace(os.sep, '/'), augmentation_type, sentiment_value, emotional_category])

# Report where the CSV file was written
print("Train data augmentation attitudes CSV file generated:", csv_file_path)


Train data augmentation attitudes CSV file generated: train_data_augmentation_attitudes_final.csv
