In [1]:
import numpy as np
import matplotlib.pyplot as plt
import librosa as lr
import pandas as pd
import soundfile as sf
import shutil

import os

In [2]:
# Directory holding the split CSVs: train_split.csv is read from here and
# train_split_augmented.csv is written back into it.
DIR = "../../dataset_info"
# Directory of training audio: it is listed to find recordings, and the
# augmented .wav files are written into this same directory.
TRAIN_DATASET_DIR = "../../extracted_audio/train"

In [3]:
# Load the training split, then keep only the rows whose PHQ_Binary label is 1.
train_df = pd.read_csv(f"{DIR}/train_split.csv")

depressed = train_df.loc[train_df["PHQ_Binary"].eq(1)]
depressed.shape

(37, 6)

In [4]:
# Participant ids of the PHQ_Binary == 1 rows, as a NumPy array.
depressed_ids = depressed["Participant_ID"].to_numpy()
depressed_ids

array([308, 309, 311, 319, 330, 332, 337, 338, 339, 345, 346, 348, 351,
       353, 354, 355, 359, 362, 367, 372, 376, 377, 384, 389, 405, 410,
       414, 421, 426, 440, 453, 461, 641, 673, 677, 680, 684])

In [19]:
# Collect the original (non-augmented) recordings of the depressed participants.
# A set makes the per-file membership test O(1) instead of scanning the id array.
depressed_id_set = set(depressed_ids.tolist())
depressed_audio_files = []

for file in sorted(os.listdir(TRAIN_DATASET_DIR)):
    # Augmented copies are saved into this same directory with an "_S", "_P"
    # or "_B" suffix; any name containing one of those letters is skipped.
    if "S" in file or "P" in file or "B" in file:
        continue
    # The participant id is the part of the name before the first underscore.
    # Stray entries without a numeric prefix are ignored instead of aborting
    # the whole scan with a ValueError.
    try:
        participant_id = int(file.split("_")[0])
    except ValueError:
        continue
    if participant_id in depressed_id_set:
        depressed_audio_files.append(file)

In [16]:
# Shifting the audio
def shift_audio(data, sampling_rate, shift_max, shift_direction):
    """Shift a signal in time by a random number of samples, padding with silence.

    Parameters
    ----------
    data : np.ndarray
        Audio samples. The input array is not modified.
    sampling_rate : int
        Samples per second; together with ``shift_max`` it bounds the shift.
    shift_max : number
        Upper bound of the shift in seconds. The shift is drawn from
        ``[0, sampling_rate * shift_max)`` samples.
    shift_direction : str
        ``'right'`` negates the drawn shift, ``'both'`` negates it with
        probability 0.5, and any other value leaves it non-negative.
        NOTE(review): a negated shift makes ``np.roll`` move samples toward
        lower indices and zeroes the tail, so ``'right'`` moves the signal
        earlier in time. Confirm this naming is what callers expect.

    Returns
    -------
    np.ndarray
        The shifted signal, with the wrapped-around samples set to 0.
    """
    shift = np.random.randint(sampling_rate * shift_max)
    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift

    augmented_data = np.roll(data, shift)
    # Silence the samples that np.roll wrapped around to the other end.
    if shift > 0:
        augmented_data[:shift] = 0
    elif shift < 0:
        augmented_data[shift:] = 0
    # shift == 0 must leave the signal untouched: `augmented_data[0:] = 0`
    # would otherwise wipe the entire signal.
    return augmented_data

def add_noise(data, noise_factor):
    """Add Gaussian noise to a 1-D signal and peak-normalise the result.

    Parameters
    ----------
    data : np.ndarray
        1-D array of audio samples (one noise value is drawn per element).
    noise_factor : float
        Scale applied to the standard-normal noise before it is added.

    Returns
    -------
    np.ndarray
        ``data + noise_factor * noise`` divided by its largest absolute value,
        so the output lies in [-1, 1]. An empty or all-zero result is returned
        without scaling.
    """
    noise = np.random.randn(len(data))
    augmented_data = data + noise_factor * noise
    # Normalise by the peak magnitude rather than the signed maximum: dividing
    # by `.max()` lets large negative samples exceed -1, flips the sign when
    # every sample is negative, and yields NaN/inf when the maximum is 0.
    peak = np.max(np.abs(augmented_data)) if augmented_data.size else 0.0
    if peak > 0:
        augmented_data = augmented_data / peak
    return augmented_data

# Using only pitch and speed as of now
def change_pitch(data, sampling_rate, pitch_factor):
    """Return ``data`` pitch-shifted with librosa.

    ``sampling_rate`` is forwarded as ``sr`` and ``pitch_factor`` as
    ``n_steps`` to ``librosa.effects.pitch_shift``.
    """
    shifted = lr.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)
    return shifted

def change_speed(data, speed_factor):
    """Return ``data`` time-stretched with librosa.

    ``speed_factor`` is forwarded as ``rate`` to
    ``librosa.effects.time_stretch``.
    """
    stretched = lr.effects.time_stretch(data, rate=speed_factor)
    return stretched

In [46]:
AUG_TECHNIQUES = ["S", "P", "B"]


def _random_pitch_steps(max_steps=3):
    """Draw a non-zero integer pitch shift from [-max_steps, max_steps]."""
    steps = 0
    while steps == 0:
        # randint's upper bound is exclusive; the +1 makes +max_steps reachable
        # so the range is symmetric around zero.
        steps = np.random.randint(-max_steps, max_steps + 1)
    return steps


def augment_audio(audio_files):
    """Write augmented copies of each audio file and extend the split table.

    For every technique in ``AUG_TECHNIQUES`` and every file in
    ``audio_files`` the recording is loaded from ``TRAIN_DATASET_DIR`` and
    transformed:

    * ``"S"``: speed changed by a random factor in [0.8, 1.2)
    * ``"P"``: pitch shifted by a random non-zero integer in [-3, 3]
    * anything else (``"B"``): speed change followed by pitch shift

    The result is saved next to the source as ``<name>_<tech>.wav``, and the
    participant's row from ``train_df`` is duplicated with ``Participant_ID``
    set to ``"<id>_<tech>"`` (e.g. ``308_S``).

    NOTE(review): ``lr.load`` is called without an explicit ``sr``, so the
    output is written at whatever rate librosa returns. Confirm that is
    intended.

    Parameters
    ----------
    audio_files : iterable of str
        File names inside ``TRAIN_DATASET_DIR``; the text before the first
        underscore must be the integer participant id.

    Returns
    -------
    pd.DataFrame
        A copy of ``train_df`` followed by one row per augmented file.
        ``train_df`` itself is not modified.
    """
    new_rows = []
    for tech in AUG_TECHNIQUES:
        for file in audio_files:
            print(f"Augmenting {file} using {tech}")
            y, sr = lr.load(os.path.join(TRAIN_DATASET_DIR, file))

            if tech == "S":
                speed_factor = np.random.uniform(0.8, 1.2)
                print(f"Speed factor: {speed_factor}")
                augmented_data = change_speed(y, speed_factor)

            elif tech == "P":
                pitch_factor = _random_pitch_steps()
                print(f"Pitch factor: {pitch_factor}")
                augmented_data = change_pitch(y, sr, pitch_factor)

            else:
                speed_factor = np.random.uniform(0.8, 1.2)
                pitch_factor = _random_pitch_steps()
                print(f"Speed factor: {speed_factor}")
                print(f"Pitch factor: {pitch_factor}")
                augmented_data = change_speed(y, speed_factor)
                augmented_data = change_pitch(augmented_data, sr, pitch_factor)

            # Save the augmented data. splitext strips only the final
            # extension, so names containing extra dots are kept whole.
            file_name = os.path.splitext(file)[0]
            augmented_file_name = f"{file_name}_{tech}.wav"
            sf.write(os.path.join(TRAIN_DATASET_DIR, augmented_file_name), augmented_data, sr)

            # Duplicate the participant's row under an id that distinguishes
            # the augmented clip from the original audio (new id = 308_S).
            participant_id = int(file.split("_")[0])
            new_row = train_df[train_df["Participant_ID"] == participant_id].copy()
            new_row["Participant_ID"] = f"{participant_id}_{tech}"
            new_rows.append(new_row)
            print("-----------------------")

    if not new_rows:
        return train_df.copy()
    # Concatenate once at the end instead of re-copying the frame per file.
    return pd.concat([train_df, *new_rows], ignore_index=True)

In [None]:
# Create the augmented variants of every depressed-participant recording (this
# writes new .wav files) and get back the split table with one extra row each.
new_train_df = augment_audio(depressed_audio_files)

In [None]:
# Save the augmented split table into DIR, without the row index.
new_train_df.to_csv(os.path.join(DIR, "train_split_augmented.csv"), index=False)

In [30]:
# Re-scan the training directory, this time KEEPING only the augmented copies:
# files whose name contains "S", "P" or "B".
# Note that this rebinds `depressed_audio_files` to the augmented file list.
depressed_audio_files = []

for file in sorted(os.listdir(TRAIN_DATASET_DIR)):
    if any(marker in file for marker in ("S", "P", "B")):
        depressed_audio_files.append(file)

In [50]:
# Start from a copy of the original split so train_df itself stays unmodified.
train_augmented_df = train_df.copy()

In [51]:
# Rebuild the augmented split rows from the augmented files found on disk.
# Each name must split into exactly three "_"-separated parts:
# "<participant id>_<name>_<tech>.wav"; the technique is the first character
# of the last part.
augmented_rows = []
for file in depressed_audio_files:
    # Named `participant_id` rather than `id` so the builtin id() is not
    # shadowed for the rest of the kernel session.
    participant_id, _, tech = file.split("_")
    new_row = train_df[train_df["Participant_ID"] == int(participant_id)].copy()
    new_row["Participant_ID"] = f"{participant_id}_{tech[0]}"
    augmented_rows.append(new_row)

# Concatenate once instead of re-copying the growing frame for every file.
if augmented_rows:
    train_augmented_df = pd.concat([train_augmented_df, *augmented_rows], ignore_index=True)

In [53]:
# Save the table rebuilt from the files on disk. This is the same path the
# new_train_df export uses, so an existing file there is overwritten.
train_augmented_df.to_csv(os.path.join(DIR, "train_split_augmented.csv"), index=False)