In [7]:
import os
import shutil
from glob import glob
import pandas as pd
from tqdm import tqdm
import numpy as np

import librosa
import librosa.display
from IPython.display import Audio

In [2]:
# Working directory as a string, normalised so later string concatenation
# produces consistent paths.
current_path = (
    os.getcwd()
    .replace("\\", "/")    # backslash separators -> forward slashes
    .replace("c:", "C:")   # upper-case the drive prefix
)
# Folder holding the split WAV clips (trailing slash so filenames can be appended).
wav_path = f"{current_path}/wavFiles/"

In [3]:
# Load the clip metadata table (wav_df.csv) from the working directory.
wav_df = pd.read_csv(f"{current_path}/wav_df.csv")

In [5]:
def noise(data):
    """Return ``data`` with randomly scaled Gaussian noise added.

    The noise amplitude is ``0.035 * u * max(data)``, where ``u`` is a single
    draw from ``np.random.uniform()``. One standard-normal sample is drawn per
    element along the first axis of ``data``. Uses NumPy's global random state,
    so results vary between calls unless that state is seeded.
    """
    amplitude = 0.035 * np.random.uniform() * np.amax(data)
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian

def stretch(data, rate=0.8):
    """Time-stretch an audio signal without changing its pitch.

    Parameters
    ----------
    data : np.ndarray
        Audio samples passed straight to ``librosa.effects.time_stretch``.
    rate : float, default 0.8
        Stretch factor forwarded to librosa (per the librosa docs, values
        above 1 speed the signal up and values below 1 slow it down).

    Returns
    -------
    np.ndarray
        The stretched signal as returned by librosa.
    """
    # Forward the caller's rate; it was previously ignored in favour of a
    # hard-coded 0.8, so stretch(x, rate=1.2) behaved like stretch(x).
    return librosa.effects.time_stretch(data, rate=rate)

def shift(data):
    """Circularly rotate ``data`` by a random whole number of positions.

    The offset is ``int(u * 1000)`` for one draw ``u`` from
    ``np.random.uniform(-5, 5)``; elements rolled past one end re-enter at the
    other (``np.roll`` semantics).
    """
    random_factor = np.random.uniform(low=-5, high=5)
    n_positions = int(random_factor * 1000)
    return np.roll(data, n_positions)

def pitch(data, sampling_rate, n_steps=1):
    """Pitch-shift an audio signal.

    Parameters
    ----------
    data : np.ndarray
        Audio samples passed to ``librosa.effects.pitch_shift``.
    sampling_rate : int
        Sampling rate of ``data``, forwarded as ``sr``.
    n_steps : float, default 1
        Number of steps to shift by, forwarded to librosa (semitones under
        librosa's default of 12 bins per octave). The default keeps the
        previous fixed one-step shift.

    Returns
    -------
    np.ndarray
        The pitch-shifted signal as returned by librosa.
    """
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=n_steps)

In [6]:
def extract_features(data, sample_rate):
    """Summarise an audio signal as one flat vector of time-averaged features.

    Each librosa feature matrix is transposed and averaged over its first axis,
    and the resulting per-feature means are laid end to end in this order:
    zero-crossing rate, chroma (computed from the STFT magnitude), MFCC,
    RMS energy, mel spectrogram.

    Parameters
    ----------
    data : np.ndarray
        Audio samples handed to the librosa feature extractors.
    sample_rate : int
        Sampling rate, forwarded as ``sr`` where the extractor takes one.

    Returns
    -------
    np.ndarray
        The concatenated feature means.
    """
    def averaged(feature_matrix):
        # Transpose, then collapse axis 0 to one mean per feature row.
        return np.mean(feature_matrix.T, axis=0)

    # Zero-crossing rate
    zero_crossings = averaged(librosa.feature.zero_crossing_rate(y=data))

    # Chroma, derived from the magnitude of the short-time Fourier transform
    magnitude = np.abs(librosa.stft(data))
    chroma = averaged(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate))

    # Mel-frequency cepstral coefficients
    cepstral = averaged(librosa.feature.mfcc(y=data, sr=sample_rate))

    # Root-mean-square energy
    energy = averaged(librosa.feature.rms(y=data))

    # Mel spectrogram
    mel_bands = averaged(librosa.feature.melspectrogram(y=data, sr=sample_rate))

    # The leading empty array is NumPy's default float dtype; stacking onto it
    # promotes the result the same way building the vector up piece by piece
    # from an empty array would.
    return np.hstack([np.array([]), zero_crossings, chroma, cepstral, energy, mel_bands])

In [8]:
def get_features(path):
    """Load an audio file and return features for three variants of it, joined.

    At most the first 2.5 seconds of the file are loaded. Features are
    extracted from (1) the signal as loaded, (2) the signal with random noise
    added, and (3) the signal time-stretched and then pitch-shifted. The three
    feature vectors are concatenated end to end into a single array, in that
    order.

    Parameters
    ----------
    path : str
        Location of the audio file, passed to ``librosa.load``.

    Returns
    -------
    np.ndarray
        The three feature vectors joined along axis 0.
    """
    signal, rate = librosa.load(path, duration=2.5, offset=0.0)

    # 1) signal exactly as loaded
    plain_features = extract_features(signal, rate)

    # 2) additive random noise
    noisy_features = extract_features(noise(signal), rate)

    # 3) time-stretch first, then pitch-shift the stretched signal
    warped_signal = pitch(stretch(signal), rate)
    warped_features = extract_features(warped_signal, rate)

    return np.concatenate((plain_features, noisy_features, warped_features), axis=0)

In [9]:
# Preview the first five rows of the clip metadata.
wav_df.head(n=5)

Unnamed: 0,filename,label,sentence
0,301_AUDIO_112268.0_113068.0_SPLIT.wav,0,the traffic
1,301_AUDIO_116188.0_122918.0_SPLIT.wav,0,the traffic is horrible well probably traffic ...
2,301_AUDIO_126698.0_135338.0_SPLIT.wav,0,not really i mean i have enough things going o...
3,301_AUDIO_142168.0_144338.0_SPLIT.wav,0,i studied uh business
4,301_AUDIO_145058.0_145518.0_SPLIT.wav,0,i did


In [10]:
# Display the resolved WAV directory string.
wav_path

'C:/Users/82105/Desktop/코드/english_depression/wavFiles/'

In [12]:
# Extract one feature vector per clip listed in wav_df.
# Clips whose extraction raises ValueError are skipped; their filenames are
# collected in `failed_files` so the skipped rows can be identified
# programmatically instead of being copied by hand from the printed log.
X_audio, Y, failed_files = [], [], []
for path, label in tqdm(zip(wav_df['filename'], wav_df['label']), total=len(wav_df)):
    try:
        clip_features = get_features(wav_path + path)
    except ValueError as e:
        failed_files.append(path)
        print(f"Error processing file {path}: {e}")
    else:
        # Append features and label together so X_audio and Y stay row-aligned.
        X_audio.append(clip_features)
        Y.append(label)

 92%|█████████▏| 5292/5729 [07:16<00:13, 31.94it/s]

Error processing file 473_AUDIO_242325.0_247795.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_314305.0_315055.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_320425.0_321875.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_418215.0_420295.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_424695.0_428795.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_471545.0_472765.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_482835.0_483765.0_SPLIT.wav: can't extend empty axis 0 using modes other than 'constant' or 'empty'
Error processing file 473_AUDIO_497315.0_498255.0_SPLIT.wav: can't extend em

100%|██████████| 5729/5729 [07:51<00:00, 12.14it/s]


In [14]:
# Tabulate the collected feature vectors: one row per processed clip.
audio_features = pd.DataFrame(data=X_audio)

In [20]:
# Compare sizes: metadata rows, feature table shape, number of collected labels.
(wav_df.shape[0], audio_features.shape, len(Y))

(5729, (5721, 486), 5721)

In [15]:
# Print the label of every feature column that contains at least one null.
null_counts = audio_features.isnull().sum()
for column in null_counts[null_counts != 0].index:
    print(column)

In [25]:
# Filenames of the clips reported as failing during feature extraction.
probelm_list = [
    "473_AUDIO_242325.0_247795.0_SPLIT.wav",
    "473_AUDIO_314305.0_315055.0_SPLIT.wav",
    "473_AUDIO_320425.0_321875.0_SPLIT.wav",
    "473_AUDIO_418215.0_420295.0_SPLIT.wav",
    "473_AUDIO_424695.0_428795.0_SPLIT.wav",
    "473_AUDIO_471545.0_472765.0_SPLIT.wav",
    "473_AUDIO_482835.0_483765.0_SPLIT.wav",
    "473_AUDIO_497315.0_498255.0_SPLIT.wav",
]

In [28]:
# Keep only the metadata rows whose filename is not in probelm_list.
is_problem_clip = wav_df["filename"].isin(probelm_list)
wav_df = wav_df[~is_problem_clip]

In [29]:
# Give both tables a fresh 0..n-1 index so they can be paired row by row.
# Rebinding instead of inplace=True avoids mutating a frame that was produced
# by filtering and keeps the cell a plain "inputs -> new value" step.
audio_features = audio_features.reset_index(drop=True)
wav_df = wav_df.reset_index(drop=True)

In [32]:
# pd.concat(axis=1) pairs rows by index label. If the metadata table is out of
# step with the feature table, the result would silently carry wrong labels or
# NaN-padded rows, so verify alignment before writing anything to disk.
if len(audio_features) != len(wav_df):
    raise ValueError(
        f"Row count mismatch: {len(audio_features)} feature rows vs "
        f"{len(wav_df)} metadata rows"
    )
# Y was collected in the same order as the feature rows, so it must equal the
# label column of the filtered metadata.
if list(Y) != wav_df["label"].tolist():
    raise ValueError("Labels collected during extraction do not match wav_df['label']")

final_df = pd.concat([audio_features, wav_df], axis=1)
final_df.to_csv(current_path + '/final_df.csv', index=False)