# Data preprocessing for fluency classification

The goal of this notebook is to generate a csv file that contains for each row the extracted features for a particular audio file and the corresponding fluency class.

## Importing packages

In [29]:
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO 
from pyAudioAnalysis import MidTermFeatures as aFm

import plotly.graph_objs as go 
import plotly
import IPython

import glob
import numpy as np
import pandas as pd
import parselmouth
from parselmouth.praat import call

import pandas as pd
import matplotlib.pyplot as plt
import random
#
import os, sklearn.cluster
from pyAudioAnalysis.MidTermFeatures import mid_feature_extraction as mT
from pyAudioAnalysis.audioBasicIO import read_audio_file, stereo_to_mono
from pyAudioAnalysis.audioSegmentation import labels_to_segments
#from pyAudioAnalysis.audioTrainTest import normalize_features
import numpy as np
import scipy.io.wavfile as wavfile
import IPython
import sys

# librosa is a Python library for analyzing audio and music. It can be used to extract the data from the audio files we will see it later.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split


In [52]:
low = "fluency_dataset/Low/"
intermediate = "fluency_dataset/Intermediate/"
high = "fluency_dataset/High/"

## Features extraction

In [8]:
def measurePitch(voiceID, f0min, f0max, unit):
    sound = parselmouth.Sound(voiceID) # read the sound
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max) #create a praat pitch object
    meanF0 = call(pitch, "Get mean", 0, 0, unit) # get mean pitch
    stdevF0 = call(pitch, "Get standard deviation", 0 ,0, unit) # get standard deviation
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
    rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    

    return meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer

In [56]:
def noise(data):
    noise_amp = 0.035*np.random.uniform()*np.amax(data)
    data = data + noise_amp*np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.8):
    return librosa.effects.time_stretch(data, rate)

def shift(data):
    shift_range = int(np.random.uniform(low=-5, high = 5)*1000)
    return np.roll(data, shift_range)

def pitch(data, sampling_rate, pitch_factor=0.7):
    return librosa.effects.pitch_shift(data, sampling_rate, pitch_factor)


In [68]:
def ext_features (s_, fs_):
    #fs_, s_ = aIO.read_audio_file(path)
    s_mono_ = aIO.stereo_to_mono(s_)


    segment_len = fs_ * len(s_mono_)
    mt, st, mt_n = aFm.mid_feature_extraction(s_mono_, fs_, segment_len * fs_, segment_len * fs_, 
                                     0.05 * fs_, 0.05 * fs_)
    mt_praat = measurePitch(s_mono_, 75, 500, "Hertz")

    mt = mt.reshape(-1)
    features = np.concatenate((mt, mt_praat), axis=None)
    return features

def get_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    s_, fs_ = librosa.load(path, duration=2.5, offset=0.6)
    # without augmentation
    res1 = ext_features(s_, fs_)
    result = np.array(res1)
    
    return result

def get_aug_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    """    data, sample_rate = librosa.load(path, duration=2.5, offset=0.6)
    s_, fs_ = librosa.load(path, duration=2.5, offset=0.6)"""
    fs_, s_ = aIO.read_audio_file(path)
    # without augmentation
    res1 = ext_features(s_, fs_)
    result = np.array(res1)
    
    """# data with noise
    noise_data = noise(s_)
    res2 = ext_features(noise_data, fs_)
    result = np.vstack((result, res2)) # stacking vertically"""
    
    
    return result

In [37]:
import sounddevice as sd

sd.play(s_)

In [40]:
print(len(s_)/fs_)

5.0


In [54]:
list_of_files = os.listdir(high)
for i in os.listdir(high):
    os.rename(high+i, high+i+".mp3")

In [42]:
list_of_files = os.listdir(low)

In [44]:
print(list_of_files)

['Elderly Chinese street cleaner speaks fluent English segment 29 - E', 'Avalinguo - Xoca and Josué segment 68 - J', 'Avalinguo - Xoca and Josué segment 110 - J', 'Interview with a Filippines Woman segment 54 - W', 'Avalinguo - Itzel and Friend segment 4 - I', 'Avalinguo - Dana and Konay segment 21 - D', 'Avalinguo - Dana and Konay segment 34 - D', 'Avalinguo - Itzel and Friend segment 5 - I', 'Avalinguo - Itzel and Friend segment 10 - I', 'Avalinguo - Xoca and Josué segment 105 - J', 'Avalinguo - Xoca and Josué segment 111 - J', 'Avalinguo - Xoca and Josué segment 55 - J', 'Elderly Chinese street cleaner speaks fluent English segment 28 - E', 'Avalinguo - Xoca and Josué segment 57 - J', 'Avalinguo - Xoca and Josué segment 113 - J', 'Interview with a Filippines Woman segment 43 - W', 'Avalinguo - Xoca and Josué segment 80 - J', 'Interview with a Filippines Woman segment 57 - W', 'Avalinguo - Itzel and Friend segment 12 - I', 'Avalinguo - Itzel and Friend segment 7 - I', 'Avalin

In [70]:
X, Y = [], []

classes = ["low","intermediate","high"]

for file in os.listdir(low):
    try:
        feature = get_aug_features(low+file)
        X.append(feature)
        # appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
        Y.append("low")
    except Exception as e:
        print("[Error] there was an error in feature extraction. %s" % (e))
        continue


for file in os.listdir(intermediate):
    try:
        feature = get_aug_features(intermediate+file)
        X.append(feature)
        # appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
        Y.append("intermediate")
    except Exception as e:
        print("[Error] there was an error in feature extraction. %s" % (e))
        continue

for file in os.listdir(high):
    try:
        feature = get_aug_features(high+file)
        X.append(feature)
        # appending emotion 3 times as we have made 3 augmentation techniques on each audio file.
        Y.append("high")
    except Exception as e:
        print("[Error] there was an error in feature extraction. %s" % (e))
        continue

Error: unknown file type {extension}
[Error] there was an error in feature extraction. zero-size array to reduction operation maximum which has no identity



Mean of empty slice.


invalid value encountered in double_scalars



In [74]:
print(np.array(Y).shape)

(1424,)


In [75]:
Features = pd.DataFrame(X)
Features['labels'] = Y
Features.to_csv('features_fluency.csv', index=False)
Features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,142,143,144,145,146,147,148,149,labels
0,0.053206,0.007045,3.004705,0.140346,0.178221,0.476237,0.00961,0.093883,-31.200013,2.193567,...,0.008224,0.008669,0.024672,0.115187,1.027144,0.046588,0.070801,0.145854,0.139764,low
1,0.050167,0.003563,3.019958,0.149085,0.187342,0.487404,0.005942,0.090217,-32.670208,1.871691,...,0.003729,0.003974,0.011186,0.09126,0.799435,0.030839,0.046953,0.104983,0.092518,low
2,0.049796,0.006729,2.950327,0.129346,0.176377,0.39556,0.009366,0.074483,-31.200523,2.474869,...,0.004962,0.006105,0.014887,0.145358,1.269955,0.045055,0.078931,0.187112,0.135166,low
3,0.051284,0.014177,3.056261,0.110161,0.148898,0.386496,0.010893,0.069542,-27.054889,2.143842,...,0.012481,0.014866,0.037442,0.167999,1.477966,0.079534,0.106513,0.183345,0.238601,low
4,0.056132,0.011475,3.083705,0.110317,0.146045,0.461578,0.006427,0.08655,-28.229155,1.6787,...,0.007202,0.008305,0.021607,0.118135,1.119456,0.047241,0.076031,0.137085,0.141724,low
