# Extracting Audio Features and Saving Them (CSV Format) for Future Use

In this notebook, I extract the relevant spectral-level features and store them in CSV format so that the audio feature data can be shared with UoRochester. Once they have access to the audio files, they too can use this notebook to extract the features.

Features Extracted:
1. MFCC (All Coeffs from 1 - 13)
2. Zero Crossings
3. Spectral Centroid
4. Chroma FFT
5. RMS Energy
6. Spectral Rolloff
7. Phonation Rate
8. Speech Productivity

Two possible angles that Masum and I discussed for using this data:
1. Using some ML model on the extracted features (such as LSTM, Transformer)
2. Using CNN on the spectrograms

In [1]:
import librosa 
import librosa.display
from scipy.io import wavfile as wav
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis
import sklearn
from pydub import AudioSegment 
from pydub.silence import split_on_silence 
import ffmpeg
from sklearn.svm import SVC
from os.path import exists
import csv



In [2]:
# Functions for MFCC Calculations

def mfcc_mean(mfcc, coefficient_number):
    """Return the mean of the row ``mfcc[coefficient_number]``."""
    coefficient_row = mfcc[coefficient_number]
    return np.mean(coefficient_row)
def mfcc_median(mfcc, coefficient_number):
    """Return the median of the row ``mfcc[coefficient_number]``."""
    coefficient_row = mfcc[coefficient_number]
    return np.median(coefficient_row)
def mfcc_variance(mfcc, coefficient_number):
    """Return the variance of the row ``mfcc[coefficient_number]``.

    The variance is computed with ``dtype=np.float32``.
    """
    coefficient_row = mfcc[coefficient_number]
    return np.var(coefficient_row, dtype=np.float32)
def mfcc_standard_deviation(mfcc, coefficient_number):
    """Return the standard deviation of the row ``mfcc[coefficient_number]``."""
    coefficient_row = mfcc[coefficient_number]
    return np.std(coefficient_row)

def mfcc_calculations(audio, sample_rate, data_list):
    """Append MFCC summary statistics to ``data_list`` and return it.

    ``librosa.feature.mfcc`` is asked for 20 coefficients; for rows 1 through
    13 of the result, four values are appended in this order: mean, median,
    variance, standard deviation (52 values in total).

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    sample_rate : sampling rate passed to librosa as ``sr``.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object.
    """
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=20)
    summaries = (mfcc_mean, mfcc_median, mfcc_variance, mfcc_standard_deviation)
    # Row 0 and rows 14..19 are computed but deliberately not exported.
    for coefficient in range(1, 14):
        data_list.extend(summarise(mfcc, coefficient) for summarise in summaries)
    return data_list

In [3]:
# Functions for Zero Crossing Rate Calculations

def zcr_mean(zcr):
    """Return the mean over all values in ``zcr``."""
    average = np.mean(zcr, axis=None)
    return average
def zcr_median(zcr):
    """Return the median over all values in ``zcr``."""
    middle = np.median(zcr, axis=None)
    return middle
def zcr_variance(zcr):
    """Return the variance of ``zcr``, computed with ``dtype=np.float32``."""
    spread = np.var(zcr, dtype=np.float32)
    return spread
def zcr_standard_deviation(zcr):
    """Return the standard deviation over all values in ``zcr``."""
    deviation = np.std(zcr, axis=None)
    return deviation

def zero_crossings_rate_calculations(audio, sample_rate, data_list):
    """Append zero-crossing-rate summary statistics to ``data_list``.

    The first row returned by ``librosa.feature.zero_crossing_rate`` is
    reduced to mean, median, variance and standard deviation, appended in
    that order. ``sample_rate`` is accepted for a uniform signature but is
    not used.

    Returns
    -------
    list
        The same ``data_list`` object, extended by four values.
    """
    zcr = librosa.feature.zero_crossing_rate(y=audio)[0]
    data_list.extend(
        (zcr_mean(zcr), zcr_median(zcr), zcr_variance(zcr), zcr_standard_deviation(zcr))
    )
    return data_list

In [4]:
# Functions for Spectral Centroids Calculations

def sc_mean(sc):
    """Return the mean over all values in ``sc``."""
    centroid_values = sc
    return np.mean(centroid_values)
def sc_median(sc):
    """Return the median over all values in ``sc``."""
    centroid_values = sc
    return np.median(centroid_values)
def sc_variance(sc):
    """Return the variance of ``sc``, computed with ``dtype=np.float32``."""
    centroid_values = sc
    return np.var(centroid_values, dtype=np.float32)
def sc_standard_deviation(sc):
    """Return the standard deviation over all values in ``sc``."""
    centroid_values = sc
    return np.std(centroid_values)

def spectral_centroid_calculations(audio, sample_rate, data_list):
    """Append spectral-centroid summary statistics to ``data_list``.

    The first row returned by ``librosa.feature.spectral_centroid`` is
    reduced to four values, appended in this order: mean, median, variance,
    standard deviation.

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    sample_rate : sampling rate passed to librosa as ``sr``.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object, extended by four values.
    """
    # Pass the signal by keyword: librosa >= 0.10 made the feature extractors'
    # arguments keyword-only, so a positional signal raises TypeError there.
    # The keyword form also works on older releases and matches mfcc_calculations.
    sc = librosa.feature.spectral_centroid(y=audio, sr=sample_rate)[0]
    data_list.append(sc_mean(sc))
    data_list.append(sc_median(sc))
    data_list.append(sc_variance(sc))
    data_list.append(sc_standard_deviation(sc))
    return data_list

In [5]:
# Functions for Chroma FFT Calculations

def chroma_mean(chroma, coefficient_number):
    """Return the mean of the row ``chroma[coefficient_number]``."""
    chroma_row = chroma[coefficient_number]
    return np.mean(chroma_row)
def chroma_median(chroma, coefficient_number):
    """Return the median of the row ``chroma[coefficient_number]``."""
    chroma_row = chroma[coefficient_number]
    return np.median(chroma_row)
def chroma_variance(chroma, coefficient_number):
    """Return the variance of the row ``chroma[coefficient_number]``.

    The variance is computed with ``dtype=np.float32``.
    """
    chroma_row = chroma[coefficient_number]
    return np.var(chroma_row, dtype=np.float32)
def chroma_standard_deviation(chroma, coefficient_number):
    """Return the standard deviation of the row ``chroma[coefficient_number]``."""
    chroma_row = chroma[coefficient_number]
    return np.std(chroma_row)

def chroma_fft_calculations(audio, sample_rate, data_list):
    """Append chroma summary statistics to ``data_list`` and return it.

    ``librosa.feature.chroma_stft`` is called with ``hop_length=512``; for
    rows 0 through 11 of the result, four values are appended in this order:
    mean, median, variance, standard deviation (48 values in total).

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    sample_rate : sampling rate passed to librosa as ``sr``.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object.
    """
    # Pass the signal by keyword: librosa >= 0.10 made the feature extractors'
    # arguments keyword-only, so a positional signal raises TypeError there.
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate, hop_length=512)
    for coefficient in range(0, 12):
        data_list.append(chroma_mean(chroma, coefficient))
        data_list.append(chroma_median(chroma, coefficient))
        data_list.append(chroma_variance(chroma, coefficient))
        data_list.append(chroma_standard_deviation(chroma, coefficient))
    return data_list

In [6]:
# Functions for RMS Energy Calculations

def rms_mean(rms):
    """Return the mean over all values in ``rms``."""
    average = np.mean(rms, axis=None)
    return average
def rms_median(rms):
    """Return the median over all values in ``rms``."""
    middle = np.median(rms, axis=None)
    return middle
def rms_variance(rms):
    """Return the variance of ``rms``, computed with ``dtype=np.float32``."""
    spread = np.var(rms, dtype=np.float32)
    return spread
def rms_standard_deviation(rms):
    """Return the standard deviation over all values in ``rms``."""
    deviation = np.std(rms, axis=None)
    return deviation

def rmse_calculations(audio, sample_rate, data_list):
    """Append RMS-energy summary statistics to ``data_list`` and return it.

    ``librosa.feature.rms`` is called with ``frame_length=512``,
    ``hop_length=256`` and ``center=True``; the first row of the result is
    reduced to mean, median, variance and standard deviation, appended in
    that order. ``sample_rate`` is accepted for a uniform signature but is
    not used.

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    sample_rate : unused.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object, extended by four values.
    """
    # Pass the signal by keyword: librosa >= 0.10 made the feature extractors'
    # arguments keyword-only, so a positional signal raises TypeError there.
    rms = librosa.feature.rms(y=audio, frame_length=512, hop_length=256, center=True)[0]
    data_list.append(rms_mean(rms))
    data_list.append(rms_median(rms))
    data_list.append(rms_variance(rms))
    data_list.append(rms_standard_deviation(rms))
    return data_list

In [7]:
# Functions for Spectral Rolloff Calculations

def sroll_mean(sroll):
    """Return the mean over all values in ``sroll``."""
    rolloff_values = sroll
    return np.mean(rolloff_values)
def sroll_median(sroll):
    """Return the median over all values in ``sroll``."""
    rolloff_values = sroll
    return np.median(rolloff_values)
def sroll_variance(sroll):
    """Return the variance of ``sroll``, computed with ``dtype=np.float32``."""
    rolloff_values = sroll
    return np.var(rolloff_values, dtype=np.float32)
def sroll_standard_deviation(sroll):
    """Return the standard deviation over all values in ``sroll``."""
    rolloff_values = sroll
    return np.std(rolloff_values)

def spectral_rolloff_calculations(audio, sample_rate, data_list):
    """Append spectral-rolloff summary statistics to ``data_list``.

    ``librosa.feature.spectral_rolloff`` is evaluated on ``audio + 0.01``
    (a constant offset is added to every sample before analysis); the first
    row of the result is reduced to mean, median, variance and standard
    deviation, appended in that order.

    Parameters
    ----------
    audio : signal; must support ``audio + 0.01``.
    sample_rate : sampling rate passed to librosa as ``sr``.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object, extended by four values.
    """
    # Pass the signal by keyword: librosa >= 0.10 made the feature extractors'
    # arguments keyword-only, so a positional signal raises TypeError there.
    sroll = librosa.feature.spectral_rolloff(y=audio + 0.01, sr=sample_rate)[0]
    data_list.append(sroll_mean(sroll))
    data_list.append(sroll_median(sroll))
    data_list.append(sroll_variance(sroll))
    data_list.append(sroll_standard_deviation(sroll))
    return data_list

In [8]:
# Functions for Phonation Rate & Speech Productivity Calculations

def prosodic_calculations(audio, sample_rate, audio_duration, data_list):
    """Append phonation rate and speech productivity to ``data_list``.

    Non-silent intervals are found with ``librosa.effects.split`` at
    ``top_db=20``; their lengths (in samples) are converted to seconds with
    ``sample_rate`` and summed into the voiced duration, which is capped at
    ``audio_duration``.

    * ``phonation_rate``      = voiced duration / ``audio_duration``
    * ``speech_productivity`` = (``audio_duration`` - voiced duration) / voiced duration

    Either ratio is reported as ``nan`` when its denominator is not positive
    (zero-length audio, or no voiced interval detected), so one degenerate
    recording does not abort the whole extraction run.

    Parameters
    ----------
    audio : signal passed to librosa as ``y``.
    sample_rate : sampling rate used to convert sample counts to seconds.
    audio_duration : total duration, in the same unit as the voiced duration.
    data_list : list
        Extended in place.

    Returns
    -------
    list
        The same ``data_list`` object, extended by two values.
    """
    voiced_intervals = librosa.effects.split(y=audio, top_db=20)
    total_voiced_duration = 0
    for start, end in voiced_intervals:
        total_voiced_duration = total_voiced_duration + ((end - start) / sample_rate)
    # To account for overflows: the summed intervals may slightly exceed the
    # duration supplied by the caller.
    if total_voiced_duration > audio_duration:
        total_voiced_duration = audio_duration
    total_silenced_duration = audio_duration - total_voiced_duration

    undefined = float("nan")
    if audio_duration > 0:
        phonation_rate = total_voiced_duration / audio_duration
    else:
        phonation_rate = undefined
    if total_voiced_duration > 0:
        speech_productivity = total_silenced_duration / total_voiced_duration
    else:
        speech_productivity = undefined

    data_list.append(phonation_rate)
    data_list.append(speech_productivity)
    return data_list

In [9]:
# Column names for the output CSV, listed in the same order in which the
# feature functions append their values to each row.

header = ["SID", "patient_BDI", "patient_PSS"]
stat_suffixes = ("mean", "median", "variance", "standard_deviation")
# MFCC coefficients 1-13, four statistics each
for coeff in range(1, 14):
    header.extend("mfcc_" + str(coeff) + "_" + suffix for suffix in stat_suffixes)
# Zero crossing rate, then spectral centroid
for feature_name in ("zcr", "sc"):
    header.extend(feature_name + "_" + suffix for suffix in stat_suffixes)
# Chroma bins 0-11, four statistics each
for coeff in range(0, 12):
    header.extend("chroma_" + str(coeff) + "_" + suffix for suffix in stat_suffixes)
# RMS energy, then spectral rolloff
for feature_name in ("rms", "sroll"):
    header.extend(feature_name + "_" + suffix for suffix in stat_suffixes)
# Prosodic features
header += ["phonation_rate", "speech_productivity"]

In [16]:
# Load the clinical spreadsheet and build patient_list: one single-element
# list [SID] per row of the sheet (later cells read the ID as patient[0]).

clinical_info_file_path = "clinical_info.xlsx"
df = pd.read_excel(io=clinical_info_file_path, engine="openpyxl")
sid_column = df[['SID']]
patient_list = sid_column.values.tolist()

In [18]:
# Extract features for the FU1 recordings and store them together with the
# 1-month BDI and PSS scores taken from the clinical sheet.
# To run this cell, each recording must be saved as FU1.wav inside the folder
# named after the patient SID; patients without that file are skipped.

all_data = []

for patient in patient_list:
    df_patient = df.loc[df['SID'] == patient[0]]
    patient_BDI = df_patient[['BDItotalscore_1mo']].values.tolist()[0][0]
    patient_PSS = df_patient[['PSStotal_1month']].values.tolist()[0][0]
    data_list = [patient[0], patient_BDI, patient_PSS]
    fu1_audio_file_path = 'booker_audio_files/' + str(patient[0]) + '/FU1.wav'

    if exists(fu1_audio_file_path):
        audio, sample_rate = librosa.load(fu1_audio_file_path)
        # Derive the duration from the samples that were just decoded instead of
        # re-opening the file: get_duration(filename=...) is a deprecated keyword,
        # and this keeps the duration on the same sample grid as the voiced
        # intervals computed from `audio`.
        audio_duration = librosa.get_duration(y=audio, sr=sample_rate)
        data_list = mfcc_calculations(audio, sample_rate, data_list)
        data_list = zero_crossings_rate_calculations(audio, sample_rate, data_list)
        data_list = spectral_centroid_calculations(audio, sample_rate, data_list)
        data_list = chroma_fft_calculations(audio, sample_rate, data_list)
        data_list = rmse_calculations(audio, sample_rate, data_list)
        data_list = spectral_rolloff_calculations(audio, sample_rate, data_list)
        data_list = prosodic_calculations(audio, sample_rate, audio_duration, data_list)
        all_data.append(data_list)

In [19]:
# Extract features for the FU3 recordings and store them together with the
# 3-month BDI and PSS scores taken from the clinical sheet.
# To run this cell, each recording must be saved as FU3.wav inside the folder
# named after the patient SID; patients without that file are skipped.
# Rows are appended to the existing all_data list.

for patient in patient_list:
    df_patient = df.loc[df['SID'] == patient[0]]
    patient_BDI = df_patient[['BDItotalscore_3mo']].values.tolist()[0][0]
    patient_PSS = df_patient[['PSStotal_3month']].values.tolist()[0][0]
    data_list = [patient[0], patient_BDI, patient_PSS]
    fu3_audio_file_path = 'booker_audio_files/' + str(patient[0]) + '/FU3.wav'

    if exists(fu3_audio_file_path):
        audio, sample_rate = librosa.load(fu3_audio_file_path)
        # Derive the duration from the samples that were just decoded instead of
        # re-opening the file: get_duration(filename=...) is a deprecated keyword,
        # and this keeps the duration on the same sample grid as the voiced
        # intervals computed from `audio`.
        audio_duration = librosa.get_duration(y=audio, sr=sample_rate)
        data_list = mfcc_calculations(audio, sample_rate, data_list)
        data_list = zero_crossings_rate_calculations(audio, sample_rate, data_list)
        data_list = spectral_centroid_calculations(audio, sample_rate, data_list)
        data_list = chroma_fft_calculations(audio, sample_rate, data_list)
        data_list = rmse_calculations(audio, sample_rate, data_list)
        data_list = spectral_rolloff_calculations(audio, sample_rate, data_list)
        data_list = prosodic_calculations(audio, sample_rate, audio_duration, data_list)
        all_data.append(data_list)

In [20]:
# Extract features for the FU6 recordings and store them together with the
# 6-month BDI and PSS scores taken from the clinical sheet.
# To run this cell, each recording must be saved as FU6.wav inside the folder
# named after the patient SID; patients without that file are skipped.
# Rows are appended to the existing all_data list.

for patient in patient_list:
    df_patient = df.loc[df['SID'] == patient[0]]
    patient_BDI = df_patient[['BDItotalscore_6mo']].values.tolist()[0][0]
    patient_PSS = df_patient[['PSStotal_6month']].values.tolist()[0][0]
    data_list = [patient[0], patient_BDI, patient_PSS]
    fu6_audio_file_path = 'booker_audio_files/' + str(patient[0]) + '/FU6.wav'

    if exists(fu6_audio_file_path):
        audio, sample_rate = librosa.load(fu6_audio_file_path)
        # Derive the duration from the samples that were just decoded instead of
        # re-opening the file: get_duration(filename=...) is a deprecated keyword,
        # and this keeps the duration on the same sample grid as the voiced
        # intervals computed from `audio`.
        audio_duration = librosa.get_duration(y=audio, sr=sample_rate)
        data_list = mfcc_calculations(audio, sample_rate, data_list)
        data_list = zero_crossings_rate_calculations(audio, sample_rate, data_list)
        data_list = spectral_centroid_calculations(audio, sample_rate, data_list)
        data_list = chroma_fft_calculations(audio, sample_rate, data_list)
        data_list = rmse_calculations(audio, sample_rate, data_list)
        data_list = spectral_rolloff_calculations(audio, sample_rate, data_list)
        data_list = prosodic_calculations(audio, sample_rate, audio_duration, data_list)
        all_data.append(data_list)

In [21]:
# Extract features for the FU12 recordings and store them together with the
# 12-month BDI and PSS scores taken from the clinical sheet.
# To run this cell, each recording must be saved as FU12.wav inside the folder
# named after the patient SID; patients without that file are skipped.
# Rows are appended to the existing all_data list.

for patient in patient_list:
    df_patient = df.loc[df['SID'] == patient[0]]
    patient_BDI = df_patient[['BDItotalscore_12mo']].values.tolist()[0][0]
    patient_PSS = df_patient[['PSStotal_12month']].values.tolist()[0][0]
    data_list = [patient[0], patient_BDI, patient_PSS]
    fu12_audio_file_path = 'booker_audio_files/' + str(patient[0]) + '/FU12.wav'

    if exists(fu12_audio_file_path):
        audio, sample_rate = librosa.load(fu12_audio_file_path)
        # Derive the duration from the samples that were just decoded instead of
        # re-opening the file: get_duration(filename=...) is a deprecated keyword,
        # and this keeps the duration on the same sample grid as the voiced
        # intervals computed from `audio`.
        audio_duration = librosa.get_duration(y=audio, sr=sample_rate)
        data_list = mfcc_calculations(audio, sample_rate, data_list)
        data_list = zero_crossings_rate_calculations(audio, sample_rate, data_list)
        data_list = spectral_centroid_calculations(audio, sample_rate, data_list)
        data_list = chroma_fft_calculations(audio, sample_rate, data_list)
        data_list = rmse_calculations(audio, sample_rate, data_list)
        data_list = spectral_rolloff_calculations(audio, sample_rate, data_list)
        data_list = prosodic_calculations(audio, sample_rate, audio_duration, data_list)
        all_data.append(data_list)


In [22]:
# Write the header row followed by every collected feature row to
# spectral_features.csv. The `with` block closes the file even if a write
# raises; newline='' lets the csv module control line endings itself.
with open('spectral_features.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(all_data)