In [23]:
import os
import numpy as np
import pandas as pd
import IPython.display as ipd
import tgt
import math
import sklearn
import librosa
import re
import scipy.io.wavfile
from numpy import unique
from numpy import where
from matplotlib import pyplot as plt
import librosa.display
from sklearn import metrics
%matplotlib inline
from textGrid_AudioTrim import readTextGridUpdate,audio_trimming
import warnings
warnings.filterwarnings("ignore")

In [2]:
def zeroCrossingRate(x):
    """Return the frame-wise zero-crossing rate of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples; forwarded to librosa as ``y``.

    Returns
    -------
    Whatever ``librosa.feature.zero_crossing_rate`` returns. Callers in this
    notebook index the result with ``[0]`` to get the per-frame values.
    """
    # The signal is passed by keyword: recent librosa releases accept it only
    # as ``y=``, and older releases accept the keyword form as well.
    return librosa.feature.zero_crossing_rate(y=x, pad=False)

In [3]:
def spectralCentroid(x,sr,visualize=False):
    """Compute the per-frame spectral centroid of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz.
    visualize : bool, default False
        When True, draw the waveform with the min-max scaled centroid
        curve overlaid in red and show the figure.

    Returns
    -------
    The first row of ``librosa.feature.spectral_centroid`` (one value per
    analysis frame).
    """
    n_fft = 512
    hop_length = 128

    # Audio is passed as ``y=`` so the call works on librosa releases where
    # the signal argument is keyword-only.
    spectral_centroids = librosa.feature.spectral_centroid(
        y=x, sr=sr, n_fft=n_fft, hop_length=hop_length)[0]

    if visualize:
        frames = range(len(spectral_centroids))
        # The frame -> time conversion must use the same sr and hop length as
        # the feature itself; librosa's defaults would misplace the overlay
        # relative to the waveform.
        t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)
        # Keep using waveplot where it exists; fall back to waveshow on
        # librosa versions that no longer ship waveplot.
        draw_wave = getattr(librosa.display, "waveplot", None) or librosa.display.waveshow
        draw_wave(x, sr=sr, alpha=0.4)
        plt.plot(t, sklearn.preprocessing.minmax_scale(spectral_centroids, axis=0), color='r')
        plt.show()

    return spectral_centroids

In [4]:
#spectral rolloffs 
def spectralRollOff(x,sr,visualize=False):
    """Compute the per-frame spectral roll-off frequency of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz.
    visualize : bool, default False
        When True, draw the waveform with the min-max scaled roll-off
        curve overlaid in red and show the figure.

    Returns
    -------
    The first row of ``librosa.feature.spectral_rolloff`` (one value per
    analysis frame).
    """
    n_fft = 512
    hop_length = 128

    # Audio is passed as ``y=`` so the call works on librosa releases where
    # the signal argument is keyword-only.
    spectral_rolloff = librosa.feature.spectral_rolloff(
        y=x, sr=sr, n_fft=n_fft, hop_length=hop_length)[0]

    if visualize:
        frames = range(len(spectral_rolloff))
        # Convert frames to seconds with the same sr / hop length used to
        # compute the feature, otherwise the curve is stretched against the
        # waveform.
        t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)
        # Keep using waveplot where it exists; fall back to waveshow on
        # librosa versions that no longer ship waveplot.
        draw_wave = getattr(librosa.display, "waveplot", None) or librosa.display.waveshow
        draw_wave(x, sr=sr, alpha=0.4)
        plt.plot(t, sklearn.preprocessing.minmax_scale(spectral_rolloff, axis=0), color='r')
        plt.show()

    return spectral_rolloff
    

In [5]:
def LogMelspectrogram(x,sr,visualize=False):
    """Compute a log-scaled (dB) mel spectrogram of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz.
    visualize : bool, default False
        When True, draw the spectrogram on a new 12x4 figure with a
        colorbar. The figure is not shown explicitly.

    Returns
    -------
    The 128-band mel spectrogram converted with
    ``librosa.amplitude_to_db`` (ref=1.0, amin=1e-05, top_db=80.0).
    """
    n_fft = 512
    hop_length = 128

    # power=1.0 yields a magnitude (not power) spectrogram, which is why
    # amplitude_to_db is the matching conversion below.
    melspec=librosa.feature.melspectrogram(
        y=x, sr=sr, n_fft=n_fft, hop_length=hop_length,
        power=1.0, n_mels=128, fmin=0.0, fmax=None)
    log_S=librosa.amplitude_to_db(melspec, ref=1.0, amin=1e-05, top_db=80.0)

    if visualize:
        plt.figure(figsize=(12,4))
        # specshow needs the analysis hop length to label the time axis
        # correctly; without it the default hop is assumed.
        librosa.display.specshow(log_S, sr=sr, hop_length=hop_length, x_axis='time', y_axis='mel')
        plt.title('Log mel spectrogram')
        plt.colorbar(format='%+02.0f dB')
        plt.tight_layout()
    return log_S


In [6]:
def mfcc(x,sr,visualize=False):
    """Compute 13 MFCCs per frame (n_fft=512, hop_length=128).

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz.
    visualize : bool, default False
        When True, draw the coefficient matrix in the top panel of a
        3-row figure with a colorbar.

    Returns
    -------
    The matrix returned by ``librosa.feature.mfcc``.
    """
    coefficients = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=13, n_fft=512, hop_length=128)

    # Nothing to draw: hand the coefficients straight back.
    if not visualize:
        return coefficients

    plt.figure(figsize=(12, 6))
    plt.subplot(3,1,1)
    plt.title("MFCC  ")
    librosa.display.specshow(coefficients)
    plt.ylabel('MFCC')
    plt.colorbar()
    return coefficients

In [7]:
def Rmse(x,sr,visualize=False):
    """Compute the per-frame root-mean-square energy of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz (used only for plotting).
    visualize : bool, default False
        When True, draw the waveform with the min-max scaled RMS curve
        overlaid in red and show the figure.

    Returns
    -------
    The first row of ``librosa.feature.rms`` (one value per frame).
    """
    # Same value as librosa's default hop for rms; named here so the
    # frame -> time conversion below is guaranteed to agree with it.
    hop_length = 512

    # Audio is passed as ``y=`` so the call works on librosa releases where
    # the signal argument is keyword-only.
    rmse = librosa.feature.rms(y=x, hop_length=hop_length)[0]

    if visualize:
        frames = range(len(rmse))
        # Use the caller's sampling rate; librosa's default sr would
        # mis-scale the curve against the waveform.
        t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)
        # Keep using waveplot where it exists; fall back to waveshow on
        # librosa versions that no longer ship waveplot.
        draw_wave = getattr(librosa.display, "waveplot", None) or librosa.display.waveshow
        draw_wave(x, sr=sr, alpha=0.4)
        plt.plot(t, sklearn.preprocessing.minmax_scale(rmse, axis=0), color='r')
        plt.show()

    return rmse

In [8]:
def chromas(x,sr,visualize=False):
    """Compute a constant-Q chromagram of an audio signal.

    Parameters
    ----------
    x : array-like
        Audio samples.
    sr : int
        Sampling rate of ``x`` in Hz. It is forwarded to librosa instead
        of a hard-coded rate, so the result matches the actual audio.
    visualize : bool, default False
        When True, draw the chromagram on a new 12x4 figure with a
        colorbar. The figure is not shown explicitly.

    Returns
    -------
    The matrix returned by ``librosa.feature.chroma_cqt``.
    """
    C = librosa.feature.chroma_cqt(y=x, sr=sr)

    if visualize:
        plt.figure(figsize=(12,4))
        # To make sure that the colors span the full range of chroma values, set vmin and vmax
        librosa.display.specshow(C, sr=sr, x_axis='time', y_axis='chroma', vmin=0, vmax=1)
        # The title no longer interpolates a ``sound`` variable: no such name
        # is defined in this function, so plotting raised NameError.
        plt.title('Chromagram')
        plt.colorbar()
        plt.tight_layout()
    return C

In [9]:
def getUpdatedAudio(audio_file_path,textGrid_file_path,word,pronunciation_vowel):
    """Return the audio of ``audio_file_path`` trimmed to the target vowel.

    The TextGrid is queried through ``readTextGridUpdate`` for the word and
    vowel boundaries; only the vowel's start/end times are then handed to
    ``audio_trimming`` together with the audio path.

    Returns
    -------
    Whatever ``audio_trimming`` returns for the vowel interval.
    """
    # readTextGridUpdate yields four values; the word-level boundaries are
    # not needed here, only the vowel interval.
    _word_start, _word_end, vowel_start, vowel_end = readTextGridUpdate(
        textGrid_file_path, word, pronunciation_vowel
    )
    return audio_trimming(audio_file_path, vowel_start, vowel_end)

In [16]:
def make_dataframe(filenames,features):
    """Build a wide DataFrame with one row per file and one column per feature value.

    Parameters
    ----------
    filenames : sequence
        One file identifier per row.
    features : sequence of sequences
        One feature vector per file. Vectors may differ in length (shorter
        rows are padded with NaN) and may be empty.

    Returns
    -------
    pandas.DataFrame
        Integer-labelled feature columns followed by a ``file_name`` column.
        An empty input yields an empty DataFrame that still has the
        ``file_name`` column.

    Raises
    ------
    ValueError
        If ``filenames`` and ``features`` differ in length.
    """
    print("length of features ",len(features))
    print("length of filenames ",len(filenames))

    # Fail early with a clear message rather than at the column assignment.
    if len(features) != len(filenames):
        raise ValueError(
            "features and filenames must have the same length "
            f"(got {len(features)} and {len(filenames)})"
        )

    if len(features) == 0:
        # Applying pd.Series to an empty column returns a Series, not a
        # DataFrame, so the empty case is built directly.
        df_updated=pd.DataFrame()
    else:
        # Expand each per-file vector into its own set of columns.
        df_updated=pd.DataFrame({'feature':features}).feature.apply(pd.Series)
    df_updated['file_name']=filenames

    return df_updated

In [20]:
def toFeature(file_paths,base_path_textGrid,base_path_audio,word,pronunciation_vowel,sr=16000):
    """Extract vowel-level audio features for every file and tabulate them.

    For each entry of ``file_paths`` the matching ``.wav`` / ``.TextGrid``
    pair is trimmed to the requested vowel via ``getUpdatedAudio`` and seven
    features are computed from the trimmed audio.

    Parameters
    ----------
    file_paths : iterable of str
        File stems (no extension) present in both base directories.
    base_path_textGrid, base_path_audio : str
        Directories holding ``<stem>.TextGrid`` and ``<stem>.wav``.
    word, pronunciation_vowel : str
        Passed through to ``getUpdatedAudio`` to locate the vowel.
    sr : int, default 16000
        Sampling rate handed to the feature functions.
        NOTE(review): assumes the trimmed audio is sampled at this rate;
        confirm against ``audio_trimming``.

    Returns
    -------
    tuple of 7 DataFrames
        ``(zeroCrossings, spectralCentroid, spectralRollOff, rmse,
        logMelSpec, mfcc, chroma)``, each built by ``make_dataframe``.
        A file whose processing raised contributes an empty feature vector
        to every table.
    """
    feature_names=(
        'zero_crossing',
        'spectral_centroid',
        'spectral_rolloff',
        'rmse',
        'log_melspec',
        'mfcc',
        'chroma',
    )
    collected={name:[] for name in feature_names}
    filenames=[]

    for i,file in enumerate(file_paths):
        filenames.append(file)
        print(i)
        try:
            audio_path=f'{base_path_audio}/{file}.wav'
            textgrid_path=f'{base_path_textGrid}/{file}.TextGrid'

            updated_audio=getUpdatedAudio(audio_path,textgrid_path,word,pronunciation_vowel)

            # Compute everything for this file first and only then store it.
            # Appending feature by feature would leave the lists with
            # different lengths if a later feature raised.
            row={
                'zero_crossing':zeroCrossingRate(updated_audio)[0],
                'spectral_centroid':spectralCentroid(updated_audio,sr),
                'spectral_rolloff':spectralRollOff(updated_audio,sr),
                'rmse':Rmse(updated_audio,sr),
                # Matrix-valued features are summarised by their mean over axis 1.
                'log_melspec':np.mean(LogMelspectrogram(updated_audio,sr),axis=1),
                'mfcc':np.mean(mfcc(updated_audio,sr),axis=1),
                'chroma':np.mean(chromas(updated_audio,sr),axis=1),
            }
        except Exception as e:
            # Best effort: report the problem and keep the file as an empty
            # row in every table so rows stay aligned with ``filenames``.
            print("exception caused",e)
            row={name:[] for name in feature_names}

        for name in feature_names:
            collected[name].append(row[name])

    return tuple(make_dataframe(filenames,collected[name]) for name in feature_names)

    
    
    

    

In [None]:
#this is for muslim dataset
if __name__=="__main__":
    # Inputs: the TextGrid directory, the WAV directory and the annotation
    # spreadsheet. Output: one CSV per feature table, written to output_dir.
    base_path_textGrid="D://Himani-work/gsoc2020/dataset/muslim_dataset_version1/muslim_textGrid_dataset/"
    base_path_audio="D://Himani-work/gsoc2020/dataset/muslim_dataset_version1/muslim_wav_dataset/"
    output_dir='D://Himani-work/gsoc2020/code/Audio_Feature_Extraction/Audio_features_vowel'

    textGridFiles=list(os.listdir(base_path_textGrid))
    audioFiles=list(os.listdir(base_path_audio))
    # Keep only entries with the expected extension and strip exactly that
    # extension. Splitting on ".wav" with pandas can treat the pattern as a
    # regex (any character followed by "wav") and truncate names that
    # contain "wav" elsewhere.
    all_audio_files=[os.path.splitext(name)[0] for name in audioFiles if name.endswith(".wav")]
    all_textGrid_files=[os.path.splitext(name)[0] for name in textGridFiles if name.endswith(".TextGrid")]
    print("total texGrid and audio files",len(all_audio_files),len(all_textGrid_files))

    df=pd.read_excel("D://Himani-work/gsoc2020/dataset/spreadsheet_data/muslim_concordance_250_annotated.xls")
    # Rows with any missing annotation are discarded before use.
    df.dropna(inplace=True)
    all_correct_files=list(df['File Name'])

    print("total corrected files",len(all_correct_files))

    # Only files that have audio, a TextGrid and an annotation are processed.
    # Sorting makes the row order of the output CSVs reproducible between runs.
    final_files=sorted(set(all_audio_files)&set(all_correct_files)&set(all_textGrid_files))
    print("final files",len(final_files))
    word="muslim"
    pronunciation_vowel="V"

    df_zeroCrossings,df_spectralCentroid,df_spectralRollOff,df_rmse,df_logMelSpec,df_mfcc,df_chroma=toFeature(final_files,base_path_textGrid,base_path_audio,word,pronunciation_vowel)

    # NOTE(review): df_rmse is computed but has never been written to disk by
    # this cell; left unchanged here - confirm whether that is intentional.
    for feature_name,feature_df in (
        ('zeroCrossings',df_zeroCrossings),
        ('spectralCentroid',df_spectralCentroid),
        ('spectralRollOff',df_spectralRollOff),
        ('logMelSpec',df_logMelSpec),
        ('mfcc',df_mfcc),
        ('chroma',df_chroma),
    ):
        feature_df.to_csv(f'{output_dir}/{feature_name}_muslim_features_vowel.csv',index=False)


    

    
    
   

In [None]:
# for the ideology five words
if __name__=="__main__":
    # Inputs: the TextGrid directory and the WAV directory.
    # Output: one CSV per feature table, written to output_dir.
    base_path_textGrid="D://Himani-work/gsoc2020/dataset/ideology_five_words_version2/ideology_textGrid_five_dataset_version2/"
    base_path_audio="D://Himani-work/gsoc2020/dataset/ideology_five_words_version2/ideology_wav_five_dataset_version2/"
    output_dir='D://Himani-work/gsoc2020/code/Audio_Feature_Extraction/Audio_features_vowel'

    textGridFiles=list(os.listdir(base_path_textGrid))
    audioFiles=list(os.listdir(base_path_audio))
    # Keep only entries with the expected extension and strip exactly that
    # extension. Splitting on ".wav" with pandas can treat the pattern as a
    # regex (any character followed by "wav") and truncate names that
    # contain "wav" elsewhere.
    all_audio_files=[os.path.splitext(name)[0] for name in audioFiles if name.endswith(".wav")]
    all_textGrid_files=[os.path.splitext(name)[0] for name in textGridFiles if name.endswith(".TextGrid")]
    print("total texGrid and audio files",len(all_audio_files),len(all_textGrid_files))

    # Only files that have both audio and a TextGrid are candidates.
    # Sorting makes the row order of the output CSVs reproducible between runs.
    final_files=sorted(set(all_audio_files)&set(all_textGrid_files))
    print("final files",len(final_files))

    # The label is the second "_"-separated token of whatever follows
    # "clip_" in the file name; files labelled DELETEME are excluded.
    df=pd.DataFrame({'filename':final_files})
    splited_df=df['filename'].str.split("clip_",expand=True)
    labels=splited_df[1].str.split("_",expand=True)
    actual_labels=labels[1]
    deleted_labels=actual_labels[actual_labels=='DELETEME']

    df.drop(deleted_labels.index,inplace=True)
    df.reset_index(drop=True, inplace=True)

    final_files=df['filename']
    print("final files",len(final_files))

    # Extract the features of the target vowel in the target word for every
    # remaining file.
    word=r"ideology"
    pronunciation_vowel="aI"

    df_zeroCrossings,df_spectralCentroid,df_spectralRollOff,df_rmse,df_logMelSpec,df_mfcc,df_chroma=toFeature(final_files,base_path_textGrid,base_path_audio,word,pronunciation_vowel)

    # NOTE(review): df_rmse is computed but has never been written to disk by
    # this cell; left unchanged here - confirm whether that is intentional.
    for feature_name,feature_df in (
        ('zeroCrossings',df_zeroCrossings),
        ('spectralCentroid',df_spectralCentroid),
        ('spectralRollOff',df_spectralRollOff),
        ('logMelSpec',df_logMelSpec),
        ('mfcc',df_mfcc),
        ('chroma',df_chroma),
    ):
        feature_df.to_csv(f'{output_dir}/{feature_name}_ideologyFive_features_vowel.csv',index=False)


    


In [None]:
# for the ideology 200ms 
if __name__=="__main__":
    # Inputs: the TextGrid directory, the WAV directory and the tab-separated
    # formant results file. Output: one CSV per feature table in output_dir.
    base_path_textGrid="D://Himani-work/gsoc2020/dataset/ideology_extra200ms/textgrids_for_extra_200ms_MAUS/"
    base_path_audio="D://Himani-work/gsoc2020/dataset/ideology_extra200ms/with_extra_200ms"
    output_dir='D://Himani-work/gsoc2020/code/Audio_Feature_Extraction/Audio_features_vowel'

    textGridFiles=list(os.listdir(base_path_textGrid))
    audioFiles=list(os.listdir(base_path_audio))
    # Keep only entries with the expected extension and strip exactly that
    # extension. Splitting on ".wav" with pandas can treat the pattern as a
    # regex (any character followed by "wav") and truncate names that
    # contain "wav" elsewhere.
    all_audio_files=[os.path.splitext(name)[0] for name in audioFiles if name.endswith(".wav")]
    all_textGrid_files=[os.path.splitext(name)[0] for name in textGridFiles if name.endswith(".TextGrid")]
    print("total texGrid and audio files",len(all_audio_files),len(all_textGrid_files))

    df_ideology_200ms=pd.read_csv('D:/Himani-work/gsoc2020/dataset/spreadsheet_data/ideology_results_praat_formants_extracted_with_200ms_for_R.csv',sep='\t')
    # The 'file' column holds backslash-separated paths. Take the last
    # component of each path individually (an expanded split would give None
    # for paths shallower than the deepest one) and drop a trailing ".wav".
    all_correct_files=list(
        df_ideology_200ms['file']
        .str.split("\\")
        .str[-1]
        .str.replace(r"\.wav$","",regex=True)
    )
    print("total corrected files",len(all_correct_files))

    # Only files that have audio, a TextGrid and a results entry are processed.
    # Sorting makes the row order of the output CSVs reproducible between runs.
    final_files=sorted(set(all_audio_files)&set(all_correct_files)&set(all_textGrid_files))
    print("final files",len(final_files))

    # Extract the features of the target vowel in the target word for every
    # selected file.
    word="ideology"
    pronunciation_vowel="aI"

    df_zeroCrossings,df_spectralCentroid,df_spectralRollOff,df_rmse,df_logMelSpec,df_mfcc,df_chroma=toFeature(final_files,base_path_textGrid,base_path_audio,word,pronunciation_vowel)

    # NOTE(review): df_rmse is computed but has never been written to disk by
    # this cell; left unchanged here - confirm whether that is intentional.
    for feature_name,feature_df in (
        ('zeroCrossings',df_zeroCrossings),
        ('spectralCentroid',df_spectralCentroid),
        ('spectralRollOff',df_spectralRollOff),
        ('logMelSpec',df_logMelSpec),
        ('mfcc',df_mfcc),
        ('chroma',df_chroma),
    ):
        feature_df.to_csv(f'{output_dir}/{feature_name}_ideology_features_vowel.csv',index=False)

