In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import librosa, os
import librosa.display
import numpy as np
import pandas as pd
import scipy.io.wavfile
import matplotlib.pyplot as plt
from tqdm import tqdm
import tensorflow as tf
from matplotlib.pyplot import specgram
import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix

Using TensorFlow backend.


In [29]:
DATASET_PATH = 'C:\\Users\\apoochelvan\\Downloads\\Audio_Speech_Actors_01-24\\'
mylist = os.listdir(DATASET_PATH)
print(os.listdir(DATASET_PATH))

['Actor_01', 'Actor_02', 'Actor_03', 'Actor_04', 'Actor_05', 'Actor_06', 'Actor_07', 'Actor_08', 'Actor_09', 'Actor_10', 'Actor_11', 'Actor_12', 'Actor_13', 'Actor_14', 'Actor_15', 'Actor_16', 'Actor_17', 'Actor_18', 'Actor_19', 'Actor_20', 'Actor_21', 'Actor_22', 'Actor_23', 'Actor_24']


In [20]:
CLASSES = {
    '01' : 'neutral',
    '02' : 'calm', 
    '03' : 'happy',
    '04' : 'sad',
    '05' : 'angry',
    '06' : 'fearful',
    '07' : 'disgust',
    '08' : 'surprised'
}

In [21]:
def get_features(file_loc):
    X, sr = librosa.load(file_loc, res_type = 'kaiser_fast',
                         duration = 2.5, sr = 22050 * 2,
                         offset = 0.5)
    sample_rate = np.array(sr)
    mfccs = np.mean(librosa.feature.mfcc(y = X, sr = sample_rate, n_mfcc = 13), axis = 0)
    return [mfccs]

In [22]:
features = pd.DataFrame(columns = ['feature'])
labels = pd.DataFrame(columns = ['labels'])
c = 0
for folder in os.listdir(DATASET_PATH):
    for file in tqdm(os.listdir(DATASET_PATH + folder)):
        features.loc[c] = get_features(DATASET_PATH + folder + '/' + file)
        labels.loc[c] = CLASSES[file.split('-')[2]]
        c += 1

100%|██████████████████████████████████████████| 60/60 [00:03<00:00, 19.13it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.99it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.54it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 25.34it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 25.19it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.89it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.07it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 25.20it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 25.27it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.46it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 25.44it/s]
100%|██████████████████████████████████████████| 60/60 [00:02<00:00, 24.23it/s]
100%|███████████████████████████████████

In [23]:
features.head()

Unnamed: 0,feature
0,"[-65.8009712673087, -65.8009712673087, -65.800..."
1,"[-61.84994914561038, -62.89503283066965, -63.8..."
2,"[-65.81886875834394, -65.81886875834394, -65.8..."
3,"[-66.05848538589821, -66.05848538589821, -66.0..."
4,"[-70.2677641610773, -70.2677641610773, -70.267..."


In [38]:
features[:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,207,208,209,210,211,212,213,214,215,labels
0,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,...,-62.295248,-64.93013,-65.332128,-64.116934,-64.319911,-64.971422,-63.155779,-64.178668,-63.061902,neutral
1,-61.849949,-62.895033,-63.830647,-60.55259,-60.821684,-62.073396,-64.889228,-65.389948,-65.389948,-65.389948,...,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,neutral
2,-65.818869,-65.818869,-65.818869,-65.818869,-65.805538,-65.818869,-65.818869,-65.629539,-64.802625,-65.818869,...,-65.818869,-65.818869,-65.818869,-65.43806,-65.684853,-65.818869,-65.818869,-65.818869,-65.818869,neutral
3,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,...,-55.826307,-57.741989,-59.796385,-60.975377,-62.663056,-64.57348,-63.782612,-61.515785,-58.214885,neutral
4,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,...,-57.447461,-58.896493,-58.751002,-57.405669,-60.078475,-63.426811,-62.638537,-61.082741,-60.234652,calm


In [40]:
df3 = pd.DataFrame(df['feature'].values.tolist())

In [42]:
newdf = pd.concat([df3,labels], axis=1)

In [43]:
from sklearn.utils import shuffle
features = shuffle(newdf)
features[:10]

Unnamed: 0,labels
800,sad
686,sad
964,calm
1404,sad
1126,disgust
313,happy
655,surprised
101,fearful
253,happy
881,fearful


In [25]:
features = pd.DataFrame(features['feature'].values.tolist())
features = pd.concat([features, labels], axis = 1)
features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,207,208,209,210,211,212,213,214,215,labels
0,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,-65.800971,...,-62.295248,-64.93013,-65.332128,-64.116934,-64.319911,-64.971422,-63.155779,-64.178668,-63.061902,neutral
1,-61.849949,-62.895033,-63.830647,-60.55259,-60.821684,-62.073396,-64.889228,-65.389948,-65.389948,-65.389948,...,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,-65.389948,neutral
2,-65.818869,-65.818869,-65.818869,-65.818869,-65.805538,-65.818869,-65.818869,-65.629539,-64.802625,-65.818869,...,-65.818869,-65.818869,-65.818869,-65.43806,-65.684853,-65.818869,-65.818869,-65.818869,-65.818869,neutral
3,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,-66.058485,...,-55.826307,-57.741989,-59.796385,-60.975377,-62.663056,-64.57348,-63.782612,-61.515785,-58.214885,neutral
4,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,-70.267764,...,-57.447461,-58.896493,-58.751002,-57.405669,-60.078475,-63.426811,-62.638537,-61.082741,-60.234652,calm


In [26]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix

In [27]:
from keras import regularizers

import os

In [30]:
type(mylist)

list

In [33]:
print(mylist[10][6:-16])




In [36]:

df = pd.DataFrame(columns=['feature'])
bookmark=0
for index,y in enumerate(mylist):
    if mylist[index][6:-16]!='01' and mylist[index][6:-16]!='07' and mylist[index][6:-16]!='08' and mylist[index][:2]!='su' and mylist[index][:1]!='n' and mylist[index][:1]!='d':
        X, sample_rate = librosa.load('C:\\Users\\apoochelvan\\Downloads\\Audio_Speech_Actors_01-24\\'+y, res_type='kaiser_fast',duration=2.5,sr=22050*2,offset=0.5)
        sample_rate = np.array(sample_rate)
        mfccs = np.mean(librosa.feature.mfcc(y=X, 
                                            sr=sample_rate, 
                                            n_mfcc=13),
                        axis=0)
        feature = mfccs
        #[float(i) for i in feature]
        #feature1=feature[:135]
        df.loc[bookmark] = [feature]
        bookmark=bookmark+1

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\apoochelvan\\Downloads\\Audio_Speech_Actors_01-24\\Actor_01'