In [2]:
#Author: Eren Ali Aslangiray

import librosa
from librosa import display
import time
import os
import pandas as pd
import joblib
import numpy as np

# ----------------------DATA PREPARING----------------------

# 1- RAVDESS DATABASE

## 1.1- Data Extraction

In [24]:
path = '/Users/erenmac/Desktop/NEW_DATA_VOICE/Audio_Speech_Actors_01-24'
lst = []


def ravdess_emotion_id(filename):
    """Return the zero-based emotion id parsed from a RAVDESS file name.

    The digit at index 7 of the name is read as the 1-based emotion code
    and shifted to 0-based (assumes names such as
    ``03-01-06-01-02-01-12.wav`` -- TODO confirm against the dataset).

    Raises:
        ValueError: if the character at that position is not a digit.
    """
    return int(filename[7:8]) - 1


for subdir, dirs, files in os.walk(path):
    dirs.sort()  # visit sub-folders in a stable order so the dataset order is reproducible
    for file in sorted(files):
        # Only hand audio to librosa. This skips .DS_Store at any depth (not
        # just in the root folder) and the X.joblib / y.joblib caches that the
        # save cell writes into a sub-folder of this very tree.
        if not file.lower().endswith('.wav'):
            continue
        X, sample_rate = librosa.load(os.path.join(subdir, file))
        lst.append((X, ravdess_emotion_id(file)))
        # NOTE(review): librosa.load is called with its default sampling rate,
        # so this check may never fire -- confirm whether sr=None was intended.
        if sample_rate != 22050:
            print (sample_rate)

In [26]:
# Split the (waveform, label) pairs collected by the extraction loop.
if not lst:
    raise ValueError("No audio clips were collected; run the extraction cell first.")
signals, labels = zip(*lst)

# Waveforms of different lengths cannot form a rectangular array.
# np.asarray() on such a ragged sequence only worked through an implicit
# object-array fallback that newer NumPy versions reject with ValueError,
# so the 1-D object array (one waveform per element) is built explicitly.
X = np.empty(len(signals), dtype=object)
for idx, signal in enumerate(signals):
    X[idx] = signal
y = np.asarray(labels)

In [33]:
X_name = 'X.joblib'
y_name = 'y.joblib'
save_dir = '/Users/erenmac/Desktop/NEW_DATA_VOICE/Audio_Speech_Actors_01-24/Joblib_saves'

# joblib.dump() writes to an existing folder only, so create it on first use
# (same guard the model-saving cell applies before model.save()).
# NOTE(review): this folder sits inside the dataset tree that the extraction
# cell walks with os.walk(path).
os.makedirs(save_dir, exist_ok=True)

# Persist waveforms and labels so later sessions can skip the audio decoding.
savedX = joblib.dump(X, os.path.join(save_dir, X_name))
savedy = joblib.dump(y, os.path.join(save_dir, y_name))

## 1.2- Data Load

In [74]:
# Reload the RAVDESS waveforms (X) and labels (y) that were cached with joblib.
X, y = (
    joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/Audio_Speech_Actors_01-24/Joblib_saves/X.joblib'),
    joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/Audio_Speech_Actors_01-24/Joblib_saves/y.joblib'),
)

## 1.3- MEAN MFCCs KERAS MODEL (With 8 Emotion Label)

In [78]:
import keras
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.utils import to_categorical
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint

# Append a trailing axis (axis=2) to both splits before feeding them to Conv1D.
# NOTE(review): X_train / X_test are not defined in any cell visible in this
# notebook -- confirm where the feature extraction and train/test split live.
x_traincnn, x_testcnn = (
    np.expand_dims(split, axis=2) for split in (X_train, X_test)
)

Using TensorFlow backend.


In [80]:
# 1-D CNN: two Conv1D stages over a (40, 1) input, flattened into an 8-way
# softmax (one output unit per emotion label).
model = Sequential()

# Stage 1: 128 filters of width 5, then pooling by a factor of 8.
model.add(Conv1D(128, 5, padding='same',
                 input_shape=(40, 1)))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(MaxPooling1D(pool_size=8))

# Stage 2: same convolution on the pooled sequence.
model.add(Conv1D(128, 5, padding='same'))
model.add(Activation('relu'))
model.add(Dropout(0.1))

# Classifier head.
model.add(Flatten())
model.add(Dense(8))
model.add(Activation('softmax'))

# Use the RMSprop class itself: the lowercase `keras.optimizers.rmsprop` alias
# is not available in newer Keras releases (AttributeError), while the class
# name exists in old and new versions. Arguments are unchanged.
opt = keras.optimizers.RMSprop(lr=0.00005, rho=0.9, epsilon=None, decay=0.0)

In [81]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 40, 128)           768       
_________________________________________________________________
activation_1 (Activation)    (None, 40, 128)           0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 40, 128)           0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5, 128)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 5, 128)            82048     
_________________________________________________________________
activation_2 (Activation)    (None, 5, 128)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 5, 128)            0         
__________

In [82]:
# Configure training: RMSprop optimizer (opt), sparse categorical
# cross-entropy as the loss, accuracy as the reported metric.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 1000 epochs in mini-batches of 16 and keep the per-epoch history.
# NOTE(review): the test split is also passed as validation data, so the
# validation curves are not an independent hold-out.
cnnhistory = model.fit(
    x_traincnn,
    y_train,
    batch_size=16,
    epochs=1000,
    validation_data=(x_testcnn, y_test),
)

In [None]:
# Training loss vs. validation loss per epoch.
for curve in ('loss', 'val_loss'):
    plt.plot(cnnhistory.history[curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Training accuracy vs. validation accuracy per epoch.
# metrics=['accuracy'] is logged under 'acc' by older Keras releases and under
# 'accuracy' by newer ones; pick whichever key this run actually produced so
# the cell does not fail with KeyError after a Keras upgrade.
cnn_log = cnnhistory.history
acc_key = 'acc' if 'acc' in cnn_log else 'accuracy'

plt.plot(cnn_log[acc_key])
plt.plot(cnn_log['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('acc')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Predicted class id = index of the largest softmax probability.
# Sequential.predict_classes() is no longer available in newer
# Keras/TensorFlow releases; for a multi-class softmax output it is the same
# as taking the argmax over the last axis of predict().
predictions = np.argmax(model.predict(x_testcnn), axis=-1)

# Ground-truth labels as plain integers for the sklearn metrics.
new_Ytest = y_test.astype(int)

In [None]:
from sklearn.metrics import classification_report

# Text summary of the per-class metrics on the test split.
report = classification_report(y_true=new_Ytest, y_pred=predictions)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true labels, columns the predicted labels.
matrix = confusion_matrix(y_true=new_Ytest, y_pred=predictions)
print (matrix)

# Label legend:
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised

In [None]:
model_name = 'Emotion_Voice_Detection_Model.h5'
save_dir = '/Users/erenmac/Desktop/ENGR498/Code/Voice_Emo_Rec_Models'
model_path = os.path.join(save_dir, model_name)

# Make sure the target folder exists, then save model and weights.
os.makedirs(save_dir, exist_ok=True)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

In [None]:
# Load the model back from the folder the save cell wrote it to. A bare file
# name would be resolved against the current working directory, which is not
# where the model was saved.
loaded_model = keras.models.load_model(
    os.path.join('/Users/erenmac/Desktop/ENGR498/Code/Voice_Emo_Rec_Models',
                 'Emotion_Voice_Detection_Model.h5')
)
loaded_model.summary()

In [None]:
# Score the restored model on the test split and report accuracy in percent.
loss, acc = loaded_model.evaluate(x_testcnn, y_test)
accuracy_percent = 100*acc
print("Restored model, accuracy: {:5.2f}%".format(accuracy_percent))

## 1.4- MEAN MFCCs KERAS MODEL (With reduced labels. Labels = 4)

In [84]:
# TODO: placeholder - the code for this section is not included in this notebook.

## 1.5- MEAN DB AMPLITUDE MODEL AND .T VERSION (Source Data - LESS Data)

In [85]:
# TODO: placeholder - the code for this section is not included in this notebook.
# Sketch: plt.plot(np.mean(librosa.amplitude_to_db(S, ref=np.max).T,axis=0))

# 2- SAVEE DATABASE

## 2.1- Data Extraction

In [14]:
path = '/Users/erenmac/Desktop/NEW_DATA_VOICE/AudioData/1'
lst = []
numlist = ["0","1","2","3","4","5","6","7","8","9"]


def savee_emotion_code(filename):
    """Return the leading emotion code of a file name.

    The first two characters are inspected: when the second one is a digit
    the code is the single leading letter (``a01.wav`` -> ``a``), otherwise
    both characters form the code (``sa01.wav`` -> ``sa``).
    """
    code = filename[0:2]
    if code[1] in numlist:
        code = code[0]
    return code


for subdir, dirs, files in os.walk(path):
    dirs.sort()  # visit sub-folders in a stable order so the dataset order is reproducible
    for file in sorted(files):
        # Only hand audio to librosa: skips .DS_Store and any other stray
        # file, which would either fail to decode or yield a label that the
        # later label_to_num lookup does not know.
        if not file.lower().endswith('.wav'):
            continue
        X, sample_rate = librosa.load(os.path.join(subdir, file))
        lst.append((X, savee_emotion_code(file)))
        # NOTE(review): librosa.load is called with its default sampling rate,
        # so this check may never fire -- confirm whether sr=None was intended.
        if sample_rate != 22050:
            print (sample_rate)

In [16]:
# Split the (waveform, label) pairs collected by the extraction loop.
if not lst:
    raise ValueError("No audio clips were collected; run the extraction cell first.")
signals, labels = zip(*lst)

# Waveforms of different lengths cannot form a rectangular array.
# np.asarray() on such a ragged sequence only worked through an implicit
# object-array fallback that newer NumPy versions reject with ValueError,
# so the 1-D object array (one waveform per element) is built explicitly.
X = np.empty(len(signals), dtype=object)
for idx, signal in enumerate(signals):
    X[idx] = signal
y = np.asarray(labels)

In [24]:
# Shared label ids:
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised
label_to_num = {"a": 4, "d": 6, "f": 5, "h": 2, "n": 0, "sa": 3, "su": 7}

# Translate every letter code into its shared numeric id.
# Re-running this cell on the already-converted y raises KeyError.
newy = [label_to_num[item] for item in y]
y = newy

In [26]:
X_name = 'X.joblib'
y_name = 'y.joblib'
save_dir = '/Users/erenmac/Desktop/NEW_DATA_VOICE/AudioData/Joblib'

# joblib.dump() writes to an existing folder only, so create it on first use
# (same guard the model-saving cell applies before model.save()).
os.makedirs(save_dir, exist_ok=True)

# Persist waveforms and labels so later sessions can skip the audio decoding.
savedX = joblib.dump(X, os.path.join(save_dir, X_name))
savedy = joblib.dump(y, os.path.join(save_dir, y_name))

## 2.2- Data Load

In [29]:
# Reload the cached SAVEE waveforms; the labels come back through
# np.asarray so that y is a NumPy array.
X = joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/AudioData/Joblib/X.joblib')
y = np.asarray(
    joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/AudioData/Joblib/y.joblib')
)

# 3- GERMAN DATABASE

## 3.1- Data Extraction

In [34]:
path = '/Users/erenmac/Desktop/NEW_DATA_VOICE/German_DB/wav'
lst = []


def german_emotion_code(filename):
    """Return the emotion letter of a German-database file name.

    The letter is the character at index 5 of the name
    (e.g. ``03a01Fa.wav`` -> ``F``).

    Raises:
        IndexError: if the name is shorter than six characters.
    """
    return filename[5]


for subdir, dirs, files in os.walk(path):
    dirs.sort()  # visit sub-folders in a stable order so the dataset order is reproducible
    for file in sorted(files):
        # Only hand audio to librosa: skips .DS_Store and any other stray
        # file, which would either fail to decode or yield a label that the
        # later label_to_num lookup does not know.
        if not file.lower().endswith('.wav'):
            continue
        X, sample_rate = librosa.load(os.path.join(subdir, file))
        lst.append((X, german_emotion_code(file)))
        # NOTE(review): librosa.load is called with its default sampling rate,
        # so this check may never fire -- confirm whether sr=None was intended.
        if sample_rate != 22050:
            print (sample_rate)

In [36]:
# Split the (waveform, label) pairs collected by the extraction loop.
if not lst:
    raise ValueError("No audio clips were collected; run the extraction cell first.")
signals, labels = zip(*lst)

# Waveforms of different lengths cannot form a rectangular array.
# np.asarray() on such a ragged sequence only worked through an implicit
# object-array fallback that newer NumPy versions reject with ValueError,
# so the 1-D object array (one waveform per element) is built explicitly.
X = np.empty(len(signals), dtype=object)
for idx, signal in enumerate(signals):
    X[idx] = signal
y = np.asarray(labels)

In [39]:
# Shared label ids:
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised
# NOTE(review): "L" is mapped onto id 1 (calm) -- confirm that this is the
# intended equivalence for that German-database class.
label_to_num = {"W": 4, "L": 1, "E": 6, "A": 5, "F": 2, "T": 3, "N": 0}

# Translate every letter code into its shared numeric id.
# Re-running this cell on the already-converted y raises KeyError.
newy = [label_to_num[item] for item in y]
y = newy

In [41]:
X_name = 'X.joblib'
y_name = 'y.joblib'
save_dir = '/Users/erenmac/Desktop/NEW_DATA_VOICE/German_DB/Joblib'

# joblib.dump() writes to an existing folder only, so create it on first use
# (same guard the model-saving cell applies before model.save()).
os.makedirs(save_dir, exist_ok=True)

# Persist waveforms and labels so later sessions can skip the audio decoding.
savedX = joblib.dump(X, os.path.join(save_dir, X_name))
savedy = joblib.dump(y, os.path.join(save_dir, y_name))

## 3.2- Data Load

In [42]:
# Reload the cached German-database waveforms; the labels come back through
# np.asarray so that y is a NumPy array.
X = joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/German_DB/Joblib/X.joblib')
y = np.asarray(
    joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/German_DB/Joblib/y.joblib')
)

# 4- TESS DATABASE

## 4.1- Data Extraction

### 4.1.1- Web Crawling (Database items are available at different links)

In [3]:
import os, ssl
from bs4 import BeautifulSoup
import urllib.request
import re
import shutil
import requests
# SECURITY NOTE(review): this replaces Python's default HTTPS context factory
# with the unverified one, so HTTPS connections opened through that default
# skip certificate verification for the rest of the session. It is applied
# only when PYTHONHTTPSVERIFY is unset/empty and the ssl module exposes
# _create_unverified_context. Confirm this workaround is still required.
unverified_context_factory = getattr(ssl, '_create_unverified_context', None)
https_verify_requested = os.environ.get('PYTHONHTTPSVERIFY', '')
if unverified_context_factory and not https_verify_requested:
    ssl._create_default_https_context = unverified_context_factory

In [4]:
starturl = "https://tspace.library.utoronto.ca/handle/1807/24501"
initbaseurl = "https://tspace.library.utoronto.ca/handle/1807/"
baseurl = "https://tspace.library.utoronto.ca"

# The last five characters of starturl are the first handle id; ids then count
# down one by one and stop before 24487 (i.e. 24488 is the last one used).
first_handle = int(starturl[-5:])
urllist = [
    initbaseurl + str(handle)
    for handle in range(first_handle, 24487, -1)
]

In [5]:
# Collect the href of every .wav link found on the handle pages in urllist.
data_urls = []

for item in urllist:
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() stops on an HTTP error instead of silently parsing
    # an error page that contains no .wav links.
    r = requests.get(item, timeout=30)
    r.raise_for_status()
    data = r.text
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(data, "html.parser")
    # href=True skips anchors without an href attribute; for those
    # link.get('href') is None and calling .endswith() on it would raise
    # AttributeError.
    for link in soup.find_all('a', href=True):
        href = link['href']
        if href.endswith(".wav"):
            data_urls.append(href)

In [10]:
# Shared label ids:
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised
# Key spelling fixed ("neurtal" -> "neutral") so it matches "neutral.wav" in
# labellist below.
label_to_num = {"ps":7,"angry":4,"fear":5,"neutral":0 ,"happy":2 ,"disgust":6 ,"sad":3}
# File-name endings used to recognise each emotion in the crawled links.
labellist = ["ps.wav","angry.wav","fear.wav","neutral.wav","happy.wav","disgust.wav","sad.wav"]

In [None]:
output_file_path = "/Users/erenmac/Desktop/NEW_DATA_VOICE/TESS_Database/"

# open(..., 'wb') does not create missing folders, so make sure the download
# target exists before the first file is written.
os.makedirs(output_file_path, exist_ok=True)

# Download every collected link whose path contains one of the known label
# endings; files are stored as "<running index>_<label ending>".
i = 0
for data in data_urls:
    for item in labellist:
        if item in data:
            url = baseurl + data
            output_file = output_file_path+str(i)+"_"+ item
            # Stream the response straight to disk.
            with urllib.request.urlopen(url) as response, open(output_file, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
            i = i+1

### 4.1.2- Data Extraction & Saving

In [30]:
path = '/Users/erenmac/Desktop/NEW_DATA_VOICE/TESS_Database'
lst = []
# 0 = neutral, 1 = calm, 2 = happy, 3 = sad, 4 = angry, 5 = fearful, 6 = disgust, 7 = surprised
label_to_num = {"ps":7,"angry":4,"fear":5,"neutral":0 ,"happy":2 ,"disgust":6 ,"sad":3}


def tess_emotion_id(filename):
    """Return the emotion id of the first label key contained in ``filename``.

    Keys are tried in the order they appear in ``label_to_num``.
    Returns None when no key occurs in the name.
    """
    for key, emotion_id in label_to_num.items():
        if key in filename:
            return emotion_id
    return None


for subdir, dirs, files in os.walk(path):
    dirs.sort()  # visit sub-folders in a stable order so the dataset order is reproducible
    for file in sorted(files):
        # Only hand audio to librosa. This skips .DS_Store and the
        # X.joblib / y.joblib caches that the save cell writes into a
        # sub-folder of this very tree.
        if not file.lower().endswith('.wav'):
            continue
        emotion_id = tess_emotion_id(file)
        if emotion_id is None:
            # Without this guard the file name itself would be stored as the
            # label and silently mix strings into the integer labels.
            print('Skipping file without a known emotion label: %s' % os.path.join(subdir, file))
            continue
        X, sample_rate = librosa.load(os.path.join(subdir, file))
        lst.append((X, emotion_id))
        # NOTE(review): librosa.load is called with its default sampling rate,
        # so this check may never fire -- confirm whether sr=None was intended.
        if sample_rate != 22050:
            print (sample_rate)

In [32]:
# Split the (waveform, label) pairs collected by the extraction loop.
if not lst:
    raise ValueError("No audio clips were collected; run the extraction cell first.")
signals, labels = zip(*lst)

# Waveforms of different lengths cannot form a rectangular array.
# np.asarray() on such a ragged sequence only worked through an implicit
# object-array fallback that newer NumPy versions reject with ValueError,
# so the 1-D object array (one waveform per element) is built explicitly.
X = np.empty(len(signals), dtype=object)
for idx, signal in enumerate(signals):
    X[idx] = signal
y = np.asarray(labels)

In [36]:
X_name = 'X.joblib'
y_name = 'y.joblib'
save_dir = '/Users/erenmac/Desktop/NEW_DATA_VOICE/TESS_Database/Joblib'

# joblib.dump() writes to an existing folder only, so create it on first use
# (same guard the model-saving cell applies before model.save()).
# NOTE(review): this folder sits inside the dataset tree that the extraction
# cell walks with os.walk(path).
os.makedirs(save_dir, exist_ok=True)

# Persist waveforms and labels so later sessions can skip the audio decoding.
savedX = joblib.dump(X, os.path.join(save_dir, X_name))
savedy = joblib.dump(y, os.path.join(save_dir, y_name))

## 4.2- Data Load

In [42]:
# Reload the cached TESS waveforms; the labels come back through
# np.asarray so that y is a NumPy array.
X = joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/TESS_Database/Joblib/X.joblib')
y = np.asarray(
    joblib.load('/Users/erenmac/Desktop/NEW_DATA_VOICE/TESS_Database/Joblib/y.joblib')
)

# ----------------------MASS MODEL BUILDING----------------------

# 1- Data Preparing

### 1.1- Gathering All The Datasets Together

In [50]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 1.2- Saving The Big Dataset

In [51]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 1.3- Feature Extraction

In [52]:
# TODO: placeholder - the code for this section is not included in this notebook.

# 2- Model Building

### 2.1- MEAN MFCCs KERAS MODEL (using RAVDESS big data and other databases)

In [54]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 2.2- MEAN MFCCs REDUCED KERAS MODEL (using RAVDESS big data and other databases) (4 LABELS)

In [55]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 2.3- MEAN DB AMPLITUDE MODEL AND .T VERSION (using RAVDESS small data and other databases)

In [56]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 2.4- WAVELET DATA EXPLORATION & MODEL BUILDING

In [57]:
# TODO: placeholder - the code for this section is not included in this notebook.

### 2.5- SAVING MODELS

In [58]:
# TODO: placeholder - the code for this section is not included in this notebook.

# 3- Model Testing & Evaluating 

In [59]:
# TODO: placeholder - the code for this section is not included in this notebook.