## Imports

In [1]:
import librosa
import os
import pandas as pd
import glob 
import numpy as np
import soundfile
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
%matplotlib inline

## Target and Feature Extraction

In [2]:
# Use relative file path to get directory of files"
path = "../raw_data/1-24 Audio Song and speech"

# Use os library to get file_names
directories = os.listdir(path)

In [3]:
# Initialize empty arrays for each field
speech_song = []
emotion = []
intensity = []
statement = []
repetitions = []
actor_id = []
gender = []

# Use for loop to process each file name.
for file in directories:
    f = file[0:-4]
    f = f.split("-")
    speech_song.append(f[1])
    emotion.append(f[2])
    intensity.append(f[3])
    statement.append(f[4])
    repetitions.append(f[5])
    actor_id.append(f[6])
    gender.append(int(f[6]) % 2)

In [4]:
# Create the empty data frame.
df = pd.DataFrame()

# Create the columns in the dataframe using arrays
df["speech_song"] = speech_song
df["emotion"] = emotion
df["intensity"] = intensity
df["statement"] = statement
df["repetitions"] = repetitions
df["actor_id"] = actor_id
df["gender"] = gender

In [5]:
# Create a gender in string version
df["gender_s"] = df["gender"].apply(lambda x: "Male" if x == 1 else "Female")


# Create an emotion dict to map the emotions
emotion_dict = {"01":"Neutral", "02":"Calm", "03":"Happy", "04":"Sad", "05":"Angry", "06":"Fearful"}

#Create a string column of the emotions and map them to emotion_dict
df["emotion_s"] = df["emotion"].map(emotion_dict)

# Get a dummies df of the emotions_strings
dummies = pd.get_dummies(df["emotion_s"])

# Horizontally concat the dummies df with original df.
df = pd.concat([df, dummies], axis=1)

df

Unnamed: 0,speech_song,emotion,intensity,statement,repetitions,actor_id,gender,gender_s,emotion_s,Angry,Calm,Fearful,Happy,Neutral,Sad
0,01,01,01,01,01,01,1,Male,Neutral,0,0,0,0,1,0
1,01,01,01,01,01,02,0,Female,Neutral,0,0,0,0,1,0
2,01,01,01,01,01,03,1,Male,Neutral,0,0,0,0,1,0
3,01,01,01,01,01,04,0,Female,Neutral,0,0,0,0,1,0
4,01,01,01,01,01,05,1,Male,Neutral,0,0,0,0,1,0
5,01,01,01,01,01,06,0,Female,Neutral,0,0,0,0,1,0
6,01,01,01,01,01,07,1,Male,Neutral,0,0,0,0,1,0
7,01,01,01,01,01,08,0,Female,Neutral,0,0,0,0,1,0
8,01,01,01,01,01,09,1,Male,Neutral,0,0,0,0,1,0
9,01,01,01,01,01,10,0,Female,Neutral,0,0,0,0,1,0


In [6]:
path = "../raw_data/1-24 Audio Song and speech/*.wav"

In [7]:
def extract_feature(file_name, mfcc=False, chroma=False, mel=False):
    X = librosa.load(file_name)
    sample_rate = sound_file.samplerate
    #if chroma:
    #    stft=np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs=np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result=np.hstack((result, mfccs))
    #if chroma:
    #    chroma=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    #    result=np.hstack((result, chroma))
    if mel:
        mel1=np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
        result=np.hstack((result, mel1))
    return result

In [9]:
def load_data(test_size=0.2):
    x,y=[],[]
    for file in glob.glob(path):
        try:
            feature=extract_feature(file, mfcc=True, chroma=True, mel=True)
            x.append(feature)
        except:
            print(file)
    y = df[['Neutral', 'Calm', 'Happy', 'Sad', 'Angry', 'Fearful']]
    return train_test_split(np.array(x), y, test_size=test_size, random_state=9)

In [10]:
x_train,x_test,y_train,y_test=load_data(test_size=0.25)

../raw_data/1-24 Audio Song and speech\03-01-02-01-01-02-01.wav
../raw_data/1-24 Audio Song and speech\03-01-02-01-02-02-05.wav
../raw_data/1-24 Audio Song and speech\03-01-03-01-02-01-20.wav
../raw_data/1-24 Audio Song and speech\03-01-06-01-01-02-20.wav
../raw_data/1-24 Audio Song and speech\03-02-01-01-01-01-24.wav


ValueError: Found input variables with inconsistent numbers of samples: [2063, 2068]