# Imports

In [2]:
import pandas as pd
import numpy as np

import os
import sys

# librosa is a Python library for analyzing audio and music. We use it below to load the audio files and extract features from them.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

# to play the audio files
from IPython.display import Audio

# import keras
# from keras.callbacks import ReduceLROnPlateau
# from keras.models import Sequential
# from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
# from keras.utils import np_utils, to_categorical
# from keras.callbacks import ModelCheckpoint

import warnings
# Silence all warnings unless the interpreter was started with explicit
# warning options (sys.warnoptions is non-empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
# This filter is installed unconditionally: DeprecationWarning is ignored
# even when explicit warning options were given.
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [3]:
# Location of the CSV that indexes the audio dataset. Despite its name,
# `github_csv_url` holds a local file path; the name is kept because other
# cells may reference it. Set the AUDIO_DATASET_CSV environment variable to
# point at the file on another machine; the original path is the fallback.
github_csv_url = os.environ.get(
    "AUDIO_DATASET_CSV",
    r"C:/Users/gusta/Documents/Data Science Bootcamp/Data Science/Final Project/Data/completeAudioSetdf.csv",
)

data_path = pd.read_csv(github_csv_url)

# Fail early, with a clear message, if the columns that the feature
# extraction loop reads (`Path` and `Emotions`) are not present.
_missing_columns = {"Path", "Emotions"} - set(data_path.columns)
if _missing_columns:
    raise KeyError(
        f"{github_csv_url} is missing required columns: {sorted(_missing_columns)}"
    )



### Data Augmentation Functions

In [4]:
# NOISE INJECTION
def noise(data):
    """Return `data` with random Gaussian noise added (the input is not modified).

    The noise amplitude is a random fraction, below 3.5%, of the largest
    sample value in `data`. One noise sample is drawn per element along the
    first axis of `data`.
    """
    peak = np.amax(data)
    amplitude = 0.035 * np.random.uniform() * peak
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian

# TIME STRETCHING
def stretch(data, rate=0.8):
    """Time-stretch an audio signal with librosa.

    Parameters
    ----------
    data : audio time series passed to `librosa.effects.time_stretch`.
    rate : stretch factor forwarded to librosa. Defaults to 0.8, the value
        that was previously hard-coded regardless of the argument.

    Returns
    -------
    The time-stretched signal returned by librosa.
    """
    # Forward the caller's rate; the earlier version ignored it and always used 0.8.
    return librosa.effects.time_stretch(data, rate=rate)

# TIME SHIFTING
def shift(data):
    """Circularly shift `data` by a random number of samples.

    The offset is a uniform draw from [-5, 5) multiplied by 35000 and
    truncated to an integer; `np.roll` wraps elements around the ends.
    """
    draw = np.random.uniform(low=-5, high=5)
    offset = int(draw * 35000)
    return np.roll(data, offset)

# PITCH SHIFTING
def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift an audio signal with librosa.

    `sampling_rate` is forwarded as `sr` and `pitch_factor` as `n_steps`
    to `librosa.effects.pitch_shift`; the shifted signal is returned.
    """
    shifted = librosa.effects.pitch_shift(
        data,
        sr=sampling_rate,
        n_steps=pitch_factor,
    )
    return shifted


## Feature Extraction

The following features will be extracted to train our model:

* Zero Crossing Rate
* Chroma_stft
* MFCC
* RMS (root mean square) value
* Mel spectrogram

### Functions

Extract features function

In [5]:
def extract_features(data):
    """Build a 1-D feature vector for one audio signal.

    Five librosa features are computed, each is averaged down to one value
    per feature row, and the means are concatenated in this order:
    zero crossing rate, chroma STFT, MFCC, RMS, mel spectrogram.
    """
    def frame_mean(feature_matrix):
        # Transpose, then average over axis 0, i.e. over the feature's last
        # axis (presumably the frame/time axis in librosa's layout).
        return np.mean(feature_matrix.T, axis=0)

    # Magnitude of the short-time Fourier transform, used for the chroma feature.
    # NOTE(review): librosa documents `S` for chroma_stft as a power spectrogram,
    # while a magnitude spectrogram is passed here — confirm this is intended.
    magnitude = np.abs(librosa.stft(data))

    pieces = (
        # Empty float64 seed: keeps the result dtype the same as stacking
        # each feature onto an initially empty array one at a time.
        np.array([]),
        frame_mean(librosa.feature.zero_crossing_rate(y=data)),  # Zero Crossing Rate
        frame_mean(librosa.feature.chroma_stft(S=magnitude)),    # Chroma_stft
        frame_mean(librosa.feature.mfcc(y=data)),                # MFCC
        frame_mean(librosa.feature.rms(y=data)),                 # Root Mean Square value
        frame_mean(librosa.feature.melspectrogram(y=data)),      # Mel spectrogram
    )
    return np.hstack(pieces)

Get features function

In [6]:


def get_features(path):
    """Load one audio file and extract features for it and two augmented variants.

    Returns a 2-D array with three rows, in this order:
    the unmodified signal, the noise-injected signal, and the signal that
    was time-stretched (rate 0.8) and then pitch-shifted.
    """
    # Skip the first 0.6 s and read at most 2.5 s; the offset and duration
    # are meant to cut away the silence at the start and end of each recording.
    signal, sr = librosa.load(path, duration=2.5, offset=0.6)

    # Without augmentation.
    plain = extract_features(signal)

    # With noise injected.
    noisy = extract_features(noise(signal))

    # With time stretching followed by pitch shifting.
    stretched = stretch(signal, rate=0.8)
    warped = extract_features(pitch(stretched, sr))

    # One row per variant.
    return np.vstack((plain, noisy, warped))

### Using the functions to convert the audio files into a dataframe

In [7]:
# Each file yields one feature row per variant (the original signal plus its
# augmented versions), so its emotion label is repeated once per row.
X, Y = [], []
for path, emotion in zip(data_path.Path, data_path.Emotions):
    feature = get_features(path)
    X.extend(feature)
    Y.extend([emotion] * len(feature))

In [8]:
# Sanity check: X and Y should have the same length (one label per feature
# row); the last value is the number of input files for comparison.
len(X), len(Y), data_path.Path.shape

(36486, 36486, (12162,))

In [10]:
# Assemble the feature rows into a dataframe, attach the labels as the last
# column, save the result as CSV (without the index) and preview the first rows.
Features = pd.DataFrame(X).assign(labels=Y)
Features.to_csv(
    'C:/Users/gusta/Documents/Data Science Bootcamp/Data Science/Final Project/Data/features.csv',
    index=False,
)
Features.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,153,154,155,156,157,158,159,160,161,labels
0,0.321275,0.729664,0.750032,0.730624,0.735275,0.713529,0.660531,0.684966,0.733049,0.753972,...,4.310903e-06,3.291511e-06,2.148075e-06,2.279739e-06,5.116493e-06,8.190282e-06,7e-06,5e-06,4.245834e-07,neutral
1,0.334106,0.829354,0.843001,0.821864,0.839591,0.82661,0.698667,0.699734,0.745395,0.779965,...,0.0001311704,0.0001364862,0.0001341184,0.0001342646,0.0001334335,0.0001419735,0.000143,0.000142,0.0001290688,neutral
2,0.188285,0.62213,0.699225,0.753333,0.721221,0.701736,0.682347,0.662826,0.686492,0.733961,...,8.577343e-07,9.576414e-07,7.733593e-07,5.233101e-07,3.592793e-07,9.261689e-07,2e-06,1e-06,7.753987e-08,neutral
3,0.293566,0.673896,0.722096,0.723508,0.682302,0.680533,0.675352,0.628977,0.679179,0.707283,...,6.984504e-06,7.034949e-06,6.654923e-06,6.979548e-06,1.214236e-05,9.640183e-06,1.1e-05,6e-06,4.254087e-07,neutral
4,0.307522,0.773911,0.80451,0.783638,0.770017,0.782777,0.683769,0.649273,0.712984,0.738875,...,4.867612e-05,4.668169e-05,4.564495e-05,4.782138e-05,4.966828e-05,4.710044e-05,5.1e-05,4.4e-05,3.921349e-05,neutral
