In [2]:
import pandas as pd
import numpy as np
import re
import os,time,librosa,warnings,glob
from sklearn.preprocessing import MinMaxScaler,OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
warnings.filterwarnings("ignore")

In [5]:
df = pd.read_csv("csvs/big_audio_feats_corr2.csv")
df = df.sample(frac=1).reset_index(drop=True)
#df.head(10)

In [6]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,153,154,155,156,157,158,159,160,161,EMOTION
0,0.080169,0.646173,0.538033,0.350305,0.318666,0.409929,0.515067,0.439712,0.458682,0.589671,...,9.205287e-11,8.007018e-11,7.185033e-11,6.591991e-11,6.157751e-11,5.842195e-11,5.613951e-11,5.464044e-11,5.370214e-11,angry
1,0.173680,0.611925,0.583301,0.517663,0.480590,0.480441,0.596551,0.616726,0.511598,0.486148,...,9.790848e-03,9.064898e-03,6.747935e-03,2.958406e-03,2.046031e-03,9.269216e-04,1.191214e-04,1.123150e-05,9.562149e-07,surprised
2,0.086366,0.668026,0.577960,0.565601,0.680267,0.683219,0.605920,0.648096,0.641432,0.659432,...,4.492293e-07,4.257500e-07,4.071320e-07,3.916857e-07,3.790073e-07,3.690976e-07,3.613464e-07,3.559929e-07,3.525660e-07,angry
3,0.078636,0.401729,0.497316,0.550468,0.590597,0.535208,0.558647,0.416240,0.366665,0.436944,...,1.685822e-07,1.557665e-07,1.463430e-07,1.392516e-07,1.311315e-07,1.104085e-07,7.296157e-08,4.095894e-08,2.845191e-08,happy
4,0.070534,0.444725,0.467381,0.527001,0.584757,0.606219,0.543451,0.536835,0.565400,0.608153,...,1.000433e-07,9.390590e-08,8.910678e-08,8.519245e-08,8.202887e-08,7.957360e-08,7.767268e-08,7.636262e-08,7.553222e-08,happy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22283,0.077268,0.448152,0.487058,0.539423,0.533187,0.421850,0.436524,0.592964,0.754447,0.642154,...,1.271446e-08,1.172142e-08,1.105287e-08,1.045553e-08,9.897358e-09,8.325353e-09,5.576894e-09,3.238224e-09,2.311835e-09,disgust
22284,0.146012,0.552327,0.663127,0.647890,0.608816,0.507898,0.571738,0.641739,0.635468,0.581080,...,4.631727e-05,4.927229e-05,4.190162e-05,3.564119e-05,4.134921e-05,2.223936e-05,7.280930e-06,3.682356e-07,6.592551e-08,disgust
22285,0.155299,0.445872,0.465960,0.526010,0.495387,0.404272,0.425853,0.427747,0.539635,0.562811,...,1.687798e-03,9.325609e-04,6.471080e-04,4.591695e-04,3.541459e-04,2.777262e-04,7.686566e-05,3.898820e-06,2.706782e-08,sad
22286,0.035650,0.672858,0.699118,0.741956,0.666711,0.731715,0.760818,0.725760,0.782282,0.833848,...,1.720653e-08,1.619415e-08,1.539575e-08,1.473970e-08,1.420652e-08,1.379108e-08,1.346838e-08,1.324555e-08,1.310400e-08,sad


In [3]:
df = df.loc[df['emotion'].isin(["angry", "disgust", "feat", "happy", "neural", "surprised", "sad"])]

In [11]:
df.emotion.unique()

array(['sad', 'happy', 'disgust', 'angry', 'neural', 'surprised'],
      dtype=object)

In [17]:
def add_noise(data):
    noise_value = 0.015 * np.random.uniform() * np.amax(data)
    data = data + noise_value * np.random.normal(size=data.shape[0])
    return data

def stretch_process(data,rate=0.8):
    return librosa.effects.time_stretch(data,rate)

def shift_process(data):
    shift_range = int(np.random.uniform(low=-5,high=5) * 1000)
    return np.roll(data,shift_range)

def pitch_process(data,sampling_rate,pitch_factor=0.7):
    return librosa.effects.pitch_shift(data,sampling_rate,pitch_factor)

def extract_process(data, sample_rate):
    
    output_result = np.array([])
    mean_zero = np.mean(librosa.feature.zero_crossing_rate(y=data).T,axis=0)
    output_result = np.hstack((output_result,mean_zero))
    
    stft_out = np.abs(librosa.stft(data))
    chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft_out,sr=sample_rate).T,axis=0)
    output_result = np.hstack((output_result,chroma_stft))
    
    mfcc_out = np.mean(librosa.feature.mfcc(y=data,sr=sample_rate).T,axis=0)
    output_result = np.hstack((output_result,mfcc_out))
    
    root_mean_out = np.mean(librosa.feature.rms(y=data).T,axis=0)
    output_result = np.hstack((output_result,root_mean_out))
    
    mel_spectogram = np.mean(librosa.feature.melspectrogram(y=data,sr=sample_rate).T,axis=0)
    output_result = np.hstack((output_result,mel_spectogram))
    
    return output_result

def export_process(path):
    
    data,sample_rate = librosa.load(path,duration=2.5,offset=0.6)
    
    output_1 = extract_process(data, sample_rate)
    result = np.array(output_1)
    
    noise_out = add_noise(data)
    output_2 = extract_process(noise_out, sample_rate)
    result = np.hstack((result))

    new_out = stretch_process(data)
    strectch_pitch = pitch_process(new_out,sample_rate)
    output_3 = extract_process(strectch_pitch, sample_rate)
    result = np.vstack((result,output_3))
    
    return result
def create_new_df(df, name):
    # df = pd.read_csv("csvs/all_data.csv")
    # df = df.sample(frac=1).reset_index(drop=True)
    #print(df.head(10))

    #AVAILABLE_EMOTIONS = {"disgust", "fear", "angry","sad","neutral","happy"}
    X_train, y_train = [], []

    for path, emotion in zip(df.iloc[:,0],df.iloc[:,1]):
        # if emotion not in AVAILABLE_EMOTIONS:
        #     continue
        features = export_process(path)

        for element in features:
            X_train.append(element)
            y_train.append(emotion)

    New_Features_Wav = pd.DataFrame(X_train)
    New_Features_Wav["EMOTION"] = y_train

    New_Features_Wav.to_csv(name,index=False)

    New_Features_Wav = pd.read_csv(name)
    
    return New_Features_Wav


In [18]:
df_feat_big= create_new_df(df, "big_audio_feats_corr.csv")
df_feat_big

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,153,154,155,156,157,158,159,160,161,EMOTION
0,0.044565,0.625219,0.669099,0.630701,0.618155,0.661609,0.665891,0.673775,0.670684,0.760131,...,1.752546e-10,1.610365e-10,1.503487e-10,1.419565e-10,1.353704e-10,1.303529e-10,1.265452e-10,1.239364e-10,1.223104e-10,sad
1,0.048548,0.568396,0.577584,0.582963,0.595910,0.643647,0.650736,0.685002,0.663822,0.729856,...,3.074448e-10,2.718559e-10,2.447608e-10,2.092390e-10,1.959802e-10,1.687439e-10,9.818545e-11,4.532748e-11,3.049806e-11,sad
2,0.059957,0.545041,0.616390,0.668240,0.559134,0.630151,0.647411,0.614820,0.594325,0.589263,...,6.514041e-09,6.113898e-09,5.799055e-09,5.541281e-09,5.332493e-09,5.170139e-09,5.044340e-09,4.957464e-09,4.902447e-09,happy
3,0.059021,0.471908,0.562879,0.651745,0.583528,0.549706,0.638284,0.547001,0.541729,0.526636,...,9.297780e-09,8.703124e-09,9.034041e-09,8.091329e-09,7.580184e-09,6.576466e-09,4.883077e-09,2.871500e-09,2.124199e-09,happy
4,0.037839,0.317716,0.355972,0.354338,0.441349,0.543113,0.619506,0.569330,0.363126,0.271575,...,3.389664e-05,2.891622e-05,4.836611e-05,7.051951e-05,2.589511e-05,6.799779e-06,1.419092e-06,9.883137e-08,1.759215e-08,disgust
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22283,0.192660,0.697742,0.544205,0.429155,0.425979,0.387936,0.457979,0.572322,0.459468,0.435643,...,3.736144e-04,2.402351e-04,1.453432e-04,1.413675e-04,1.614788e-04,7.718549e-05,2.737223e-05,1.095373e-06,3.086717e-08,angry
22284,0.135577,0.497444,0.488712,0.527031,0.619153,0.690311,0.743118,0.702975,0.708047,0.705078,...,7.100594e-09,6.716296e-09,6.413456e-09,6.163061e-09,5.958692e-09,5.798916e-09,5.674360e-09,5.588321e-09,5.533261e-09,sad
22285,0.143461,0.583472,0.478412,0.480433,0.534548,0.623061,0.711179,0.743939,0.685528,0.699851,...,6.130970e-09,5.708650e-09,5.399918e-09,5.165920e-09,4.890355e-09,4.137983e-09,2.751084e-09,1.554911e-09,1.084328e-09,sad
22286,0.072132,0.484376,0.421624,0.540360,0.375669,0.308542,0.231356,0.212378,0.246682,0.387523,...,9.047753e-04,3.293371e-04,3.110049e-04,5.790492e-04,5.522261e-04,1.673740e-04,1.704381e-05,1.205301e-06,3.904860e-08,happy


In [19]:
df_feat_big.EMOTION.unique()

array(['sad', 'happy', 'disgust', 'angry', 'neural', 'surprised'],
      dtype=object)

In [5]:
df_check = pd.read_csv("csvs/big_audio_feats_corr.csv")

In [10]:
df_check.EMOTION.unique()

array(['disgust', 'sad', 'happy', 'angry'], dtype=object)

In [45]:
enc_labes = {"angry": 0, "happy": 1, "neural": 2, "sad":3}

In [54]:
#df_new = create_new_df(df.iloc[:], "/csvs/audio_feats.csv")

X  = df_feat.iloc[:, :-1].values
y = df_feat["EMOTION"].values

encoder = OneHotEncoder()
scaler = StandardScaler()

y = encoder.fit_transform(np.array(y).reshape(-1,1)).toarray()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, shuffle=True)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.20, random_state=42, shuffle=True)

X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)
X_val = np.expand_dims(X_val, axis=2)

X_train.shape, X_test.shape, X_val.shape, y_train.shape, y_test.shape, y_val.shape

((12947, 162, 1),
 (2589, 162, 1),
 (648, 162, 1),
 (12947, 4),
 (2589, 4),
 (648, 4))