# Importing Necessary Libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import seaborn as sns
import os
import librosa

from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from keras.layers import Dense,Dropout
from keras.models import Sequential
from sklearn.metrics import accuracy_score
import joblib, pickle
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Sampling rate (Hz) requested from librosa when each audio clip is loaded.
# NOTE(review): 45100 is an unusual rate — possibly a typo for the common 44100 Hz.
# Confirm before changing: librosa resamples to this value, so it affects every feature.
SAMPLE_RATE = 45100

from tqdm import tqdm
from scipy.stats import skew
# Root folder holding train.csv / test.csv / sample_submission.csv and the two audio folders.
# NOTE(review): relative path, presumably a Kaggle input mount — verify when running elsewhere.
data_path = '../input/intelligence-augmentation-ia-for-ai/dataset/'

# Loading the Dataset 

In [2]:
# List the audio clips on disk and read the competition tables.
audio_train_files, audio_test_files = (
    os.listdir(data_path + folder) for folder in ('TrainAudioFiles', 'TestAudioFiles')
)
train, test, submission = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

# Set to True for a quick smoke run that keeps only the first 10 rows of each table.
debug = False
if debug:
    train, test = train.head(10), test.head(10)

# Feature Extraction From Audio Files

In [3]:
#returns mfcc features with mean and standard deviation along time
def get_mfcc(name, path):
    b, _ = librosa.core.load(path + name, sr = SAMPLE_RATE)
    assert _ == SAMPLE_RATE
    try:
        ft1 = librosa.feature.mfcc(b, sr = SAMPLE_RATE, n_mfcc=20)
        ft2 = librosa.feature.zero_crossing_rate(b)[0]
        ft3 = librosa.feature.spectral_rolloff(b)[0]
        ft4 = librosa.feature.spectral_centroid(b)[0]
        ft5 = librosa.feature.spectral_contrast(b)[0]
        ft6 = librosa.feature.spectral_bandwidth(b)[0]
        ft7 = librosa.feature.spectral_flatness(b)[0]
        ft8 = librosa.feature.melspectrogram(b)[0]
        ft1_trunc = np.hstack((np.mean(ft1, axis = 1), np.std(ft1, axis = 1), skew(ft1, axis = 1), np.max(ft1, axis = 1), np.min(ft1, axis = 1), np.sum(ft1, axis = 1)))
        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.min(ft2), np.sum(ft2)))
        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.min(ft3), np.sum(ft3)))
        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.min(ft4), np.sum(ft4)))
        ft5_trunc = np.hstack((np.mean(ft5), np.std(ft5), skew(ft5), np.max(ft5), np.min(ft5), np.sum(ft5)))
        ft6_trunc = np.hstack((np.mean(ft6), np.std(ft6), skew(ft6), np.max(ft6), np.min(ft6), np.sum(ft6)))
        ft7_trunc = np.hstack((np.mean(ft7), np.std(ft7), skew(ft7), np.max(ft7), np.min(ft7), np.sum(ft7)))
        ft8_trunc = np.hstack((np.mean(ft8), np.std(ft8), skew(ft8), np.max(ft8), np.min(ft8), np.sum(ft8)))
        
        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc, ft5_trunc, ft6_trunc, ft7_trunc, ft8_trunc)))
    except:
        print('bad file')
        return pd.Series([0]*115)

In [4]:
# Run the extractor over every listed clip; missing values in the feature table become 0.
train_data = (
    train['filename']
    .apply(get_mfcc, path=data_path+'TrainAudioFiles/')
    .fillna(0)
)
print('done loading train mfcc')
test_data = (
    test['filename']
    .apply(get_mfcc, path=data_path+'TestAudioFiles/')
    .fillna(0)
)
print('done loading test mfcc')

# Re-attach the identifiers (and the target for train) after the numeric feature columns.
train_data = train_data.assign(filename=train['filename'], emotion=train['emotion'])
test_data = test_data.assign(filename=test['filename'])

done loading train mfcc
done loading test mfcc


# Preparing the Audio Features and Labels

In [5]:
# Label <-> integer id lookups for the 'emotion' column; decode is the exact inverse of encode.
encode = {
    'emotion': {
        label: idx
        for idx, label in enumerate(
            ('anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise')
        )
    }
}
decode = {'emotion': {idx: label for label, idx in encode['emotion'].items()}}

# Swap the string labels in the training table for their integer ids.
train_data = train_data.replace(encode)
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,154,155,156,157,158,159,160,161,filename,emotion
0,-354.867004,193.175323,-53.355343,39.074314,3.960026,4.72156,-3.838528,-4.757781,7.602531,-6.266951,...,5e-06,0.014919,0.163772,0.175347,1.610929,0.950949,0.001916,35.210976,18777.mp3,4
1,-355.202362,143.428635,-57.462612,40.503212,-16.505716,8.730952,-11.762589,-8.982899,-7.560604,-11.372784,...,9e-06,0.129636,0.021694,0.079036,5.659012,0.675762,0.000318,6.421291,24041.mp3,4
2,-431.580414,178.00209,-38.236614,60.291965,21.734367,0.912716,-13.096365,-5.913789,-5.313841,-12.444064,...,1e-05,0.003418,0.026037,0.02445,1.253504,0.088569,0.000684,0.807148,1621.mp3,3
3,-304.168152,172.427704,-58.746151,23.765541,-25.326628,1.595185,-11.662876,3.741948,-10.874989,-11.988649,...,8e-06,0.023044,0.04765,0.111315,5.614192,0.949456,0.00023,6.575749,28883.mp3,4
4,-454.580322,135.082611,-34.125038,46.622921,-17.965414,5.93189,4.517963,0.285223,7.517501,-7.701989,...,5e-06,0.28585,0.014066,0.01614,2.703236,0.105954,0.00052,3.502504,14918.mp3,4


In [6]:
# Feature columns = every column of the training table except the target and the file id.
cols = list(train_data.columns)
for non_feature in ('emotion', 'filename'):
    cols.remove(non_feature)

In [7]:
# Feature matrix and target for training, plus the matching feature matrix for the test clips.
train_x, train_y = train_data[cols], train_data['emotion']
x_test = test_data[cols]

In [8]:
# Set to True to train on just the first 10 rows of each table for a quick dry run.
debug = False
if debug:
    train_x, train_y, x_test = (part[:10] for part in (train_x, train_y, x_test))

# Model Training

In [9]:
# Stratified K-fold training of a soft-voting ensemble (LightGBM, ExtraTrees, RandomForest
# weighted 8:2:1). For every seed the fold models' test probabilities are averaged, and the
# per-seed averages are averaged again into `final_preds` (rows = test clips, columns = class ids).
n_splits = 10
random_states = [42]

# Convert the DataFrames once instead of on every fold.
train_features = train_x.values
train_labels = train_y.values
test_features = x_test.values

# One probability column per encoded emotion id (7 with the current label mapping).
n_classes = len(decode['emotion'])
final_preds = np.zeros((len(test_features), n_classes))

for seed in random_states:
    test_preds = np.zeros_like(final_preds)
    # Sized from the labels actually being split, so it always lines up with train_y
    # (train_data can be longer when the debug subset is enabled).
    oof_predictions = np.zeros(len(train_labels))
    skf = StratifiedKFold(n_splits = n_splits , shuffle = True , random_state = seed)
    print(f"SEED {seed}")
    for fold, (tr_index , val_index) in enumerate(skf.split(train_features , train_labels)):

        print("-" * 50)
        print(f"Fold {fold}")

        x_train, x_val = train_features[tr_index], train_features[val_index]
        y_train, y_val = train_labels[tr_index], train_labels[val_index]

        # NOTE(review): the estimators use fixed/default random states rather than `seed`,
        # so adding more seeds only changes the fold assignment — confirm that is intended.
        model1 = LGBMClassifier()
        model2 = ExtraTreesClassifier(random_state = 0)
        model3 = RandomForestClassifier(n_estimators = 500, random_state = 42)

        clf = VotingClassifier(estimators=[('lgbm', model1), ('ext', model2), ('rf', model3)], weights = [8, 2, 1], voting='soft')
        clf.fit(x_train, y_train)

        # NOTE(review): the file name has no seed component, so with several seeds the
        # later seed overwrites the earlier fold models.
        joblib.dump(clf, f"vc_fold_{fold}.pkl")

        # predict_proba columns follow clf.classes_; map through it instead of assuming
        # that column i is label i and that all classes are present.
        class_ids = clf.classes_.astype(int)

        val_preds = clf.predict_proba(x_val)
        val_labels = class_ids[np.argmax(val_preds, axis = -1)]
        print("Ensemble Validation Accuracy : " , accuracy_score(y_val , val_labels))

        oof_predictions[val_index] = val_labels
        test_preds[:, class_ids] += clf.predict_proba(test_features)

    print("-" * 50)
    print(f"Ensemble OOF Score with SEED {seed} : " , accuracy_score(train_labels, oof_predictions))
    print("-" * 50)

    # Average over folds, then accumulate for the average over seeds.
    test_preds /= n_splits
    final_preds += test_preds

final_preds /= len(random_states)

SEED 42
--------------------------------------------------
Fold 0
Ensemble Validation Accuracy :  0.5824742268041238
--------------------------------------------------
Fold 1
Ensemble Validation Accuracy :  0.6202749140893471
--------------------------------------------------
Fold 2
Ensemble Validation Accuracy :  0.5876288659793815
--------------------------------------------------
Fold 3
Ensemble Validation Accuracy :  0.5962199312714777
--------------------------------------------------
Fold 4
Ensemble Validation Accuracy :  0.5893470790378007
--------------------------------------------------
Fold 5
Ensemble Validation Accuracy :  0.5927835051546392
--------------------------------------------------
Fold 6
Ensemble Validation Accuracy :  0.6041308089500861
--------------------------------------------------
Fold 7
Ensemble Validation Accuracy :  0.5697074010327022
--------------------------------------------------
Fold 8
Ensemble Validation Accuracy :  0.5851979345955249
-----------

# Prediction

In [10]:
# Take the most probable class id per test clip, turn ids back into emotion names,
# and write the two-column submission file.
predicted_ids = final_preds.argmax(axis=-1)
test_data['emotion'] = predicted_ids
test_data = test_data.replace(decode)

submission_view = test_data[['filename', 'emotion']]
submission_view.to_csv('submission.csv', index = False)
submission_view.head()

Unnamed: 0,filename,emotion
0,26199.mp3,neutral
1,692.mp3,neutral
2,16821.mp3,neutral
3,1608.mp3,neutral
4,47947.wav,disgust


# Postprocessing

In [11]:
postprocess = True
if postprocess:
    # Work on a copy so the averaged ensemble probabilities stay untouched.
    final_preds1 = final_preds.copy()

    # Rows whose winning class is id 4 ('neutral') with a probability in (0.5, 0.51]:
    # zero that probability so the runner-up class is selected instead.
    top_class = final_preds1.argmax(axis=1)
    top_prob = final_preds1[np.arange(len(final_preds1)), top_class]
    borderline_neutral = (top_class == 4) & (top_prob > 0.5) & (top_prob <= 0.51)
    final_preds1[borderline_neutral, 4] = 0

    # Number of predictions changed by the rule.
    count = int(borderline_neutral.sum())
    print(count)

    test_data['emotion'] = final_preds1.argmax(axis=-1)
    test_data = test_data.replace(decode)
    test_data[['filename', 'emotion']].to_csv("submission_postprocess.csv", index = False)

25
