# Speech Emotion Recognition: Audio + Text (IEMOCAP)
### Made by LuisFraga,
### January, 2020

In [1]:
import gc
gc.collect()
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
import numpy as np
import librosa
import glob, os
import pandas as pd
import pickle # to save model after training

------------------------

## IEMOCAP Preprocessing

### Get Labels

In [7]:
## Labels Pre-processing
label_dir = 'dataset/IEMOCAP/Labels'
label_list = []
for file in glob.glob(label_dir + '/*.txt'):  
    basename = os.path.basename(file)
    label_list.append(file)    
    
contents = []
for file in label_list:
    f = open(file, "r")
    for line in f:
        contents.append(line)

tmp = pd.Series(contents)
tmp = tmp.str.split(":")

file_name = []
label = []

for line in tmp:
    file_name.append(line[0])
    label.append(line[1])

## Create DataFrame
LABEL_DF = pd.DataFrame(file_name, columns = ["FILE_NAME"])
LABEL_DF['LABEL'] = label
LABEL_DF['FILE_NAME'] = LABEL_DF['FILE_NAME'].str.replace(' ','')
LABEL_DF['LABEL'] = LABEL_DF['LABEL'].apply(lambda x: x.split(';')[0])

selected_emotions = ['Anger', 'Happiness', 'Sadness', 'Neutral state', 'Other']

LABEL_DF = LABEL_DF[LABEL_DF['LABEL'].isin(selected_emotions)]
LABEL_DF = LABEL_DF.drop_duplicates(['FILE_NAME'])

LABEL_DF['LABEL'] = LABEL_DF['LABEL'].replace('Other', 'Neutral state')
print(LABEL_DF['LABEL'].value_counts())

print(LABEL_DF.shape)
LABEL_DF.head()

Neutral state    2497
Anger            2400
Happiness        2025
Sadness          1858
Name: LABEL, dtype: int64
(8780, 2)


Unnamed: 0,FILE_NAME,LABEL
0,Ses01F_impro01_F000,Neutral state
1,Ses01F_impro01_F001,Neutral state
2,Ses01F_impro01_F002,Neutral state
3,Ses01F_impro01_F003,Neutral state
4,Ses01F_impro01_F004,Neutral state


## Balance Labels

In [8]:
from sklearn.utils import resample
anger = LABEL_DF.loc[LABEL_DF['LABEL'] == 'Anger']
sadness = LABEL_DF.loc[LABEL_DF['LABEL'] == 'Sadness']
happy = LABEL_DF.loc[LABEL_DF['LABEL'] == 'Happiness']
neutral = LABEL_DF.loc[LABEL_DF['LABEL'] == 'Neutral state']

anger = resample(anger, replace = False, n_samples = 2000)
sadness = resample(sadness, replace = True, n_samples = 2000)
happy = resample(happy, replace = False, n_samples = 2000)
neutral = resample(neutral, replace = False, n_samples = 2000)

frames = [anger, sadness, happy, neutral]

NEW_DF = pd.concat(frames)

print(NEW_DF['LABEL'].value_counts())
print(NEW_DF.shape)
NEW_DF = NEW_DF.sort_values(by=['FILE_NAME'])
NEW_DF = NEW_DF.reset_index()
NEW_DF = NEW_DF.drop(columns = ['index'])
LABEL_DF = NEW_DF
LABEL_DF.head()

Anger            2000
Neutral state    2000
Sadness          2000
Happiness        2000
Name: LABEL, dtype: int64
(8000, 2)


Unnamed: 0,FILE_NAME,LABEL
0,Ses01F_impro01_F000,Neutral state
1,Ses01F_impro01_F001,Neutral state
2,Ses01F_impro01_F002,Neutral state
3,Ses01F_impro01_F003,Neutral state
4,Ses01F_impro01_F004,Neutral state


### Get Text

In [9]:
## Text Pre-processing
text_dir = 'dataset/IEMOCAP/Text_Files'
text_list = []
for file in glob.glob(text_dir + '/*.txt'):  
    basename = os.path.basename(file)
    text_list.append(file)       

contents = []
for file in text_list:
    f = open(file, "r")
    for line in f:
        contents.append(line)

tmp = pd.Series(contents)
tmp = tmp.str.split(":")

file_name = []
text = []

for line in tmp:
    name = line[0].split(" ")
    file_name.append(name[0])
    text.append(line[1])

## Create DataFrame
TEXT_DF = pd.DataFrame(file_name, columns = ["FILE_NAME"])
text = [t.replace('\n', '') for t in text]
TEXT_DF['TEXT'] = text

## Clean dataframe
remove_extras = ['M', 'F']
TEXT_DF = TEXT_DF[~TEXT_DF['FILE_NAME'].isin(remove_extras)]
TEXT_DF = TEXT_DF.drop_duplicates(['FILE_NAME'])

print(TEXT_DF.shape)
TEXT_DF.head()

(10085, 2)


Unnamed: 0,FILE_NAME,TEXT
0,Ses01F_impro01_F000,Excuse me.
1,Ses01F_impro01_M000,Do you have your forms?
2,Ses01F_impro01_F001,Yeah.
3,Ses01F_impro01_M001,Let me see them.
4,Ses01F_impro01_F002,Is there a problem?


### Get Audio Files

In [10]:
## Get all wav paths
parent_dir = 'dataset/IEMOCAP/Wav_Files'
subject_dirs = [os.path.join(parent_dir, dir) for dir in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, dir))]

wav_list = []
for dir in subject_dirs:
    wav_files = [os.path.join(dir, wav) for wav in os.listdir(dir) if os.path.isfile(os.path.join(dir, wav)) and wav.endswith('.wav')]
    for file in wav_files:
        wav_list.append(file)
        
wav_list = pd.Series(wav_list)
tmp = wav_list.str.split("\\")

wav_names = []
for line in tmp:
    wav_names.append(line[2])
    
WAV_DF = pd.DataFrame(wav_list, columns = ["FILE_PATH"])
WAV_DF["FILE_NAME"] = wav_names
WAV_DF['FILE_NAME'] = WAV_DF['FILE_NAME'].str.replace('.wav','')

### Map (Audio x Labels x Text)

In [11]:
AUDIO_LABEL = pd.DataFrame()
for index, row in LABEL_DF.iterrows():
    match = WAV_DF.loc[WAV_DF['FILE_NAME'] == row.FILE_NAME]
    match['LABEL'] = row.LABEL
    AUDIO_LABEL = AUDIO_LABEL.append(match, ignore_index = True)

IEMOCAP = pd.DataFrame()
for index, row in TEXT_DF.iterrows():
    match = AUDIO_LABEL.loc[AUDIO_LABEL['FILE_NAME'] == row.FILE_NAME]
    match['SENTENCE'] = row.TEXT
    IEMOCAP = IEMOCAP.append(match, ignore_index = True)
    
# IEMOCAP.to_csv("IEMOCAP.csv")

del WAV_DF, LABEL_DF, TEXT_DF, tmp

print(IEMOCAP.shape)
IEMOCAP.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


(8000, 4)


Unnamed: 0,FILE_PATH,FILE_NAME,LABEL,SENTENCE
0,dataset/IEMOCAP/Wav_Files\Ses01F_impro01\Ses01...,Ses01F_impro01_F000,Neutral state,Excuse me.
1,dataset/IEMOCAP/Wav_Files\Ses01F_impro01\Ses01...,Ses01F_impro01_M000,Neutral state,Do you have your forms?
2,dataset/IEMOCAP/Wav_Files\Ses01F_impro01\Ses01...,Ses01F_impro01_F001,Neutral state,Yeah.
3,dataset/IEMOCAP/Wav_Files\Ses01F_impro01\Ses01...,Ses01F_impro01_M001,Neutral state,Let me see them.
4,dataset/IEMOCAP/Wav_Files\Ses01F_impro01\Ses01...,Ses01F_impro01_F002,Neutral state,Is there a problem?


---------------

## Text Processing

In [12]:
from tensorflow.keras import  preprocessing, utils
import itertools

VOCAB_SIZE = 3000 # positive only

MAX_LEN = 30

### Encoder
input_lines = list()
labels = []
for index, row in IEMOCAP.iterrows():
    input_lines.append('<BOS> ' + row.SENTENCE + ' <EOS>') 
    labels.append(row.LABEL)

tokenizer = preprocessing.text.Tokenizer(filters='!"#$%&()*+,-.:;=?@[\\]^_`{|}~\t\n', num_words = VOCAB_SIZE)
tokenizer.fit_on_texts(input_lines) 
tokenized_input_lines = tokenizer.texts_to_sequences(input_lines) 

length_list = list()
for token_seq in tokenized_input_lines:
    length_list.append(len(token_seq))

max_input_length = max(length_list) # Gets the higher value in the list  
print('Input max length is:', max_input_length)

padded_input_lines = preprocessing.sequence.pad_sequences(tokenized_input_lines , maxlen= MAX_LEN , padding='post')
encoder_input_data = np.array(padded_input_lines)
print('Encoder input data shape:', encoder_input_data.shape)

input_word_dict = tokenizer.word_index
print("Maximum size of Vocab:", len(input_word_dict))
input_word_dict = dict(itertools.islice(input_word_dict.items(), VOCAB_SIZE-1))

num_input_tokens = len(input_word_dict)+1
input_word_dict['<unk>'] = num_input_tokens
print('Number of Input tokens:', num_input_tokens)

### Prepare Labels
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder


print(encoder_input_data.shape)

X_text = encoder_input_data

Input max length is: 102
Encoder input data shape: (8000, 30)
Maximum size of Vocab: 3040
Number of Input tokens: 3000
(8000, 30)


Using TensorFlow backend.


## Load Glove

In [13]:
### Load Glove (Word Embeddings)
embeddings_index = {}

with open(r'C:\Users\abdi\Desktop\2Meu\WordEmbeddings\glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

print("Glove Loaded!") 

def embedding_matrix_creator(embedding_dimension):
    embedding_matrix = np.zeros((VOCAB_SIZE, embedding_dimension))
    for word, i in input_word_dict.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix

embedding_matrix = embedding_matrix_creator(100) # Change embedding dimensions

Glove Loaded!


-----------------------

## Audio Processing

## Extract Speech Features (MFCC)

In [16]:
def extract_features(file_path):
    max_pad_len = 750
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')        
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    
        shape = mfccs.shape[1]
                     
        if max_pad_len > mfccs.shape[1]:
            pad_width = max_pad_len - mfccs.shape[1]
            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

        else:
            print(mfccs.shape)
            mfccs = mfccs[:, :max_pad_len]
            
    except Exception as e:
        print("Error encountered while parsing file: ", file_path)
        mfccs = "error"
        return mfccs
        
    return mfccs

----------------------

## Load Features

In [None]:
def load_data():
    X, y = [], []
    i = 0
    for index, row in IEMOCAP.iterrows():
        print("File:", i+1)
        i+=1
        features = extract_features(row.FILE_PATH)
        emotion = row.LABEL
        X.append(features)
        y.append(emotion)

    return np.array(X), y

X_audio, y = load_data()

File: 1
File: 2
File: 3
File: 4
File: 5
File: 6
File: 7
File: 8
File: 9
File: 10
File: 11
File: 12
File: 13
File: 14
File: 15
File: 16
File: 17
File: 18
File: 19
File: 20
File: 21
File: 22
File: 23
File: 24
File: 25
File: 26
File: 27
File: 28
File: 29
File: 30
File: 31
File: 32
File: 33
File: 34
File: 35
File: 36
File: 37
File: 38
File: 39
File: 40
File: 41
File: 42
File: 43
File: 44
File: 45
File: 46
File: 47
File: 48
File: 49
File: 50
File: 51
File: 52
File: 53
File: 54
File: 55
File: 56
File: 57
File: 58
File: 59
File: 60
File: 61
File: 62
File: 63
File: 64
File: 65
File: 66
File: 67
File: 68
File: 69
File: 70
File: 71
File: 72
File: 73
File: 74
File: 75
File: 76
File: 77
File: 78
File: 79
File: 80
File: 81
File: 82
File: 83
File: 84
File: 85
File: 86
File: 87
File: 88
File: 89
File: 90
File: 91
File: 92
File: 93
File: 94
File: 95
File: 96
File: 97
File: 98
File: 99
File: 100
File: 101
File: 102
File: 103
File: 104
File: 105
File: 106
File: 107
File: 108
File: 109
File: 110
File: 11

File: 829
File: 830
File: 831
File: 832
File: 833
File: 834
File: 835
File: 836
File: 837
File: 838
File: 839
File: 840
File: 841
File: 842
File: 843
File: 844
File: 845
File: 846
File: 847
File: 848
File: 849
File: 850
File: 851
File: 852
File: 853
File: 854
File: 855
File: 856
File: 857
File: 858
File: 859
File: 860
File: 861
File: 862
File: 863
File: 864
File: 865
File: 866
File: 867
File: 868
File: 869
File: 870
File: 871
File: 872
File: 873
File: 874
File: 875
File: 876
File: 877
File: 878
File: 879
File: 880
File: 881
File: 882
File: 883
File: 884
File: 885
File: 886
File: 887
File: 888
File: 889
File: 890
File: 891
File: 892
File: 893
File: 894
File: 895
File: 896
File: 897
File: 898
File: 899
File: 900
File: 901
File: 902
File: 903
File: 904
File: 905
File: 906
File: 907
File: 908
File: 909
File: 910
File: 911
File: 912
File: 913
File: 914
File: 915
File: 916
File: 917
File: 918
File: 919
File: 920
File: 921
File: 922
File: 923
File: 924
File: 925
File: 926
File: 927
File: 928


File: 1591
File: 1592
File: 1593
File: 1594
File: 1595
File: 1596
File: 1597
File: 1598
File: 1599
File: 1600
File: 1601
File: 1602
File: 1603
(40, 759)
File: 1604
File: 1605
File: 1606
File: 1607
File: 1608
File: 1609
File: 1610
File: 1611
File: 1612
File: 1613
File: 1614
File: 1615
File: 1616
File: 1617
File: 1618
File: 1619
File: 1620
File: 1621
File: 1622
File: 1623
File: 1624
File: 1625
File: 1626
File: 1627
File: 1628
File: 1629
File: 1630
File: 1631
File: 1632
File: 1633
File: 1634
File: 1635
File: 1636
File: 1637
File: 1638
File: 1639
File: 1640
File: 1641
File: 1642
File: 1643
File: 1644
File: 1645
(40, 771)
File: 1646
File: 1647
File: 1648
File: 1649
File: 1650
File: 1651
File: 1652
File: 1653
File: 1654
File: 1655
File: 1656
File: 1657
File: 1658
File: 1659
File: 1660
File: 1661
File: 1662
File: 1663
File: 1664
File: 1665
File: 1666
File: 1667
File: 1668
File: 1669
File: 1670
File: 1671
File: 1672
File: 1673
File: 1674
File: 1675
File: 1676
File: 1677
File: 1678
File: 1679
F

File: 2334
File: 2335
File: 2336
File: 2337
File: 2338
File: 2339
File: 2340
File: 2341
File: 2342
File: 2343
File: 2344
File: 2345
File: 2346
File: 2347
File: 2348
File: 2349
File: 2350
File: 2351
File: 2352
File: 2353
File: 2354
File: 2355
File: 2356
File: 2357
File: 2358
File: 2359
File: 2360
File: 2361
File: 2362
File: 2363
File: 2364
File: 2365
File: 2366
(40, 766)
File: 2367
File: 2368
File: 2369
File: 2370
File: 2371
File: 2372
File: 2373
File: 2374
File: 2375
File: 2376
File: 2377
File: 2378
File: 2379
File: 2380
File: 2381
File: 2382
File: 2383
File: 2384
File: 2385
File: 2386
File: 2387
File: 2388
File: 2389
File: 2390
File: 2391
File: 2392
File: 2393
File: 2394
File: 2395
File: 2396
File: 2397
File: 2398
File: 2399
File: 2400
File: 2401
File: 2402
File: 2403
File: 2404
File: 2405
File: 2406
File: 2407
File: 2408
File: 2409
File: 2410
File: 2411
File: 2412
File: 2413
File: 2414
File: 2415
File: 2416
File: 2417
File: 2418
File: 2419
File: 2420
File: 2421
File: 2422
File: 2423


File: 3079
File: 3080
File: 3081
File: 3082
File: 3083
File: 3084
File: 3085
File: 3086
File: 3087
File: 3088
File: 3089
File: 3090
File: 3091
File: 3092
File: 3093
File: 3094
File: 3095
File: 3096
File: 3097
File: 3098
File: 3099
File: 3100
File: 3101
File: 3102
File: 3103
File: 3104
File: 3105
File: 3106
File: 3107
File: 3108
File: 3109
File: 3110
File: 3111
File: 3112
File: 3113
File: 3114
File: 3115
File: 3116
File: 3117
File: 3118
File: 3119
File: 3120
File: 3121
File: 3122
File: 3123
File: 3124
File: 3125
File: 3126
File: 3127
File: 3128
File: 3129
File: 3130
File: 3131
File: 3132
File: 3133
File: 3134
File: 3135
File: 3136
File: 3137
File: 3138
File: 3139
File: 3140
File: 3141
File: 3142
File: 3143
File: 3144
File: 3145
File: 3146
File: 3147
File: 3148
File: 3149
File: 3150
File: 3151
File: 3152
File: 3153
File: 3154
File: 3155
File: 3156
File: 3157
File: 3158
File: 3159
File: 3160
File: 3161
File: 3162
File: 3163
File: 3164
File: 3165
File: 3166
File: 3167
File: 3168
File: 3169

File: 3825
File: 3826
File: 3827
File: 3828
File: 3829
File: 3830
File: 3831
File: 3832
File: 3833
File: 3834
File: 3835
File: 3836
File: 3837
File: 3838
File: 3839
File: 3840
File: 3841
File: 3842
File: 3843
File: 3844
File: 3845
File: 3846
File: 3847
File: 3848
File: 3849
File: 3850
File: 3851
File: 3852
File: 3853
File: 3854
File: 3855
File: 3856
File: 3857
File: 3858
File: 3859
File: 3860
File: 3861
File: 3862
File: 3863
File: 3864
File: 3865
File: 3866
File: 3867
File: 3868
File: 3869
File: 3870
File: 3871
File: 3872
File: 3873
File: 3874
File: 3875
File: 3876
File: 3877
File: 3878
File: 3879
File: 3880
File: 3881
File: 3882
File: 3883
File: 3884
File: 3885
File: 3886
File: 3887
File: 3888
File: 3889
File: 3890
File: 3891
File: 3892
File: 3893
File: 3894
File: 3895
File: 3896
File: 3897
File: 3898
File: 3899
File: 3900
File: 3901
File: 3902
File: 3903
File: 3904
File: 3905
File: 3906
File: 3907
File: 3908
File: 3909
File: 3910
File: 3911
File: 3912
File: 3913
File: 3914
File: 3915

File: 4575
File: 4576
File: 4577
File: 4578
File: 4579
File: 4580
File: 4581
File: 4582
File: 4583
File: 4584
File: 4585
File: 4586
File: 4587
File: 4588
File: 4589
File: 4590
File: 4591
File: 4592
File: 4593
File: 4594
File: 4595
File: 4596
File: 4597
File: 4598
File: 4599
File: 4600
File: 4601
File: 4602
File: 4603
File: 4604
File: 4605
File: 4606
File: 4607
File: 4608
File: 4609
File: 4610
File: 4611
File: 4612
File: 4613
File: 4614
File: 4615
File: 4616
File: 4617
File: 4618
File: 4619
File: 4620
File: 4621
File: 4622
File: 4623
File: 4624
File: 4625
File: 4626
File: 4627
File: 4628
File: 4629
File: 4630
File: 4631
File: 4632
File: 4633
File: 4634
File: 4635
File: 4636
File: 4637
File: 4638
File: 4639
File: 4640
File: 4641
File: 4642
File: 4643
File: 4644
File: 4645
File: 4646
File: 4647
File: 4648
File: 4649
File: 4650
File: 4651
File: 4652
File: 4653
File: 4654
File: 4655
File: 4656
File: 4657
File: 4658
File: 4659
File: 4660
File: 4661
File: 4662
File: 4663
File: 4664
File: 4665

File: 5321
File: 5322
File: 5323
File: 5324
File: 5325
File: 5326
File: 5327
File: 5328
File: 5329
File: 5330
File: 5331
File: 5332
File: 5333
File: 5334
File: 5335
File: 5336
File: 5337
File: 5338
File: 5339
File: 5340
File: 5341
File: 5342
File: 5343
File: 5344
File: 5345
File: 5346
File: 5347
File: 5348
File: 5349
File: 5350
File: 5351
File: 5352
File: 5353
File: 5354
File: 5355
File: 5356
File: 5357
File: 5358
File: 5359
File: 5360
File: 5361
File: 5362
File: 5363
File: 5364
File: 5365
File: 5366
File: 5367
File: 5368
File: 5369
File: 5370
File: 5371
File: 5372
File: 5373
File: 5374
File: 5375
File: 5376
File: 5377
File: 5378
File: 5379
File: 5380
File: 5381
File: 5382
File: 5383
File: 5384
File: 5385
File: 5386
File: 5387
File: 5388
File: 5389
File: 5390
File: 5391
File: 5392
File: 5393
File: 5394
File: 5395
File: 5396
File: 5397
File: 5398
File: 5399
File: 5400
File: 5401
File: 5402
File: 5403
File: 5404
File: 5405
File: 5406
File: 5407
File: 5408
File: 5409
File: 5410
File: 5411

File: 6068
File: 6069
File: 6070
File: 6071
File: 6072
File: 6073
File: 6074
File: 6075
File: 6076
File: 6077
File: 6078
File: 6079
File: 6080
File: 6081
File: 6082
File: 6083
File: 6084
(40, 764)
File: 6085
File: 6086
File: 6087
File: 6088
File: 6089
File: 6090
File: 6091
File: 6092
File: 6093
File: 6094
File: 6095
File: 6096
File: 6097
File: 6098
File: 6099
File: 6100
File: 6101
File: 6102
File: 6103
File: 6104
File: 6105
File: 6106
File: 6107
File: 6108
File: 6109
File: 6110
File: 6111
File: 6112
File: 6113
File: 6114
File: 6115
File: 6116
File: 6117
File: 6118
File: 6119
File: 6120
File: 6121
File: 6122
File: 6123
File: 6124
File: 6125
File: 6126
File: 6127
File: 6128
File: 6129
File: 6130
File: 6131
File: 6132
File: 6133
File: 6134
File: 6135
File: 6136
File: 6137
File: 6138
File: 6139
File: 6140
File: 6141
File: 6142
File: 6143
File: 6144
File: 6145
File: 6146
File: 6147
File: 6148
File: 6149
File: 6150
File: 6151
File: 6152
File: 6153
File: 6154
File: 6155
File: 6156
File: 6157


File: 6813
File: 6814
File: 6815
File: 6816
File: 6817
File: 6818
File: 6819
File: 6820
File: 6821
File: 6822
File: 6823
File: 6824
File: 6825
File: 6826
File: 6827
File: 6828
File: 6829
File: 6830
File: 6831
File: 6832
File: 6833
File: 6834
File: 6835
File: 6836
File: 6837
File: 6838
(40, 865)
File: 6839
File: 6840
File: 6841
File: 6842
File: 6843
File: 6844
File: 6845
File: 6846
File: 6847
File: 6848
File: 6849
File: 6850
File: 6851
File: 6852
File: 6853
File: 6854
File: 6855
File: 6856
File: 6857
File: 6858
File: 6859
File: 6860
File: 6861
File: 6862
File: 6863
File: 6864
File: 6865
File: 6866
File: 6867
File: 6868
File: 6869
File: 6870
File: 6871
File: 6872
File: 6873
File: 6874
File: 6875
File: 6876
File: 6877
File: 6878
File: 6879
File: 6880
File: 6881
File: 6882
File: 6883
File: 6884
File: 6885
File: 6886
File: 6887
File: 6888
File: 6889
File: 6890
File: 6891
File: 6892
File: 6893
File: 6894
File: 6895
File: 6896
File: 6897
File: 6898
File: 6899
File: 6900
File: 6901
File: 6902


## Prepare Train/Test Data

In [None]:
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

lb = LabelEncoder()
y_cat = np_utils.to_categorical(lb.fit_transform(y))

audio_train, audio_test, text_train, text_test, y_train, y_test = train_test_split(X_audio,
                                                                                   X_text,
                                                                                   y_cat, 
                                                                                   test_size = 0.25)


audio_train = np.expand_dims(audio_train, axis = 3)
audio_test = np.expand_dims(audio_test, axis = 3)

print("(>^.^)> audio_train shape:", audio_train.shape)
print("(>^.^)> audio_test shape:", audio_test.shape)
print("---")
print("(>^.^)> text_train shape:", text_train.shape)
print("(>^.^)> text_test shape:", text_test.shape)
print("---")
print("(>^.^)> y_train shape:", y_train.shape)
print("(>^.^)> y_test shape:", y_test.shape)

## CNN + LSTM -> Combine Text + Audio

In [None]:
from keras.layers import Concatenate
from keras.models import Model
from keras.layers.core import Flatten
from keras.layers import Input, Dense, Dropout, LSTM, Embedding, concatenate, RepeatVector, TimeDistributed, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, Conv1D
from keras.layers.convolutional import MaxPooling2D, MaxPooling1D
from keras.callbacks import ModelCheckpoint

EPOCHS = 30
CONV_DIM = 128
BATCH_SIZE = 32
HIDDEN_DIM = 128
inputShape = audio_train.shape[1:]
chanDim = -1 

# Text Model
text_input = Input(shape=(MAX_LEN,))
text_model_1 = Embedding(VOCAB_SIZE, output_dim = 100, input_length = MAX_LEN, weights = [embedding_matrix], trainable =  True)(text_input)
text_model_2 = Dropout(0.25)(text_model_1)
text_model_3 = Conv1D(CONV_DIM, 5, padding='valid', activation='relu', strides=1)(text_model_2)
text_model_4 = MaxPooling1D(pool_size=4)(text_model_3)
text_model_5 = LSTM(HIDDEN_DIM, activation = 'relu')(text_model_4)

#Audio Model
audio_input = Input(shape=inputShape)
audio_model = Conv2D(CONV_DIM, (3, 3), padding="same", input_shape=inputShape)(audio_input)
audio_model = Activation("relu")(audio_model)
audio_model = BatchNormalization(axis=chanDim)(audio_model)
audio_model = MaxPooling2D(pool_size=(3, 3))(audio_model)
audio_model = Dropout(0.25)(audio_model)

audio_model = Flatten()(audio_model)
audio_model = Dense(HIDDEN_DIM)(audio_model)
audio_model = Activation("relu")(audio_model)
audio_model = BatchNormalization()(audio_model)
audio_model_2 = Dropout(0.25)(audio_model)

# Combined Model
final_model_1 = concatenate([text_model_5, audio_model_2])
final_model_2 = Dense(HIDDEN_DIM, activation='relu')(final_model_1)

final_model = Dense(4, activation='softmax')(final_model_1)

model = Model(inputs=[audio_input, text_input], outputs = final_model)
model.compile(loss='categorical_crossentropy', optimizer= 'adam', metrics = ['accuracy'])    
model.summary()

# Checkpoint
filepath = "main_weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Fit Data
history = model.fit([audio_train, text_train], y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, callbacks=callbacks_list, 
                     validation_data = ([audio_test, text_test], y_test))

Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_29 (InputLayer)           (None, 40, 300, 1)   0                                            
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 40, 300, 128) 1280        input_29[0][0]                   
__________________________________________________________________________________________________
activation_27 (Activation)      (None, 40, 300, 128) 0           conv2d_14[0][0]                  
__________________________________________________________________________________________________
batch_normalization_27 (BatchNo (None, 40, 300, 128) 512         activation_27[0][0]              
___________________________________________________________________________________________

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 24000 samples, validate on 3200 samples
Epoch 1/30


------------------------

### Save Model

In [None]:
with open('main_model.json', 'w') as f:
    f.write(model.to_json())

import pickle
f = open("main_dic.pkl","wb")
pickle.dump(input_word_dict,f)
f.close()

### Load Model

In [None]:
import keras
from keras.models import model_from_json

Adam = keras.optimizers.Adam(learning_rate=0.0001)

### Main Model
# Model reconstruction from JSON file
with open('main_model.json', 'r') as f:
    main_model = model_from_json(f.read())

## Visualization

In [None]:
import matplotlib.pyplot as plt

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'], linewidth=2)

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.ylim(0,10)
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()


plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'], linewidth=2)

plt.title('model acc')
plt.ylabel('acc')
plt.xlabel('epoch')
plt.legend(['Train', 'Test'], loc='lower right')
plt.show()

## Prediction

In [None]:
### Clean-text function
import re
def clean_text(text):
    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text

In [None]:
def load_vocab(vocab):
    with open('Load_Files/' + vocab + '.pkl', 'rb') as f:
        return pickle.load(f)

In [None]:
def encoding_input( sentence : str ):
    sentence = '<BOS> ' + sentence + ' <EOS>' # add bos and eos tokens
    words = sentence.lower().split()
    tokens_list = list()
    dictionary = load_vocab('main_dic')

    for word in words:
        try:
            tokens_list.append(dictionary[ word ])
        except Exception as e:
            tokens_list.append(dictionary[ '<unk>' ])
        
    return preprocessing.sequence.pad_sequences([tokens_list] , maxlen = MAX_LEN , padding='post')

### New Input

In [None]:
def new_input(file_path, sentence):
    X = []
    features = extract_features(file_path)
    X.append(features)
    X = np.expand_dims(np.array(X), axis = 3)
    encoding = encoding_input(sentence)

    y_pred = main_model.predict([X, encoding])
    emotion = np.argmax(y_pred[0])
    print(emotion)