In [2]:
!pip install opensmile

Collecting opensmile
  Using cached opensmile-2.5.0-py3-none-win_amd64.whl.metadata (15 kB)
Collecting audobject>=0.6.1 (from opensmile)
  Using cached audobject-0.7.11-py3-none-any.whl.metadata (2.6 kB)
Collecting audinterface>=0.7.0 (from opensmile)
  Using cached audinterface-1.2.1-py3-none-any.whl.metadata (4.2 kB)
Collecting audeer>=1.18.0 (from audinterface>=0.7.0->opensmile)
  Using cached audeer-2.0.0-py3-none-any.whl.metadata (4.1 kB)
Collecting audformat<2.0.0,>=1.0.1 (from audinterface>=0.7.0->opensmile)
  Using cached audformat-1.1.4-py3-none-any.whl.metadata (4.6 kB)
Collecting audiofile>=1.3.0 (from audinterface>=0.7.0->opensmile)
  Using cached audiofile-1.4.0-py3-none-any.whl.metadata (4.9 kB)
Collecting audmath>=1.3.0 (from audinterface>=0.7.0->opensmile)
  Using cached audmath-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting audresample<2.0.0,>=1.1.0 (from audinterface>=0.7.0->opensmile)
  Using cached audresample-1.3.3-py3-none-win_amd64.whl.metadata (4.4 kB)
Coll

In [35]:
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import tensorflow as tf
import pandas as pd
import opensmile

In [4]:
df = pd.DataFrame({ 'id': [fn[:3] for fn in os.listdir('./dataset') if fn.endswith('.wav')] })
df.head()

Unnamed: 0,id
0,300
1,301
2,302
3,303
4,304


In [5]:
is_depressed = {}
for fn in [
    'dev_split_Depression_AVEC2017.csv',
    'train_split_Depression_AVEC2017.csv',
    'test_split_Depression_AVEC2017.csv',
    'full_test_split.csv']:
    with open('./dataset/'+fn, 'r') as f:
        for row in f.readlines()[1:]:
            vals = row[:-1].split(',')
            if len(vals) >= 2:
                is_depressed[vals[0]] = int(vals[1])
df['is_depressed'] = df['id'].apply(lambda id: is_depressed[id] if id in is_depressed else 0)
df.head()

Unnamed: 0,id,is_depressed
0,300,0
1,301,0
2,302,0
3,303,0
4,304,0


In [33]:
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)
smile_df = smile.process_files([f'./dataset/{id}_AUDIO.wav' for id in df['id'].tolist()])
smile_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
./dataset/300_AUDIO.wav,0 days,0 days 00:10:48.500000,2.835027,0.331277,0.016178,0.073676,0.09267,0.169379,0.018994,0.076709,0.095703,0.060193,...,8.667942,0.601993,1.848118,1.84817,-20.0,0.581762,89.249535,44.508762,88.691612,44.451393
./dataset/301_AUDIO.wav,0 days,0 days 00:13:43.900000,1.563116,0.992086,0.023573,0.065601,0.094496,0.226772,0.028895,0.132276,0.16117,0.059034,...,9.814976,0.58523,2.147391,2.147398,-20.0,0.601254,95.877174,49.022385,94.419037,48.620708
./dataset/302_AUDIO.wav,0 days,0 days 00:12:38.800000,0.844108,0.95416,0.000699,0.061128,0.065454,0.101554,0.004327,0.0361,0.040426,0.056394,...,9.711348,0.589838,1.997163,1.997292,-20.0,0.609909,93.560135,47.109814,93.347351,46.97121
./dataset/303_AUDIO.wav,0 days,0 days 00:16:25.300000,1.446217,0.840322,0.039026,0.062715,0.098875,0.221331,0.03616,0.122456,0.158616,0.054199,...,11.710479,0.557672,2.333878,2.333952,-20.0,0.636285,97.992241,51.536892,97.227127,51.981255
./dataset/304_AUDIO.wav,0 days,0 days 00:13:12.600000,1.083328,0.410899,0.023747,0.057617,0.074131,0.142586,0.016514,0.068455,0.084969,0.051853,...,10.999923,0.598984,2.158344,2.158308,20.0,0.548527,97.093452,48.878418,95.413612,50.013947


In [101]:
#smile_df = smile_df.drop('file', axis=1)
X_train, X_test, y_train, y_test = train_test_split(smile_df, df['is_depressed'], stratify=df['is_depressed'])

inp = layers.Input(shape=(6373))
out = layers.Dense(128, activation="relu")(inp)
out = layers.Dense(64, activation="relu")(out)
out = layers.Dense(32, activation="relu")(out)
out = layers.Dense(1, activation='sigmoid')(out)
model = Model(inputs=inp, outputs=out)

optimizer = tf.keras.optimizers.Adam(learning_rate= 0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy')
model.summary()

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_25 (InputLayer)       [(None, 6373)]            0         
                                                                 
 dense_26 (Dense)            (None, 128)               815872    
                                                                 
 dense_27 (Dense)            (None, 64)                8256      
                                                                 
 dense_28 (Dense)            (None, 32)                2080      
                                                                 
 dense_29 (Dense)            (None, 1)                 33        
                                                                 
Total params: 826,241
Trainable params: 826,241
Non-trainable params: 0
_________________________________________________________________


In [102]:
model.fit(x=X_train, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x22642bbb250>

In [103]:
pred = model.predict(X_test)
print(classification_report(y_test, pred))

              precision    recall  f1-score   support

           0       0.70      1.00      0.82        32
           1       0.00      0.00      0.00        14

    accuracy                           0.70        46
   macro avg       0.35      0.50      0.41        46
weighted avg       0.48      0.70      0.57        46



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
